Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions src/main/java/org/jadice/filetype/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,17 +228,22 @@ public Map<String, Object> analyze(final InputStream sis, final AnalysisListener
public Map<String, Object> analyze(final InputStream is, final AnalysisListener listener, final String fileName)
throws IOException {
Map<String, Object> result = new HashMap<>();


// POI (3.1-Final) closes the stream during analysis of office files - use an uncloseable stream wrapper
final UncloseableInputStream uis = new UncloseableInputStream(is);
final UncloseableSeekableInputStreamWrapper usis = new UncloseableSeekableInputStreamWrapper(new MemoryInputStream(uis));
usis.lockClose(); // and don't unlock later as POI attempts to close asynchronously!

final AnalysisListener effectiveListener = listener != null ? listener : DEFAULT_LISTENER;
// POI may close streams during analysis; shield callers by using an uncloseable, seekable wrapper.
// If the input is already seekable, avoid buffering the full stream in memory.
final SeekableInputStream baseStream;
if (is instanceof SeekableInputStream) {
baseStream = (SeekableInputStream) is;
} else {
final UncloseableInputStream uis = new UncloseableInputStream(is);
baseStream = new MemoryInputStream(uis);
}
final UncloseableSeekableInputStreamWrapper usis = new UncloseableSeekableInputStreamWrapper(baseStream);
usis.lockClose(); // do not unlock later as POI may attempt to close asynchronously
final String sanitizedFileName = fileName != null ? fileName.replaceAll("[:\\\\/*?|<>]", "_") : null;
String extension = FilenameUtils.getExtension(sanitizedFileName);

Context ctx = new Context(usis, result, listener, locale, extension);
Context ctx = new Context(usis, result, effectiveListener, locale, extension);

database.analyze(ctx);

Expand Down Expand Up @@ -267,12 +272,13 @@ public Map<String, Object> analyze(final File file, final AnalysisListener liste
SeekableInputStream sis = new RandomAccessFileInputStream(file);
try {
String fileName = file.getName();
return analyze(sis, null, fileName);
return analyze(sis, listener, fileName);
} finally {
try {
sis.close();
} catch (IOException e) {
listener.error(this, "Exception closing RandomAccessFileInputStream", e);
final AnalysisListener effectiveListener = listener != null ? listener : DEFAULT_LISTENER;
effectiveListener.error(this, "Exception closing RandomAccessFileInputStream", e);
}
}
}
Expand All @@ -289,13 +295,13 @@ public Map<String, Object> analyze(final SeekableInputStream sis) throws IOExcep
}


public Map<String, Object> analyzeWithFilename(final SeekableInputStream sis,final String fileName) throws IOException {
public Map<String, Object> analyzeWithFilename(final SeekableInputStream sis, final String fileName) throws IOException {
return analyze(sis, DEFAULT_LISTENER, fileName);
}

/**
* Analyze the stream supplied via an {@link InputStream}. <br>
* Caveat: the data will be buffered in memory. If you don't like this, supply a
* Caveat: non-seekable streams may be buffered in memory. If you don't like this, supply a
* {@link SeekableInputStream} implementation or a {@link File} instead.
*
* @param is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,19 +237,13 @@ public boolean matches(final Context context) {
SeekableInputStream sis = context.getStream();
try {
sis.seek(0);

ZipFile archive = ZipUtil.createZipFile(sis);
try {
detect(context, archive);
} finally {
archive.close();
try {
Files.delete(archive.getFile().toPath());
} catch (IOException ioe) {
LOGGER.debug("failed to delete temporary zip file", ioe);
}
}

ZipFile archive = ZipUtil.createZipFile(sis);
try {
detect(context, archive);
} finally {
// Close releases file handles; any temp cleanup is handled by the ZipUtil ZipFile implementation.
archive.close();
}
return context.getProperty(MimeTypeAction.KEY) != null;
} catch (IOException e) {
context.error(this, "Exception analyzing Office Open XML Container", e);
Expand Down Expand Up @@ -486,23 +480,39 @@ private InputStream getSafeInputStream(String fileName, final ZipFile archive) t
if (fileName.startsWith("/")) {
fileName = fileName.substring(1);
}
// Support both entry layouts: root entries and entries prefixed with "<zipname>/".
final String prefixed = archive.getFile().getName() + File.separator + fileName;
InputStream direct = tryGetSingleEntry(fileName, archive);
if (direct != null) {
return direct;
}
InputStream prefixedDirect = tryGetSingleEntry(prefixed, archive);
if (prefixedDirect != null) {
return prefixedDirect;
}
InputStream pieces = tryGetPieceStream(fileName, archive);
if (pieces != null) {
return pieces;
}
return tryGetPieceStream(prefixed, archive);
}

// consider the uuid directory name
fileName = archive.getFile().getName() + File.separator + fileName;

final FileHeader entry = archive.getFileHeader(fileName);
private InputStream tryGetSingleEntry(final String name, final ZipFile archive) throws IOException {
final FileHeader entry = archive.getFileHeader(name);
if (entry != null && !entry.isDirectory()) {
LOGGER.debug("Get '{}' from 1 piece", fileName);
LOGGER.debug("Get '{}' from 1 piece", name);
return archive.getInputStream(entry);
}
return null;
}

// try directory browsing:
private InputStream tryGetPieceStream(final String baseName, final ZipFile archive) throws IOException {
// Assemble stream from "[0].piece"..."[$n].last.piece";
// see Office Open XML, Part 2: Open Packaging Conventions, sec 9.1.3.1 Logical Item Names
List<InputStream> streams = new LinkedList<>();
int i = 0;
FileHeader piece;
while ((piece = archive.getFileHeader(fileName + "/[" + i + "].piece")) != null) {
while ((piece = archive.getFileHeader(baseName + "/[" + i + "].piece")) != null) {
final InputStream is = archive.getInputStream(piece);
if (is == null) {
break;
Expand All @@ -512,7 +522,7 @@ private InputStream getSafeInputStream(String fileName, final ZipFile archive) t
i++;
}

final FileHeader last = archive.getFileHeader(fileName + "/[" + i + "].last.piece");
final FileHeader last = archive.getFileHeader(baseName + "/[" + i + "].last.piece");
if (last == null) {
return null;
}
Expand All @@ -523,7 +533,7 @@ private InputStream getSafeInputStream(String fileName, final ZipFile archive) t
}
streams.add(is);

LOGGER.debug("Get '{}' from {} pieces", fileName, streams.size());
LOGGER.debug("Get '{}' from {} pieces", baseName, streams.size());
return new SequenceInputStream(Collections.enumeration(streams));
}
}
63 changes: 33 additions & 30 deletions src/main/java/org/jadice/filetype/ziputil/ZipUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,12 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.nio.file.Files;
import java.nio.file.Path;

import org.jadice.filetype.io.SeekableInputStream;

import net.lingala.zip4j.ZipFile;
import net.lingala.zip4j.io.inputstream.ZipInputStream;
import net.lingala.zip4j.model.LocalFileHeader;

public class ZipUtil {

Expand All @@ -22,35 +19,41 @@ private ZipUtil() {
// utility class
}

/**
 * Copies the remaining bytes of {@code sis} (from its current position until end of stream)
 * into a temporary ZIP file below {@code TEMP_DIRECTORY} and opens that file as a {@link ZipFile}.
 * <p>
 * The stream position of {@code sis} is restored to its value at method entry, whether or not
 * the copy succeeds.
 * <p>
 * Caller must close the returned {@link ZipFile} so the temporary ZIP file can be deleted.
 * If this method fails, the temporary file is deleted before the exception propagates.
 *
 * @param sis the stream to copy; read from its current position
 * @return a {@link ZipFile} backed by the temporary copy
 * @throws IOException if the temporary file cannot be created or written, or if the stream
 *         cannot be read or repositioned
 */
public static ZipFile createZipFile(SeekableInputStream sis) throws IOException {
  final long fp = sis.getStreamPosition();
  final Path tmpZip = Files.createTempFile(TEMP_DIRECTORY.toPath(), "jadice-filetype-", ".zip");
  final File tmpFile = tmpZip.toFile();
  // Fallback for callers that never close the returned ZipFile.
  tmpFile.deleteOnExit();

  boolean success = false;
  try {
    try (OutputStream os = new FileOutputStream(tmpFile)) {
      final byte[] buffer = new byte[128 * 1024];
      int read;
      while ((read = sis.read(buffer)) != -1) {
        os.write(buffer, 0, read);
      }
    } finally {
      // Always hand the stream back at the position the caller gave it to us.
      sis.seek(fp);
    }
    final ZipFile zipFile = new AutoDeletingZipFile(tmpFile);
    success = true;
    return zipFile;
  } finally {
    if (!success) {
      // No ZipFile was handed out, so nobody else can clean up: remove the temp file now
      // instead of leaving it on disk until JVM exit.
      try {
        Files.deleteIfExists(tmpZip);
      } catch (IOException ignored) {
        // best-effort cleanup; the original exception is the one worth reporting
      }
    }
  }
}

final UUID uuid = UUID.randomUUID();
final File tmpDir = new File(TEMP_DIRECTORY + File.separator + uuid);

long fp = sis.getStreamPosition();
LocalFileHeader localFileHeader;
int readLen;
byte[] readBuffer = new byte[4096];

try (ZipInputStream zipInputStream = new ZipInputStream(sis); ZipFile zipFile = new ZipFile(uuid.toString())) {
List<File> files = new ArrayList<>();
while ((localFileHeader = zipInputStream.getNextEntry()) != null) {
if (!localFileHeader.isDirectory()) {
final File extractedFile = new File(
tmpDir.getAbsolutePath() + File.separator + localFileHeader.getFileName());
File parentFolder = new File(extractedFile.getParent());
parentFolder.mkdirs();
try (OutputStream outputStream = new FileOutputStream(extractedFile)) {
while ((readLen = zipInputStream.read(readBuffer)) != -1) {
outputStream.write(readBuffer, 0, readLen);
}
}
files.add(extractedFile);
private static final class AutoDeletingZipFile extends ZipFile {
private final File tmpFile;
private AutoDeletingZipFile(File tmpFile) {
super(tmpFile);
this.tmpFile = tmpFile;
}
@Override
public void close() throws IOException {
try {
super.close();
} finally {
try {
Files.deleteIfExists(tmpFile.toPath());
} catch (IOException ignore) {
// best-effort cleanup
}
}
sis.seek(fp);
zipFile.addFolder(tmpDir);
return zipFile;
}
}
}
Loading