diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..345e61a --- /dev/null +++ b/.gitignore @@ -0,0 +1,49 @@ +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/dictionaries + +# Sensitive or high-churn files: +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.xml +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml + +# Gradle: +.idea/**/gradle.xml +.idea/**/libraries + +# CMake +cmake-build-debug/ + +# Mongo Explorer plugin: +.idea/**/mongoSettings.xml + +## File-based project format: +*.iws + +## Plugin-specific files: + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..d965ffc --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..f01716a --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..65e58fe --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/ZipFile.iml b/ZipFile.iml new file mode 100644 index 0000000..023d7f1 --- /dev/null +++ b/ZipFile.iml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml new file 
mode 100644 index 0000000..12881d0 --- /dev/null +++ b/pom.xml @@ -0,0 +1,41 @@ + + + 4.0.0 + + ru.spbau.mit.kazakov + ZipFile + 1.0-SNAPSHOT + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.7 + 1.7 + + + + + + + + junit + junit + 4.0 + + + org.jetbrains + annotations + 13.0 + + + commons-io + commons-io + 2.5 + + + + \ No newline at end of file diff --git a/src/main/java/ru/spbau/mit/kazakov/ZipFile/NotExistingDirectoryException.java b/src/main/java/ru/spbau/mit/kazakov/ZipFile/NotExistingDirectoryException.java new file mode 100644 index 0000000..b43fc9d --- /dev/null +++ b/src/main/java/ru/spbau/mit/kazakov/ZipFile/NotExistingDirectoryException.java @@ -0,0 +1,8 @@ +package ru.spbau.mit.kazakov.ZipFile; + +/** + * Exception thrown when the passed path does not denote an existing directory. + */ +public class NotExistingDirectoryException extends Exception { + +} diff --git a/src/main/java/ru/spbau/mit/kazakov/ZipFile/RegexpUnzip.java b/src/main/java/ru/spbau/mit/kazakov/ZipFile/RegexpUnzip.java new file mode 100644 index 0000000..4e570b0 --- /dev/null +++ b/src/main/java/ru/spbau/mit/kazakov/ZipFile/RegexpUnzip.java @@ -0,0 +1,105 @@ +package ru.spbau.mit.kazakov.ZipFile; + + +import org.jetbrains.annotations.NotNull; + +import java.io.*; +import java.util.ArrayList; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +/** + * Extracts files whose names match a specified regular expression from the zip archives located in a specified directory. + * Extracted files, together with their folders, are placed in the directory that contains the archives. + */ +public class RegexpUnzip { + /** + * Finds the archives in the specified directory and extracts from them the files whose names match the specified regular expression.
+     *
+     * @param path   to directory
+     * @param regexp to satisfy
+     * @throws NotExistingDirectoryException if path doesn't specify a directory
+     * @throws IOException                   if an archive can't be read or an entry can't be written
+     */
+    public static void unzipMatchedFilesInPath(@NotNull String path, @NotNull String regexp)
+            throws NotExistingDirectoryException, IOException {
+        File directory = new File(path);
+        if (!directory.isDirectory()) {
+            throw new NotExistingDirectoryException();
+        }
+
+        File[] filesInDirectory = directory.listFiles();
+        if (filesInDirectory == null) {
+            // listFiles() returns null when the directory can't be listed; there is nothing to do then.
+            return;
+        }
+
+        // Detect every archive before extracting anything, so that extraction can't affect detection.
+        ArrayList<File> zipFiles = new ArrayList<>();
+        for (File file : filesInDirectory) {
+            if (isZipFile(file)) {
+                zipFiles.add(file);
+            }
+        }
+
+        for (File zipFile : zipFiles) {
+            unzipMatchedFiles(zipFile.getAbsolutePath(), path, regexp);
+        }
+    }
+
+    /**
+     * Unzips files with names satisfying regular expression from specified archive.
+     * Entries are written below {@code path}, keeping the folders stored in their names.
+     *
+     * @param zipFile path to the archive to extract files from
+     * @param path    to the directory the entries are extracted into
+     * @param regexp  to satisfy
+     * @throws IOException if the archive can't be read, a folder or file can't be created,
+     *                     or an entry name points outside of {@code path}
+     */
+    private static void unzipMatchedFiles(String zipFile, String path, String regexp) throws IOException {
+        File targetDirectory = new File(path);
+        String canonicalTarget = targetDirectory.getCanonicalPath();
+        // A trailing separator keeps "/data/out" from matching a sibling such as "/data/out-evil".
+        String allowedPrefix = canonicalTarget.endsWith(File.separator)
+                ? canonicalTarget : canonicalTarget + File.separator;
+
+        try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(zipFile))) {
+            ZipEntry zipEntry;
+            while ((zipEntry = zipInputStream.getNextEntry()) != null) {
+                String fileNameToExtract = zipEntry.getName();
+                if (!fileNameToExtract.matches(regexp) || zipEntry.isDirectory()) {
+                    continue;
+                }
+
+                File fileToExtract = new File(targetDirectory, fileNameToExtract);
+                // Entry names come from the archive and may contain "..": refuse to write outside the target.
+                if (!fileToExtract.getCanonicalPath().startsWith(allowedPrefix)) {
+                    throw new IOException("Zip entry is outside of the target directory: " + fileNameToExtract);
+                }
+
+                File parentDirectory = fileToExtract.getParentFile();
+                if (!parentDirectory.isDirectory() && !parentDirectory.mkdirs()) {
+                    throw new IOException("Can't create directory: " + parentDirectory);
+                }
+
+                extract(fileToExtract, zipInputStream);
+            }
+        }
+    }
+
+    /**
+     * Extracts a zip entry.
+     *
+     * @param fileToExtract  extracting file
+     * @param zipInputStream zip stream to extract from
+     * @throws IOException if reading the entry or writing the file fails
+     */
+    private static void extract(final File fileToExtract, final ZipInputStream zipInputStream) throws IOException {
+        try (FileOutputStream fileOutputStream = new FileOutputStream(fileToExtract)) {
+            byte[] buffer = new byte[8192];
+            int readBytes;
+
+            // read() returns -1 once the current entry has been consumed.
+            while ((readBytes = zipInputStream.read(buffer)) != -1) {
+                fileOutputStream.write(buffer, 0, readBytes);
+            }
+        }
+    }
+
+    /**
+     * Determines whether a file is a zip file.
+     * A file is treated as a zip file when its first four bytes are the signature 0x504B0304 ("PK\3\4").
+     *
+     * @param file to check
+     * @return true if file is a zip file, and false otherwise
+     * @throws IOException if the file can't be opened or read
+     */
+    private static boolean isZipFile(final File file) throws IOException {
+        // Directories, other non-regular files and files too short to hold the signature are not archives.
+        if (!file.isFile() || file.length() < 4) {
+            return false;
+        }
+
+        try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
+            // readInt() reads four bytes, high byte first.
+            return randomAccessFile.readInt() == 0x504B0304;
+        } catch (EOFException e) {
+            // The file shrank after the length check, so it can't start with the signature.
+            return false;
+        }
+    }
+
+}
diff --git a/src/test/java/ru/spbau/mit/kazakov/ZipFile/RegexpUnzipTest.java b/src/test/java/ru/spbau/mit/kazakov/ZipFile/RegexpUnzipTest.java
new file mode 100644
index 0000000..4ccfbe1
--- /dev/null
+++ b/src/test/java/ru/spbau/mit/kazakov/ZipFile/RegexpUnzipTest.java
@@ -0,0 +1,90 @@
+package ru.spbau.mit.kazakov.ZipFile;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+
+import static org.junit.Assert.*;
+
+public class RegexpUnzipTest {
+    private final static String ABSOLUTE_PATH_TO_RESOURCES
+            = new File("src" + File.separator + "test" + File.separator + "resources").getAbsolutePath();
+    @Before
+    public void clearTestingFolder() throws IOException {
+        File testingFolder = new File(ABSOLUTE_PATH_TO_RESOURCES + File.separator + "testing folder");
+        FileUtils.cleanDirectory(testingFolder);
+
+        File testingFiles = new File(ABSOLUTE_PATH_TO_RESOURCES + File.separator + "testing files");
+        FileUtils.copyDirectory(testingFiles,
testingFolder);
+    }
+
+    /** Absolute path to the folder the tests unzip into. */
+    private static final String TESTING_FOLDER = ABSOLUTE_PATH_TO_RESOURCES + File.separator + "testing folder";
+
+    /**
+     * Collects the names (without folders) of all files located in the testing folder and its subfolders.
+     *
+     * @return names of the files found
+     */
+    private ArrayList<String> getTestingFolderFileNames() {
+        ArrayList<String> fileNamesInTestingFolder = new ArrayList<>();
+
+        Collection<File> filesInTestingFolder = FileUtils.listFiles(new File(TESTING_FOLDER), null, true);
+        for (File file : filesInTestingFolder) {
+            fileNamesInTestingFolder.add(file.getName());
+        }
+
+        return fileNamesInTestingFolder;
+    }
+
+    @Test(expected = NotExistingDirectoryException.class)
+    public void testUnzipMatchedFilesInPathNotExistingFileThrowsNotExistingDirectoryException() throws Exception {
+        String wrongDirectory = TESTING_FOLDER + File.separator + "not existing file";
+        RegexpUnzip.unzipMatchedFilesInPath(wrongDirectory, ".*");
+    }
+
+    @Test(expected = NotExistingDirectoryException.class)
+    public void testUnzipMatchedFilesInPathNotDirectoryThrowsNotExistingDirectoryException() throws Exception {
+        String wrongDirectory = TESTING_FOLDER + File.separator + "Some archive.zip";
+        RegexpUnzip.unzipMatchedFilesInPath(wrongDirectory, ".*");
+    }
+
+    @Test
+    public void testUnzipMatchedFilesInPathNoMatchings() throws Exception {
+        RegexpUnzip.unzipMatchedFilesInPath(TESTING_FOLDER, "");
+        ArrayList<String> fileNamesInTestingFolder = getTestingFolderFileNames();
+        assertEquals(5, fileNamesInTestingFolder.size());
+    }
+
+    @Test
+    public void testUnzipMatchedFilesInPathOneArchiveMatchings() throws Exception {
+        RegexpUnzip.unzipMatchedFilesInPath(TESTING_FOLDER, ".*file.*");
+        ArrayList<String> fileNamesInTestingFolder = getTestingFolderFileNames();
+        assertEquals(8, fileNamesInTestingFolder.size());
+        assertTrue(fileNamesInTestingFolder.contains("Some file.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("Another file"));
+        assertTrue(fileNamesInTestingFolder.contains("Some file in a directory.txt"));
+    }
+
+    @Test
+    public void testUnzipMatchedFilesInPathBothArchiveMatchings() throws Exception {
+        RegexpUnzip.unzipMatchedFilesInPath(TESTING_FOLDER, ".*txt.*");
+        ArrayList<String> fileNamesInTestingFolder = getTestingFolderFileNames();
+        assertEquals(9, fileNamesInTestingFolder.size());
+        assertTrue(fileNamesInTestingFolder.contains("Some file.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("Regular.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("Some file in a directory.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("It isn't a folder.txt"));
+    }
+
+    @Test
+    public void testUnzipMatchedFilesInPathAllMatchings() throws Exception {
+        RegexpUnzip.unzipMatchedFilesInPath(TESTING_FOLDER, ".*");
+        ArrayList<String> fileNamesInTestingFolder = getTestingFolderFileNames();
+        assertEquals(11, fileNamesInTestingFolder.size());
+        assertTrue(fileNamesInTestingFolder.contains("Some file.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("Another file"));
+        assertTrue(fileNamesInTestingFolder.contains("Regular.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("Some file in a directory.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("It isn't a folder.txt"));
+        assertTrue(fileNamesInTestingFolder.contains("Unknown"));
+    }
+}
diff --git a/src/test/resources/testing files/Another archive.zip b/src/test/resources/testing files/Another archive.zip
new file mode 100644
index 0000000..db5c7db
Binary files /dev/null and b/src/test/resources/testing files/Another archive.zip differ
diff --git a/src/test/resources/testing files/Not an archive/Some file not inside an archive.txt b/src/test/resources/testing files/Not an archive/Some file not inside an archive.txt
new file mode 100644
index 0000000..9be9bcf
--- /dev/null
+++ b/src/test/resources/testing files/Not an archive/Some file not
inside an archive.txt @@ -0,0 +1 @@ +Nothing \ No newline at end of file diff --git a/src/test/resources/testing files/Not an archive/This archive isn't in specified directory.zip b/src/test/resources/testing files/Not an archive/This archive isn't in specified directory.zip new file mode 100644 index 0000000..36c40df Binary files /dev/null and b/src/test/resources/testing files/Not an archive/This archive isn't in specified directory.zip differ diff --git a/src/test/resources/testing files/Some archive.zip b/src/test/resources/testing files/Some archive.zip new file mode 100644 index 0000000..37d5fc1 Binary files /dev/null and b/src/test/resources/testing files/Some archive.zip differ diff --git a/src/test/resources/testing files/This file isn't an archive b/src/test/resources/testing files/This file isn't an archive new file mode 100644 index 0000000..451bed1 --- /dev/null +++ b/src/test/resources/testing files/This file isn't an archive @@ -0,0 +1 @@ +Go away \ No newline at end of file