Updated Branches: refs/heads/develop b72d342dd -> f53b62551
added 7z support to loader Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/f53b6255 Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/f53b6255 Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/f53b6255 Branch: refs/heads/develop Commit: f53b62551b7ad0bd99f245e8b50645d0bf8a76d9 Parents: b72d342 Author: Sebastian Schaffert <[email protected]> Authored: Wed Feb 5 18:17:37 2014 +0100 Committer: Sebastian Schaffert <[email protected]> Committed: Wed Feb 5 18:17:37 2014 +0100 ---------------------------------------------------------------------- .../marmotta/loader/core/MarmottaLoader.java | 128 ++++++++++++++----- .../marmotta/loader/core/test/ArchiveTest.java | 3 +- .../loader/core/test/LoaderTestBase.java | 2 +- .../src/test/resources/demo-data.7z | Bin 0 -> 1423 bytes 4 files changed, 97 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java ---------------------------------------------------------------------- diff --git a/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java b/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java index 38e8316..cceb297 100644 --- a/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java +++ b/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java @@ -9,6 +9,8 @@ import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.archivers.ArchiveInputStream; import org.apache.commons.compress.archivers.ArchiveStreamFactory; import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry; +import org.apache.commons.compress.archivers.sevenz.SevenZFile; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; import org.apache.commons.compress.compressors.CompressorException; @@ -269,52 +271,107 @@ public class MarmottaLoader { log.info("loading files in archive {} ...", archive); if(archive.exists() && archive.canRead()) { - InputStream in; - String archiveCompression = detectCompression(archive); - InputStream fin = new BufferedInputStream(new FileInputStream(archive)); - if(archiveCompression != null) { - if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) { - log.info("auto-detected archive compression: GZIP"); - in = new GzipCompressorInputStream(fin,true); - } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) { - log.info("auto-detected archive compression: BZIP2"); - in = new BZip2CompressorInputStream(fin, true); + if(archive.getName().endsWith("7z")) { + log.info("auto-detected archive format: 7Z"); + + final SevenZFile sevenZFile = new SevenZFile(archive); + + try { + SevenZArchiveEntry entry; + while( (entry = sevenZFile.getNextEntry()) != null) { + + if(! entry.isDirectory()) { + log.info("loading entry {} ...", entry.getName()); + + // detect the file format + RDFFormat detectedFormat = RDFFormat.forFileName(entry.getName()); + if(format == null) { + if(detectedFormat != null) { + log.info("auto-detected entry format: {}", detectedFormat.getName()); + format = detectedFormat; + } else { + throw new RDFParseException("could not detect input format of entry "+ entry.getName()); + } + } else { + if(detectedFormat != null && !format.equals(detectedFormat)) { + log.warn("user-specified entry format ({}) overrides auto-detected format ({})", format.getName(), detectedFormat.getName()); + } else { + log.info("user-specified entry format: {}", format.getName()); + } + } + + + load(new InputStream() { + @Override + public int read() throws IOException { + return sevenZFile.read(); + } + + @Override + public int read(byte[] b) throws IOException { + return sevenZFile.read(b); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return sevenZFile.read(b, off, len); + } + },handler,format); + } + } + } finally { + sevenZFile.close(); + } + + } else { + InputStream in; + + String archiveCompression = detectCompression(archive); + InputStream fin = new BufferedInputStream(new FileInputStream(archive)); + if(archiveCompression != null) { + if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) { + log.info("auto-detected archive compression: GZIP"); + in = new GzipCompressorInputStream(fin,true); + } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) { + log.info("auto-detected archive compression: BZIP2"); + in = new BZip2CompressorInputStream(fin, true); + } else { + in = fin; + } } else { in = fin; } - } else { - in = fin; - } - ArchiveInputStream zipStream = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(in)); - logArchiveType(zipStream); + ArchiveInputStream zipStream = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(in)); + logArchiveType(zipStream); - ArchiveEntry entry; - while( (entry = zipStream.getNextEntry()) != null) { + ArchiveEntry entry; + while( (entry = zipStream.getNextEntry()) != null) { - if(! entry.isDirectory()) { - log.info("loading entry {} ...", entry.getName()); + if(! entry.isDirectory()) { + log.info("loading entry {} ...", entry.getName()); - // detect the file format - RDFFormat detectedFormat = RDFFormat.forFileName(entry.getName()); - if(format == null) { - if(detectedFormat != null) { - log.info("auto-detected entry format: {}", detectedFormat.getName()); - format = detectedFormat; - } else { - throw new RDFParseException("could not detect input format of entry "+ entry.getName()); - } - } else { - if(detectedFormat != null && !format.equals(detectedFormat)) { - log.warn("user-specified entry format ({}) overrides auto-detected format ({})", format.getName(), detectedFormat.getName()); + // detect the file format + RDFFormat detectedFormat = RDFFormat.forFileName(entry.getName()); + if(format == null) { + if(detectedFormat != null) { + log.info("auto-detected entry format: {}", detectedFormat.getName()); + format = detectedFormat; + } else { + throw new RDFParseException("could not detect input format of entry "+ entry.getName()); + } } else { - log.info("user-specified entry format: {}", format.getName()); + if(detectedFormat != null && !format.equals(detectedFormat)) { + log.warn("user-specified entry format ({}) overrides auto-detected format ({})", format.getName(), detectedFormat.getName()); + } else { + log.info("user-specified entry format: {}", format.getName()); + } } - } - load(zipStream,handler,format); + load(zipStream,handler,format); + } } } @@ -324,6 +381,7 @@ public class MarmottaLoader { } + private void logArchiveType(ArchiveInputStream stream) { if(log.isInfoEnabled()) { if(stream instanceof ZipArchiveInputStream) { @@ -332,6 +390,8 @@ public class MarmottaLoader { log.info("auto-detected archive format: TAR"); } else if (stream instanceof CpioArchiveInputStream) { log.info("auto-detected archive format: CPIO"); + } else if (stream instanceof CpioArchiveInputStream) { + log.info("auto-detected archive format: CPIO"); } else { log.info("unknown archive format, relying on commons-compress"); } http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java ---------------------------------------------------------------------- diff --git a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java index 9e9f9e8..a477a59 100644 --- a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java +++ b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java @@ -50,7 +50,8 @@ public class ArchiveTest extends LoaderTestBase { public static Collection<Object[]> data() { Object[][] data = new Object[][] { { "demo-data.tar.gz"}, - { "demo-data.zip"} + { "demo-data.zip"}, + { "demo-data.7z"} }; return Arrays.asList(data); } http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java ---------------------------------------------------------------------- diff --git a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java index 7cd3872..c4dfc1b 100644 --- a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java +++ b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java @@ -65,7 +65,7 @@ public abstract class LoaderTestBase { log.info("running loader tests from temporary directory {}", tempDir); - for(String filename : new String[] {"demo-data.rdf", "demo-data.rdf.gz", "demo-data.rdf.bz2", "demo-data.tar.gz", "demo-data.zip"}) { + for(String filename : new String[] {"demo-data.rdf", "demo-data.rdf.gz", "demo-data.rdf.bz2", "demo-data.tar.gz", "demo-data.zip", "demo-data.7z"}) { File data = new File(tempDir.toFile(), filename); FileUtils.copyInputStreamToFile(ArchiveTest.class.getResourceAsStream("/" + filename), data); } http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/test/resources/demo-data.7z ---------------------------------------------------------------------- diff --git a/loader/marmotta-loader-core/src/test/resources/demo-data.7z b/loader/marmotta-loader-core/src/test/resources/demo-data.7z new file mode 100644 index 0000000..b6cfb8f Binary files /dev/null and b/loader/marmotta-loader-core/src/test/resources/demo-data.7z differ
