theobisproject commented on pull request #113:
URL: https://github.com/apache/commons-compress/pull/113#issuecomment-730541671


   The benchmark was added to the 
https://github.com/bodewig/commons-compress-benchmarks repository for easier 
setup.  
   
   ```java
   package de.samaflost.commons_compress.tar;
   
   import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
   import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
   import org.apache.commons.compress.archivers.tar.TarFile;
   import org.apache.commons.compress.utils.IOUtils;
   import org.openjdk.jmh.annotations.Benchmark;
   import org.openjdk.jmh.annotations.Fork;
   import org.openjdk.jmh.annotations.Measurement;
   import org.openjdk.jmh.annotations.OutputTimeUnit;
   import org.openjdk.jmh.annotations.Scope;
   import org.openjdk.jmh.annotations.State;
   import org.openjdk.jmh.annotations.Warmup;
   import org.openjdk.jmh.util.NullOutputStream;
   
   import java.io.IOException;
   import java.io.InputStream;
   import java.nio.file.Files;
   import java.nio.file.Path;
   import java.nio.file.Paths;
   import java.util.concurrent.TimeUnit;
   
   /**
    * JMH benchmark that reads entries of one large tar archive, once through
    * {@code TarFile} and once through {@code TarArchiveInputStream}.
    *
    * <p>Every benchmark method opens the archive anew, copies the selected
    * entry (or every entry) into a {@code NullOutputStream} and closes the
    * archive again. Scores are reported with seconds as the time unit.</p>
    *
    * <p>NOTE(review): the {@code *_tarStream} variants pick an entry by a
    * fixed number of {@code getNextTarEntry()} calls, and "last" is reached
    * with three calls. That presumably expects an archive with exactly three
    * entries - confirm against the file placed at the configured path.</p>
    */
   @State(Scope.Benchmark)
   @Fork(1)
   @OutputTimeUnit(TimeUnit.SECONDS)
   @Measurement(iterations = 10)
   @Warmup(iterations = 10)
   public class ReadLargeTarBenchmark {

       /** Returns the location of the archive; the value is a placeholder. */
       private Path getLargeTarPath() {
           return Paths.get("path-to-file");
       }

       /** Opens a new input stream on the archive file. */
       private InputStream getLargeTarStream() throws IOException {
           final Path archive = getLargeTarPath();
           return Files.newInputStream(archive);
       }

       /**
        * Drains the given stream into a {@code NullOutputStream}.
        *
        * @param inputStream stream to read until it is exhausted
        * @throws IOException if copying fails
        */
       private void readEntry(final InputStream inputStream)
               throws IOException {
           final NullOutputStream sink = new NullOutputStream();
           IOUtils.copy(inputStream, sink, 8192);
       }

       /**
        * Opens the archive as a stream, requests the next entry
        * {@code headerReads} times and then drains the stream's current
        * entry.
        *
        * @param headerReads how many times {@code getNextTarEntry()} is
        *                    called before reading
        * @throws IOException if opening or reading the archive fails
        */
       private void readStreamEntryAfter(final int headerReads)
               throws IOException {
           try (TarArchiveInputStream tarIn =
                   new TarArchiveInputStream(getLargeTarStream())) {
               for (int done = 0; done < headerReads; done++) {
                   tarIn.getNextTarEntry();
               }
               readEntry(tarIn);
           }
       }

       /** Reads every entry of the archive via {@code TarFile}. */
       @Benchmark
       public void readAllEntries_tarFile() throws IOException {
           try (TarFile archive = new TarFile(getLargeTarPath())) {
               for (TarArchiveEntry current : archive.getEntries()) {
                   try (InputStream content =
                           archive.getInputStream(current)) {
                       readEntry(content);
                   }
               }
           }
       }

       /** Reads every entry of the archive via the streaming reader. */
       @Benchmark
       public void readAllEntries_tarStream() throws IOException {
           try (TarArchiveInputStream tarIn =
                   new TarArchiveInputStream(getLargeTarStream())) {
               // Only the null check matters; the entry itself is not used.
               while (tarIn.getNextTarEntry() != null) {
                   readEntry(tarIn);
               }
           }
       }

       /** Reads the entry at index 0 via {@code TarFile}. */
       @Benchmark
       public void readFirstEntry_tarFile() throws IOException {
           try (TarFile archive = new TarFile(getLargeTarPath());
                   InputStream content = archive.getInputStream(
                           archive.getEntries().get(0))) {
               readEntry(content);
           }
       }

       /** Reads the first entry via the streaming reader. */
       @Benchmark
       public void readFirstEntry_tarStream() throws IOException {
           readStreamEntryAfter(1);
       }

       /** Reads the entry at index 1 via {@code TarFile}. */
       @Benchmark
       public void readSecondEntry_tarFile() throws IOException {
           try (TarFile archive = new TarFile(getLargeTarPath());
                   InputStream content = archive.getInputStream(
                           archive.getEntries().get(1))) {
               readEntry(content);
           }
       }

       /** Reads the second entry via the streaming reader. */
       @Benchmark
       public void readSecondEntry_tarStream() throws IOException {
           readStreamEntryAfter(2);
       }

       /** Reads the entry at the highest index via {@code TarFile}. */
       @Benchmark
       public void readLastEntry_tarFile() throws IOException {
           try (TarFile archive = new TarFile(getLargeTarPath())) {
               final TarArchiveEntry last = archive.getEntries()
                       .get(archive.getEntries().size() - 1);
               try (InputStream content = archive.getInputStream(last)) {
                   readEntry(content);
               }
           }
       }

       /**
        * Reads the third entry via the streaming reader.
        *
        * <p>NOTE(review): "last" is hard-wired to the third entry here -
        * verify the archive has exactly three entries.</p>
        */
       @Benchmark
       public void readLastEntry_tarStream() throws IOException {
           readStreamEntryAfter(3);
       }
   }
   
   ```
   
   This is the result on my machine (Windows 10, Java 11.0.9, file on SSD) with 
a test file containing three copies of the Ubuntu 20.04 Desktop image.
   ```
   Benchmark                                         Mode  Cnt  Score   Error  
Units
   ReadLargeTarBenchmark.readAllEntries_tarFile     thrpt   10  0,228 ± 0,003  
ops/s
   ReadLargeTarBenchmark.readAllEntries_tarStream   thrpt   10  0,256 ± 0,002  
ops/s
   ReadLargeTarBenchmark.readFirstEntry_tarFile     thrpt   10  0,680 ± 0,010  
ops/s
   ReadLargeTarBenchmark.readFirstEntry_tarStream   thrpt   10  0,766 ± 0,006  
ops/s
   ReadLargeTarBenchmark.readLastEntry_tarFile      thrpt   10  0,689 ± 0,006  
ops/s
   ReadLargeTarBenchmark.readLastEntry_tarStream    thrpt   10  0,127 ± 0,001  
ops/s
   ReadLargeTarBenchmark.readSecondEntry_tarFile    thrpt   10  0,690 ± 0,012  
ops/s
   ReadLargeTarBenchmark.readSecondEntry_tarStream  thrpt   10  0,218 ± 0,003  
ops/s
   ```
   I think this demonstrates the strengths and weaknesses of the implementation.
   
   **Strengths**
   - Performance of random access is constant
   - Extracting all entries is close to the performance of `TarArchiveInputStream`
   
   **Weakness**
   - Extracting only the first entry is slower because of the extra work that 
is done for random access
   
   The performance gap should be smaller when the archive is smaller. Feel free 
to experiment with different tar contents and numbers of files and report back 
your results.
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to