[ 
https://issues.apache.org/jira/browse/COMPRESS-649?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Maheshinder Goyal updated COMPRESS-649:
---------------------------------------
    Description: 
Hi Team,

 

We use LZ4 compression in our project to compress data under 1MB. We generally 
deal with String to String compression.

We typically compress around 50 to 100 strings in a loop as part of a function 
in one call.

When we upgraded to Apache Commons Compress 1.24, we noticed a performance 
degradation: our workflow now takes more than twice as long as it did with 
version 1.21.

We reverted to 1.21, and performance returned to its previous level.

We have now reproduced the issue in a small standalone Java test, where we 
observe a roughly 2x performance degradation.

 

In the following example, we have used a text-file with name 
"some-900KB-text.txt" which can be any random text file of 900KB size.

 

Comparing the two versions, the following program takes 7 to 8 seconds with 
1.24 and around 3 seconds with 1.21.

 

If you increase the number of loops, the performance will degrade further.

 

###################################################

import 
org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class CompressionDegradationTest {

    public static void main(String[] args) throws Exception {
        RandomAccessFile aFile = new RandomAccessFile("some-900kb-text.txt", 
"r");
        FileChannel inChannel = aFile.getChannel();
        long fileSize = inChannel.size();

        ByteBuffer buffer = ByteBuffer.allocate((int) fileSize);
        inChannel.read(buffer);
        buffer.flip();

        String rawPlan = new String(buffer.array(), StandardCharsets.UTF_8);
        long start = System.currentTimeMillis();
        for (int i = 0; i < 80; i++) {
            String compressed = compress(rawPlan);
        }
        long end = System.currentTimeMillis();
        float sec = (end - start) / 1000F; 
        System.out.println(sec + " seconds");
    }

    private static String compress(final String value) throws IOException {
        ByteArrayOutputStream byteStream = new 
ByteArrayOutputStream(value.length());
        FramedLZ4CompressorOutputStream compress = new 
FramedLZ4CompressorOutputStream(byteStream);
        String compressedValue = null;
        try {
            compress.write(value.getBytes(StandardCharsets.UTF_8));
            compress.finish();
            compressedValue = 
Base64.getEncoder().encodeToString(byteStream.toByteArray());
        } finally {
            compress.close();
            byteStream.close();
        }

        return compressedValue;
    }
};

########################################################

  was:
Hi Team,

 

We use LZ4 compression in our project to compress data under 1MB. We generally 
deal with String to String compression.

We typically compress around 50 to 100 strings in a loop as part of a function 
in one call.

When we got new Apache-Commons-compress version of 1.24 we started noticing a 
degradation in performance where our workflow started taking more than double 
time in seconds than it used to take with 1.21 version.

We reverted the change and went back to 1.21 and performance returned back to 
good.

Now,  we have reproduced the issue in a small standalone java unit-test where 
we have noticed 2x performance degradation on a small example.

 

In the following example, we have used a text-file with name 
"some-900KB-text.txt" which can be any random text file of 900KB size.

 

You will notice that b/w 1.24 and 1.21 , the following program would take 7 to 
8 seconds with 1.24 version and around 3 seconds with 1.21 version.

 

If you increase the number of loops, the performance will degrade further.

 

###################################################

{{@Test
    /**
     * Previous form of the reproducer: reads "some-900KB-text.txt" into a
     * String and compresses it 80 times via compress(String, DataConverter).
     *
     * No timing is taken and nothing is asserted; each result is discarded.
     *
     * NOTE(review): aFile and inChannel are never closed in this method.
     *
     * @throws Exception if the file cannot be read or compression fails
     */
    public void testGetValueTypedComplexType() throws Exception {
        // Open the input read-only; the path is relative to the working directory.
        RandomAccessFile aFile = new RandomAccessFile("some-900KB-text.txt", 
"r");
        FileChannel inChannel = aFile.getChannel();
        long fileSize = inChannel.size();

        // Buffer is sized to the channel length, then filled by a single read() call.
        ByteBuffer buffer = ByteBuffer.allocate((int) fileSize);
        inChannel.read(buffer);
        buffer.flip();

        // Decode the whole backing array as UTF-8.
        String rawPlan = new String(buffer.array(), StandardCharsets.UTF_8);
        // Compress the same text 80 times, with a new JsonDataConverter per iteration.
        for (int i = 0; i < 80; i++)

        {             String compressed = compress(rawPlan, new 
JsonDataConverter());      

        }

    }

    /**
     * Converts value with the given converter, compresses the result with
     * FramedLZ4CompressorOutputStream and returns the compressed bytes as a
     * Base64 string.
     *
     * @param value     text to compress
     * @param converter applied to value before compression; presumably
     *                  serializes it to JSON, as the local name suggests
     *                  (TODO confirm, DataConverter is not shown here)
     * @return Base64 encoding of the compressed bytes
     * @throws IOException if writing to or closing a stream fails
     */
    private String compress(final String value, final  DataConverter converter) 
throws IOException {
        String json = converter.toData(value);
        // Initial capacity is the character count of the converted string.
        ByteArrayOutputStream byteStream = new 
ByteArrayOutputStream(json.length());
        FramedLZ4CompressorOutputStream compress = new 
FramedLZ4CompressorOutputStream(byteStream);
        String compressedValue = null;
        // Write the UTF-8 bytes, finish the stream, then Base64-encode what byteStream holds.
        try

             {   compress.write(json.getBytes(StandardCharsets.UTF_8));      
                  compress.finish();  
                  compressedValue = 
Base64.getEncoder().encodeToString(byteStream.toByteArray());        

            }  finally {
                   // Both streams are closed whether or not compression succeeded.
                   compress.close();        
                   byteStream.close();    
            }

           return compressedValue;
    }
}}
########################################################


> Performance Degradation in LZ4 compression between 1.21 and 1.24 versions
> -------------------------------------------------------------------------
>
>                 Key: COMPRESS-649
>                 URL: https://issues.apache.org/jira/browse/COMPRESS-649
>             Project: Commons Compress
>          Issue Type: Bug
>          Components: Compressors
>    Affects Versions: 1.24.0
>            Reporter: Maheshinder Goyal
>            Priority: Major
>             Fix For: 1.21
>
>         Attachments: CompressionDegradationTest.java
>
>
> Hi Team,
>  
> We use LZ4 compression in our project to compress data under 1MB. We 
> generally deal with String to String compression.
> We typically compress around 50 to 100 strings in a loop as part of a 
> function in one call.
> When we got new Apache-Commons-compress version of 1.24 we started noticing a 
> degradation in performance where our workflow started taking more than double 
> time in seconds than it used to take with 1.21 version.
> We reverted the change and went back to 1.21 and performance returned back to 
> good.
> Now,  we have reproduced the issue in a small standalone java unit-test where 
> we have noticed 2x performance degradation on a small example.
>  
> In the following example, we have used a text-file with name 
> "some-900KB-text.txt" which can be any random text file of 900KB size.
>  
> You will notice that b/w 1.24 and 1.21 , the following program would take 7 
> to 8 seconds with 1.24 version and around 3 seconds with 1.21 version.
>  
> If you increase the number of loops, the performance will degrade further.
>  
> ###################################################
> import 
> org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream;
> import java.io.ByteArrayOutputStream;
> import java.io.IOException;
> import java.io.RandomAccessFile;
> import java.nio.ByteBuffer;
> import java.nio.channels.FileChannel;
> import java.nio.charset.StandardCharsets;
> import java.util.Base64;
> public class CompressionDegradationTest {
>     public static void main(String[] args) throws Exception {
>         RandomAccessFile aFile = new RandomAccessFile("some-900kb-text.txt", 
> "r");
>         FileChannel inChannel = aFile.getChannel();
>         long fileSize = inChannel.size();
>         ByteBuffer buffer = ByteBuffer.allocate((int) fileSize);
>         inChannel.read(buffer);
>         buffer.flip();
>         String rawPlan = new String(buffer.array(), StandardCharsets.UTF_8);
>         long start = System.currentTimeMillis();
>         for (int i = 0; i < 80; i++) {
>             String compressed = compress(rawPlan);
>         }
>         long end = System.currentTimeMillis();
>         float sec = (end - start) / 1000F; 
>         System.out.println(sec + " seconds");
>     }
>     private static String compress(final String value) throws IOException {
>         ByteArrayOutputStream byteStream = new 
> ByteArrayOutputStream(value.length());
>         FramedLZ4CompressorOutputStream compress = new 
> FramedLZ4CompressorOutputStream(byteStream);
>         String compressedValue = null;
>         try {
>             compress.write(value.getBytes(StandardCharsets.UTF_8));
>             compress.finish();
>             compressedValue = 
> Base64.getEncoder().encodeToString(byteStream.toByteArray());
>         } finally {
>             compress.close();
>             byteStream.close();
>         }
>         return compressedValue;
>     }
> };
> ########################################################



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to