This is an automated email from the ASF dual-hosted git repository.

gongxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


The following commit(s) were added to refs/heads/main by this push:
     new be6717cdd7c PAX: Support LZ4 compression for table columns (#1344)
be6717cdd7c is described below

commit be6717cdd7c5310165f706142f9f08839c8383c9
Author: Hao Wu <[email protected]>
AuthorDate: Thu Sep 4 17:34:16 2025 +0800

    PAX: Support LZ4 compression for table columns (#1344)
    
    * PAX: Support LZ4 compression for table columns
    
    PAX only supports zlib and zstd compression for column values.
    This commit adds lz4 support for pax table columns.
    
    * map compress level to acceleration for lz4
    
    * restrict acceleration to range [0, 3]
    
    * add macro control
---
 contrib/pax_storage/src/cpp/access/paxc_rel_options.cc    |  4 ++++
 contrib/pax_storage/src/cpp/access/paxc_rel_options.h     |  1 +
 .../src/cpp/storage/columns/pax_column_test.cc            |  6 ++++++
 .../pax_storage/src/cpp/storage/columns/pax_compress.cc   | 15 ++++++++++++---
 contrib/pax_storage/src/cpp/storage/proto/pax.proto       |  1 +
 5 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc 
b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
index 647cb5743cf..5de1b14cd97 100644
--- a/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
+++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
@@ -50,6 +50,10 @@ static const relopt_compress_type_mapping 
kSelfRelCompressMap[] = {
      pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD},
     {ColumnEncoding_Kind_COMPRESS_ZLIB_STR,
      pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZLIB},
+#ifdef USE_LZ4
+    {ColumnEncoding_Kind_COMPRESS_LZ4_STR,
+     pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4},
+#endif
 };
 
 typedef struct {
diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.h 
b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
index 4e813f38c40..e6c29363ab1 100644
--- a/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
+++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
@@ -41,6 +41,7 @@ namespace paxc {
 #define ColumnEncoding_Kind_DICTIONARY_STR "dict"
 #define ColumnEncoding_Kind_COMPRESS_ZSTD_STR "zstd"
 #define ColumnEncoding_Kind_COMPRESS_ZLIB_STR "zlib"
+#define ColumnEncoding_Kind_COMPRESS_LZ4_STR "lz4"
 
 #define STORAGE_FORMAT_TYPE_PORC "porc"
 #define STORAGE_FORMAT_TYPE_PORC_VEC "porc_vec"
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc 
b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc
index b26fdff65bf..f39e453cfee 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc
@@ -798,6 +798,9 @@ INSTANTIATE_TEST_SUITE_P(
     PaxColumnEncodingTestCombine, PaxColumnCompressTest,
     testing::Combine(testing::Values(16, 32, 64),
                      testing::Values(ColumnEncoding_Kind_NO_ENCODED,
+#ifdef USE_LZ4
+                                     ColumnEncoding_Kind_COMPRESS_LZ4,
+#endif
                                      ColumnEncoding_Kind_COMPRESS_ZSTD,
                                      ColumnEncoding_Kind_COMPRESS_ZLIB)));
 
@@ -805,6 +808,9 @@ INSTANTIATE_TEST_SUITE_P(
     PaxColumnEncodingTestCombine, PaxNonFixedColumnCompressTest,
     testing::Combine(testing::Values(16, 32, 64),
                      testing::Values(ColumnEncoding_Kind_NO_ENCODED,
+#ifdef USE_LZ4
+                                     ColumnEncoding_Kind_COMPRESS_LZ4,
+#endif
                                      ColumnEncoding_Kind_COMPRESS_ZSTD,
                                      ColumnEncoding_Kind_COMPRESS_ZLIB),
                      testing::Values(true, false),
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc 
b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc
index 87a34cbb6d7..f4bae52ea7d 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc
@@ -50,6 +50,12 @@ std::shared_ptr<PaxCompressor> 
PaxCompressor::CreateBlockCompressor(
       compressor = std::make_shared<PaxZlibCompressor>();
       break;
     }
+#ifdef USE_LZ4
+    case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4: {
+      compressor = std::make_shared<PaxLZ4Compressor>();
+      break;
+    }
+#endif
     case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: {
       CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError,
                  fmt("Invalid compress type %d",
@@ -230,9 +236,12 @@ size_t PaxLZ4Compressor::GetCompressBound(size_t src_len) {
 }
 
 size_t PaxLZ4Compressor::Compress(void *dst_buff, size_t dst_cap,
-                                  void *src_buff, size_t src_len, int /*lvl*/) 
{
-  return LZ4_compress_default((char *)src_buff, (char *)dst_buff, src_len,
-                              dst_cap);
+                                  void *src_buff, size_t src_len, int lvl) {
+  // acceleration affects compression speed, the larger acceleration value,
+  // the less compression ratio.
+  int acceleration = (20 - lvl) / 6;
+  return LZ4_compress_fast((char *)src_buff, (char *)dst_buff, src_len,
+                              dst_cap, acceleration);
 }
 
 size_t PaxLZ4Compressor::Decompress(void *dst_buff, size_t dst_len,
diff --git a/contrib/pax_storage/src/cpp/storage/proto/pax.proto 
b/contrib/pax_storage/src/cpp/storage/proto/pax.proto
index 3e25710027d..765d3e0f8a5 100644
--- a/contrib/pax_storage/src/cpp/storage/proto/pax.proto
+++ b/contrib/pax_storage/src/cpp/storage/proto/pax.proto
@@ -37,6 +37,7 @@ message ColumnEncoding {
     COMPRESS_ZLIB = 4;        // use ZLIB to compress 
 
     DICTIONARY = 5;           // use dict-endoing 
+    COMPRESS_LZ4 = 6;         // use lz4 to compress
   }
 
   optional Kind kind = 1;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to