This is an automated email from the ASF dual-hosted git repository.
gongxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new be6717cdd7c PAX: Support LZ4 compression for table columns (#1344)
be6717cdd7c is described below
commit be6717cdd7c5310165f706142f9f08839c8383c9
Author: Hao Wu <[email protected]>
AuthorDate: Thu Sep 4 17:34:16 2025 +0800
PAX: Support LZ4 compression for table columns (#1344)
* PAX: Support LZ4 compression for table columns
PAX only supports zlib and zstd compression for column values.
This commit adds LZ4 support for PAX table columns.
* map compress level to acceleration for lz4
* restrict acceleration to range [0, 3]
* add macro control (guard LZ4 code with USE_LZ4)
---
contrib/pax_storage/src/cpp/access/paxc_rel_options.cc | 4 ++++
contrib/pax_storage/src/cpp/access/paxc_rel_options.h | 1 +
.../src/cpp/storage/columns/pax_column_test.cc | 6 ++++++
.../pax_storage/src/cpp/storage/columns/pax_compress.cc | 15 ++++++++++++---
contrib/pax_storage/src/cpp/storage/proto/pax.proto | 1 +
5 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
index 647cb5743cf..5de1b14cd97 100644
--- a/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
+++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.cc
@@ -50,6 +50,10 @@ static const relopt_compress_type_mapping
kSelfRelCompressMap[] = {
pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD},
{ColumnEncoding_Kind_COMPRESS_ZLIB_STR,
pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZLIB},
+#ifdef USE_LZ4
+ {ColumnEncoding_Kind_COMPRESS_LZ4_STR,
+ pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4},
+#endif
};
typedef struct {
diff --git a/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
index 4e813f38c40..e6c29363ab1 100644
--- a/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
+++ b/contrib/pax_storage/src/cpp/access/paxc_rel_options.h
@@ -41,6 +41,7 @@ namespace paxc {
#define ColumnEncoding_Kind_DICTIONARY_STR "dict"
#define ColumnEncoding_Kind_COMPRESS_ZSTD_STR "zstd"
#define ColumnEncoding_Kind_COMPRESS_ZLIB_STR "zlib"
+#define ColumnEncoding_Kind_COMPRESS_LZ4_STR "lz4"
#define STORAGE_FORMAT_TYPE_PORC "porc"
#define STORAGE_FORMAT_TYPE_PORC_VEC "porc_vec"
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc
b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc
index b26fdff65bf..f39e453cfee 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc
@@ -798,6 +798,9 @@ INSTANTIATE_TEST_SUITE_P(
PaxColumnEncodingTestCombine, PaxColumnCompressTest,
testing::Combine(testing::Values(16, 32, 64),
testing::Values(ColumnEncoding_Kind_NO_ENCODED,
+#ifdef USE_LZ4
+ ColumnEncoding_Kind_COMPRESS_LZ4,
+#endif
ColumnEncoding_Kind_COMPRESS_ZSTD,
ColumnEncoding_Kind_COMPRESS_ZLIB)));
@@ -805,6 +808,9 @@ INSTANTIATE_TEST_SUITE_P(
PaxColumnEncodingTestCombine, PaxNonFixedColumnCompressTest,
testing::Combine(testing::Values(16, 32, 64),
testing::Values(ColumnEncoding_Kind_NO_ENCODED,
+#ifdef USE_LZ4
+ ColumnEncoding_Kind_COMPRESS_LZ4,
+#endif
ColumnEncoding_Kind_COMPRESS_ZSTD,
ColumnEncoding_Kind_COMPRESS_ZLIB),
testing::Values(true, false),
diff --git a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc
b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc
index 87a34cbb6d7..f4bae52ea7d 100644
--- a/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc
+++ b/contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc
@@ -50,6 +50,12 @@ std::shared_ptr<PaxCompressor>
PaxCompressor::CreateBlockCompressor(
compressor = std::make_shared<PaxZlibCompressor>();
break;
}
+#ifdef USE_LZ4
+ case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4: {
+ compressor = std::make_shared<PaxLZ4Compressor>();
+ break;
+ }
+#endif
case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: {
CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError,
fmt("Invalid compress type %d",
@@ -230,9 +236,12 @@ size_t PaxLZ4Compressor::GetCompressBound(size_t src_len) {
}
size_t PaxLZ4Compressor::Compress(void *dst_buff, size_t dst_cap,
- void *src_buff, size_t src_len, int /*lvl*/)
{
- return LZ4_compress_default((char *)src_buff, (char *)dst_buff, src_len,
- dst_cap);
+ void *src_buff, size_t src_len, int lvl) {
+ // acceleration affects compression speed, the larger acceleration value,
+ // the less compression ratio.
+ int acceleration = (20 - lvl) / 6;
+ return LZ4_compress_fast((char *)src_buff, (char *)dst_buff, src_len,
+ dst_cap, acceleration);
}
size_t PaxLZ4Compressor::Decompress(void *dst_buff, size_t dst_len,
diff --git a/contrib/pax_storage/src/cpp/storage/proto/pax.proto
b/contrib/pax_storage/src/cpp/storage/proto/pax.proto
index 3e25710027d..765d3e0f8a5 100644
--- a/contrib/pax_storage/src/cpp/storage/proto/pax.proto
+++ b/contrib/pax_storage/src/cpp/storage/proto/pax.proto
@@ -37,6 +37,7 @@ message ColumnEncoding {
COMPRESS_ZLIB = 4; // use ZLIB to compress
DICTIONARY = 5; // use dict-endoing
+ COMPRESS_LZ4 = 6; // use lz4 to compress
}
optional Kind kind = 1;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]