This is an automated email from the ASF dual-hosted git repository.
william pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 725fbc513 ORC-1961: Support `orc.compression.zstd.strategy`
725fbc513 is described below
commit 725fbc5133601a5433ec3901b6e4682d14244009
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Jul 23 23:42:12 2025 -0700
ORC-1961: Support `orc.compression.zstd.strategy`
### What changes were proposed in this pull request?
This PR aims to support `orc.compression.zstd.strategy`.
### Why are the changes needed?
To allow a user to choose a proper strategy based on their data.
https://facebook.github.io/zstd/zstd_manual.html#Chapter5
```
typedef enum { ZSTD_fast=1,
ZSTD_dfast=2,
ZSTD_greedy=3,
ZSTD_lazy=4,
ZSTD_lazy2=5,
ZSTD_btlazy2=6,
ZSTD_btopt=7,
ZSTD_btultra=8,
ZSTD_btultra2=9
/* note : new strategies _might_ be added in the future.
Only the order (from fast to strong) is guaranteed
*/
} ZSTD_strategy;
```
### How was this patch tested?
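Pass the CIs. In addition, the new option was checked manually with the benchmark tool by comparing the fastest strategy (1) against the strongest strategy (9):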
```
$ cd java
$ mvn package -DskipTests -Pbenchmark
$ cd bench
$ time java -Dorc.compression.zstd.strategy=1 -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -f orc
...
54.51s user 1.28s system 103% cpu 53.984 total
$ time java -Dorc.compression.zstd.strategy=9 -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -f orc
...
148.21s user 1.75s system 101% cpu 2:28.13 total
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #2338 from dongjoon-hyun/ORC-1961.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: William Hyun <[email protected]>
---
java/core/src/java/org/apache/orc/OrcConf.java | 4 ++++
java/core/src/java/org/apache/orc/OrcFile.java | 11 ++++++++++
.../java/org/apache/orc/impl/PhysicalFsWriter.java | 1 +
.../src/java/org/apache/orc/impl/ZstdCodec.java | 24 ++++++++++++++++------
4 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java
b/java/core/src/java/org/apache/orc/OrcConf.java
index 6516517ba..26d1b7881 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -80,6 +80,10 @@ public enum OrcConf {
"hive.exec.orc.compression.zstd.windowlog", 0,
"Set the maximum allowed back-reference distance for "
+ "ZStandard codec, expressed as power of 2."),
+ COMPRESSION_ZSTD_STRATEGY("orc.compression.zstd.strategy",
+ "hive.exec.orc.compression.zstd.strategy", 0,
+ "Define the compression strategy to use with ZStandard codec "
+ + "while writing data. The valid range is 0~9."),
BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance",
"hive.exec.orc.block.padding.tolerance", 0.05,
"Define the tolerance for block padding as a decimal fraction of\n" +
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java
b/java/core/src/java/org/apache/orc/OrcFile.java
index 278c0813e..160aaf1f9 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -429,6 +429,7 @@ public class OrcFile {
public static class ZstdCompressOptions {
private int compressionZstdLevel;
private int compressionZstdWindowLog;
+ private int compressionZstdStrategy;
public int getCompressionZstdLevel() {
return compressionZstdLevel;
@@ -445,6 +446,14 @@ public class OrcFile {
public void setCompressionZstdWindowLog(int compressionZstdWindowLog) {
this.compressionZstdWindowLog = compressionZstdWindowLog;
}
+
+ public int getCompressionZstdStrategy() {
+ return compressionZstdStrategy;
+ }
+
+ public void setCompressionZstdStrategy(int compressionZstdStrategy) {
+ this.compressionZstdStrategy = compressionZstdStrategy;
+ }
}
/**
@@ -520,6 +529,8 @@ public class OrcFile {
OrcConf.COMPRESSION_ZSTD_LEVEL.getInt(tableProperties, conf));
zstdCompressOptions.setCompressionZstdWindowLog(
OrcConf.COMPRESSION_ZSTD_WINDOWLOG.getInt(tableProperties,
conf));
+ zstdCompressOptions.setCompressionZstdStrategy(
+ OrcConf.COMPRESSION_ZSTD_STRATEGY.getInt(tableProperties, conf));
paddingTolerance =
OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
diff --git a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
index 87f777a7e..d6fb296bd 100644
--- a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
@@ -121,6 +121,7 @@ public class PhysicalFsWriter implements PhysicalWriter {
if (zstdCompressOptions != null) {
options.setLevel(zstdCompressOptions.getCompressionZstdLevel());
options.setWindowLog(zstdCompressOptions.getCompressionZstdWindowLog());
+ options.setStrategy(zstdCompressOptions.getCompressionZstdStrategy());
}
}
compress.withCodec(codec, tempOptions);
diff --git a/java/core/src/java/org/apache/orc/impl/ZstdCodec.java
b/java/core/src/java/org/apache/orc/impl/ZstdCodec.java
index d352c860f..186e5696f 100644
--- a/java/core/src/java/org/apache/orc/impl/ZstdCodec.java
+++ b/java/core/src/java/org/apache/orc/impl/ZstdCodec.java
@@ -29,12 +29,12 @@ public class ZstdCodec implements CompressionCodec,
DirectDecompressionCodec {
private ZstdOptions zstdOptions = null;
private ZstdCompressCtx zstdCompressCtx = null;
- public ZstdCodec(int level, int windowLog) {
- this.zstdOptions = new ZstdOptions(level, windowLog);
+ public ZstdCodec(int level, int windowLog, int strategy) {
+ this.zstdOptions = new ZstdOptions(level, windowLog, strategy);
}
public ZstdCodec() {
- this(3, 0);
+ this(3, 0, 0);
}
public ZstdOptions getZstdOptions() {
@@ -57,15 +57,17 @@ public class ZstdCodec implements CompressionCodec,
DirectDecompressionCodec {
static class ZstdOptions implements Options {
private int level;
private int windowLog;
+ private int strategy;
- ZstdOptions(int level, int windowLog) {
+ ZstdOptions(int level, int windowLog, int strategy) {
this.level = level;
this.windowLog = windowLog;
+ this.strategy = strategy;
}
@Override
public ZstdOptions copy() {
- return new ZstdOptions(level, windowLog);
+ return new ZstdOptions(level, windowLog, strategy);
}
@Override
@@ -123,6 +125,13 @@ public class ZstdCodec implements CompressionCodec,
DirectDecompressionCodec {
return this;
}
+ public ZstdOptions setStrategy(int newValue) {
+ // https://facebook.github.io/zstd/zstd_manual.html#Chapter5
+ // Although the value is between 1 and 9 and 0 means `use default`, ZStd can change it.
+ strategy = newValue;
+ return this;
+ }
+
@Override
public ZstdOptions setData(DataKind newValue) {
return this; // We don't support setting DataKind in ZstdCodec.
@@ -136,6 +145,7 @@ public class ZstdCodec implements CompressionCodec,
DirectDecompressionCodec {
ZstdOptions that = (ZstdOptions) o;
if (level != that.level) return false;
+ if (strategy != that.strategy) return false;
return windowLog == that.windowLog;
}
@@ -143,12 +153,13 @@ public class ZstdCodec implements CompressionCodec,
DirectDecompressionCodec {
public int hashCode() {
int result = level;
result = 31 * result + windowLog;
+ result = 31 * result + strategy;
return result;
}
}
private static final ZstdOptions DEFAULT_OPTIONS =
- new ZstdOptions(3, 0);
+ new ZstdOptions(3, 0, 0);
@Override
public Options getDefaultOptions() {
@@ -183,6 +194,7 @@ public class ZstdCodec implements CompressionCodec,
DirectDecompressionCodec {
zstdCompressCtx.setLevel(zso.level);
zstdCompressCtx.setLong(zso.windowLog);
zstdCompressCtx.setChecksum(false);
+ zstdCompressCtx.setStrategy(zso.strategy);
try {
byte[] compressed = getBuffer((int) Zstd.compressBound(inBytes));