This is an automated email from the ASF dual-hosted git repository.
djwang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git
The following commit(s) were added to refs/heads/main by this push:
new 54e34262 Upgrade Apache ORC library in PXF
54e34262 is described below
commit 54e342626e1e938723b4405e181e72ebfbba280d
Author: Nikolay Antonov <[email protected]>
AuthorDate: Thu Feb 5 09:03:41 2026 +0500
Upgrade Apache ORC library in PXF
* Bump Apache `ORC` library to 1.7.11
In theory, we could bump up to 1.9.8. However, the pxf-hbase tests fail with
the newer protobuf-3.x. For now, let's move fast rather than struggle in
dependency hell.
* Bump `ORC`'s dependencies to support `zstd` compression, and add tests for
the compression codecs.
Surprisingly, although the documentation says `zstd` should work, the old
`aircompressor:0.8` does not include the ZStd codec, so attempting to use it
leads to an error.
---
.../pxf/automation/features/orc/OrcReadTest.java | 18 ------------------
server/build.gradle | 2 +-
server/gradle.properties | 4 ++--
.../plugins/hdfs/orc/ORCVectorizedAccessorTest.java | 18 ++++++++++++++++++
4 files changed, 21 insertions(+), 21 deletions(-)
diff --git
a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/orc/OrcReadTest.java
b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/orc/OrcReadTest.java
index 68e86d00..c462aeb0 100644
---
a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/orc/OrcReadTest.java
+++
b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/orc/OrcReadTest.java
@@ -157,30 +157,12 @@ public class OrcReadTest extends BaseFeature {
runSqlTest("features/orc/read/multidim_list_types");
}
- /*
- * FDW fails for the data that contain a NUL-byte (i.e. '\/u000'"). This
behaviour is different from external-table but same as GPDB Heap
- * FDW Failure: invalid byte sequence for encoding "UTF8": 0x00
- *
- * GPDB also throws the same error when copying the data containing a
NUL-byte
- *
- * postgres=# copy test from '/Users/pandeyhi/Documents/bad_data.txt' ;
- * ERROR: invalid byte sequence for encoding "UTF8": 0x00
- * TODO Do we need to do some changes to make sure the external-table
behaves the same way as GPDB/FDW?
- *
- */
- @FailsWithFDW
@Test(groups = {"features", "gpdb", "security", "hcfs"})
public void orcReadStringsContainingNullByte() throws Exception {
prepareReadableExternalTable("pxf_orc_null_in_string",
ORC_NULL_IN_STRING_COLUMNS, hdfsPath + ORC_NULL_IN_STRING);
runSqlTest("features/orc/read/null_in_string");
}
- // @Test(groups = {"features", "gpdb", "security", "hcfs"})
- // public void orcReadStringsContainingNullByte() throws Exception {
- // prepareReadableExternalTable("pxf_orc_null_in_string",
ORC_NULL_IN_STRING_COLUMNS, hdfsPath + ORC_NULL_IN_STRING);
- // runTincTest("pxf.features.orc.read.null_in_string.runTest");
- // }
-
private void prepareReadableExternalTable(String name, String[] fields,
String path) throws Exception {
prepareReadableExternalTable(name, fields, path, false);
}
diff --git a/server/build.gradle b/server/build.gradle
index cafaec6b..499a0b72 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -91,7 +91,7 @@ configure(javaProjects) {
dependency("commons-io:commons-io:2.7")
dependency("commons-lang:commons-lang:2.6")
dependency("commons-logging:commons-logging:1.1.3")
- dependency("io.airlift:aircompressor:0.8")
+ dependency("io.airlift:aircompressor:0.27")
dependency("javax.jdo:jdo-api:3.0.1")
dependency("joda-time:joda-time:2.8.1")
dependency("net.sf.opencsv:opencsv:2.3")
diff --git a/server/gradle.properties b/server/gradle.properties
index 772b69c4..eb6191df 100644
--- a/server/gradle.properties
+++ b/server/gradle.properties
@@ -20,7 +20,7 @@ apiVersion=0
license=ASL 2.0
hadoopVersion=2.10.2
hiveVersion=2.3.8
-hiveStorageApiVersion=2.7.2
+hiveStorageApiVersion=2.7.3
hbaseVersion=1.3.2
junitVersion=4.11
parquetVersion=1.11.1
@@ -28,4 +28,4 @@ awsJavaSdk=1.12.261
springBootVersion=2.7.18
org.gradle.daemon=true
org.gradle.parallel=false
-orcVersion=1.6.13
+orcVersion=1.7.11
diff --git
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/orc/ORCVectorizedAccessorTest.java
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/orc/ORCVectorizedAccessorTest.java
index 6890e6ae..0fcf0625 100644
---
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/orc/ORCVectorizedAccessorTest.java
+++
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/orc/ORCVectorizedAccessorTest.java
@@ -148,6 +148,24 @@ class ORCVectorizedAccessorTest extends
ORCVectorizedBaseTest {
runTestScenario_OpenForWrite(CompressionKind.SNAPPY, true);
}
+ @Test
+ public void testOpenForWrite_ZstdCompression() throws IOException {
+ context.addOption("COMPRESSION_CODEC", "zstd");
+ runTestScenario_OpenForWrite(CompressionKind.ZSTD, true);
+ }
+
+ @Test
+ public void testOpenForWrite_Lz4Compression() throws IOException {
+ context.addOption("COMPRESSION_CODEC", "lz4");
+ runTestScenario_OpenForWrite(CompressionKind.LZ4, true);
+ }
+
+ @Test
+ public void testOpenForWrite_LZOCompression() throws IOException {
+ context.addOption("COMPRESSION_CODEC", "lzo");
+ runTestScenario_OpenForWrite(CompressionKind.LZO, true);
+ }
+
@Test
public void testOpenForWrite_OrcWriteTimeZoneUTCMissing() throws
IOException {
runTestScenario_OpenForWrite(CompressionKind.ZLIB, true);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]