This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-2.2 by this push:
new e68a0792d ORC-1959: Add test String statistics with Presto writer
e68a0792d is described below
commit e68a0792d27ffce35d2d571f12eac1827d3cdf46
Author: sychen <[email protected]>
AuthorDate: Wed Jul 16 21:51:08 2025 -0700
ORC-1959: Add test String statistics with Presto writer
### What changes were proposed in this pull request?
This PR aims to add a string statistics test for the ORC-1075 fix.
### Why are the changes needed?
https://github.com/apache/orc/issues/1061
### How was this patch tested?
Added a unit test, `testStringStatisticsWithPrestoWriter`, to `TestRecordReaderImpl`.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #2332 from cxzl25/ORC-1959.
Authored-by: sychen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 09693fee2d65a49a6a65b5774cf4eb2001bedf3c)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/orc/impl/TestRecordReaderImpl.java | 33 +++++++++++++++++++++
.../src/test/resources/orc-file-presto-string.orc | Bin 0 -> 521 bytes
2 files changed, 33 insertions(+)
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index f785e6e58..3c70b7284 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -2518,6 +2518,39 @@ public class TestRecordReaderImpl implements TestConf {
}
}
+ @Test
+ public void testStringStatisticsWithPrestoWriter() throws Exception {
+ // Schema of the test resource file: struct<id:int,name:string,score:int>
+ Path testFilePath = new Path(ClassLoader.
+ getSystemResource("orc-file-presto-string.orc").getPath());
+ FileSystem fs = FileSystem.get(conf);
+
+ Reader reader = OrcFile.createReader(testFilePath,
+ OrcFile.readerOptions(conf).filesystem(fs));
+
+ try (RecordReader rr = reader.rows()) {
+ RecordReaderImpl rri = (RecordReaderImpl) rr;
+ // Column id 2 is "name" in the schema above (id 0 is the root struct); only that column is selected
+ OrcIndex orcIndex = rri.readRowIndex(0,
+ new boolean[] { false, false, true, false },
+ new boolean[] { false, false, true, false });
+ OrcProto.RowIndex[] rowGroupIndex = orcIndex.getRowGroupIndex();
+ OrcProto.ColumnStatistics statistics =
rowGroupIndex[2].getEntry(0).getStatistics();
+ OrcProto.ColumnEncoding encoding = OrcProto.ColumnEncoding.newBuilder()
+ .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2)
+ .build();
+ PredicateLeaf pred = createPredicateLeaf(
+ PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING, "name",
null, null);
+
+ TruthValue truthValue = RecordReaderImpl.evaluatePredicateProto(
+ statistics,
+ pred, null, encoding, null,
+ CURRENT_WRITER, TypeDescription.createString());
+
+ assertEquals(TruthValue.YES_NO_NULL, truthValue);
+ }
+ }
+
@Test
public void testDoubleColumnWithoutDoubleStatistics() throws Exception {
// orc-file-no-double-statistic.orc is an orc file created by cudf with a
schema of
diff --git a/java/core/src/test/resources/orc-file-presto-string.orc
b/java/core/src/test/resources/orc-file-presto-string.orc
new file mode 100644
index 000000000..8884485d1
Binary files /dev/null and
b/java/core/src/test/resources/orc-file-presto-string.orc differ