This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new f0e4cd7cfd [orc] Enable READER_USE_SELECTED only deletionVectors disabled
f0e4cd7cfd is described below

commit f0e4cd7cfdd9260753b068f5243134f6e9a3339e
Author: Jingsong <[email protected]>
AuthorDate: Tue Nov 12 15:40:10 2024 +0800

    [orc] Enable READER_USE_SELECTED only deletionVectors disabled
---
 .../src/main/java/org/apache/orc/OrcConf.java      | 15 ------------
 .../apache/paimon/format/orc/OrcFileFormat.java    |  6 ++++-
 .../apache/paimon/format/orc/OrcReaderFactory.java | 28 +++++++++++-----------
 .../paimon/format/orc/OrcReaderFactoryTest.java    |  3 ++-
 4 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/paimon-format/src/main/java/org/apache/orc/OrcConf.java 
b/paimon-format/src/main/java/org/apache/orc/OrcConf.java
index ee07e45117..a7fa1a21bc 100644
--- a/paimon-format/src/main/java/org/apache/orc/OrcConf.java
+++ b/paimon-format/src/main/java/org/apache/orc/OrcConf.java
@@ -305,21 +305,6 @@ public enum OrcConf {
                     + "must have the filter\n"
                     + "reapplied to avoid using unset values in the unselected 
rows.\n"
                     + "If unsure please leave this as false."),
-
-    READER_ONLY_ALLOW_SARG_TO_FILTER(
-            "orc.reader.sarg.to.filter",
-            "orc.reader.sarg.to.filter",
-            false,
-            "A boolean flag to determine if a SArg is allowed to become a 
filter, only for reader."),
-    READER_ONLY_USE_SELECTED(
-            "orc.reader.filter.use.selected",
-            "orc.reader.filter.use.selected",
-            false,
-            "A boolean flag to determine if the selected vector is supported 
by\n"
-                    + "the reading application, only for reader.  If false, 
the output of the ORC reader "
-                    + "must have the filter\n"
-                    + "reapplied to avoid using unset values in the unselected 
rows.\n"
-                    + "If unsure please leave this as false."),
     ALLOW_PLUGIN_FILTER(
             "orc.filter.plugin",
             "orc.filter.plugin",
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java
index 053f699580..c564b69409 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java
@@ -56,6 +56,7 @@ import java.util.Optional;
 import java.util.Properties;
 import java.util.stream.Collectors;
 
+import static org.apache.paimon.CoreOptions.DELETION_VECTORS_ENABLED;
 import static org.apache.paimon.types.DataTypeChecks.getFieldTypes;
 
 /** Orc {@link FileFormat}. */
@@ -69,6 +70,7 @@ public class OrcFileFormat extends FileFormat {
     private final org.apache.hadoop.conf.Configuration writerConf;
     private final int readBatchSize;
     private final int writeBatchSize;
+    private final boolean deletionVectorsEnabled;
 
     public OrcFileFormat(FormatContext formatContext) {
         super(IDENTIFIER);
@@ -79,6 +81,7 @@ public class OrcFileFormat extends FileFormat {
         this.orcProperties.forEach((k, v) -> writerConf.set(k.toString(), 
v.toString()));
         this.readBatchSize = formatContext.readBatchSize();
         this.writeBatchSize = formatContext.writeBatchSize();
+        this.deletionVectorsEnabled = 
formatContext.options().get(DELETION_VECTORS_ENABLED);
     }
 
     @VisibleForTesting
@@ -113,7 +116,8 @@ public class OrcFileFormat extends FileFormat {
                 readerConf,
                 (RowType) refineDataType(projectedRowType),
                 orcPredicates,
-                readBatchSize);
+                readBatchSize,
+                deletionVectorsEnabled);
     }
 
     @Override
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
index 5543fd7910..dbc5de265c 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
@@ -62,14 +62,11 @@ import static 
org.apache.paimon.utils.Preconditions.checkNotNull;
 public class OrcReaderFactory implements FormatReaderFactory {
 
     protected final Configuration hadoopConfig;
-
     protected final TypeDescription schema;
-
-    private final RowType tableType;
-
+    protected final RowType tableType;
     protected final List<OrcFilters.Predicate> conjunctPredicates;
-
     protected final int batchSize;
+    protected final boolean deletionVectorsEnabled;
 
     /**
      * @param hadoopConfig the hadoop config for orc reader.
@@ -80,12 +77,14 @@ public class OrcReaderFactory implements 
FormatReaderFactory {
             final org.apache.hadoop.conf.Configuration hadoopConfig,
             final RowType readType,
             final List<OrcFilters.Predicate> conjunctPredicates,
-            final int batchSize) {
+            final int batchSize,
+            final boolean deletionVectorsEnabled) {
         this.hadoopConfig = checkNotNull(hadoopConfig);
         this.schema = toOrcType(readType);
         this.tableType = readType;
         this.conjunctPredicates = checkNotNull(conjunctPredicates);
         this.batchSize = batchSize;
+        this.deletionVectorsEnabled = deletionVectorsEnabled;
     }
 
     // ------------------------------------------------------------------------
@@ -108,7 +107,8 @@ public class OrcReaderFactory implements 
FormatReaderFactory {
                         context.filePath(),
                         0,
                         context.fileSize(),
-                        context.fileIndex());
+                        context.fileIndex(),
+                        deletionVectorsEnabled);
         return new OrcVectorizedReader(orcReader, poolOfBatches);
     }
 
@@ -258,7 +258,8 @@ public class OrcReaderFactory implements 
FormatReaderFactory {
             org.apache.paimon.fs.Path path,
             long splitStart,
             long splitLength,
-            FileIndexResult fileIndexResult)
+            FileIndexResult fileIndexResult,
+            boolean deletionVectorsEnabled)
             throws IOException {
         org.apache.orc.Reader orcReader = createReader(conf, fileIO, path, 
fileIndexResult);
         try {
@@ -275,12 +276,11 @@ public class OrcReaderFactory implements 
FormatReaderFactory {
                             
.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
                             .tolerateMissingSchema(
                                     
OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
-            if (!conjunctPredicates.isEmpty()) {
-                // TODO fix it , if open this option,future deletion vectors 
would not work,
-                //  cased by getRowNumber would be changed .
-                
options.useSelected(OrcConf.READER_ONLY_USE_SELECTED.getBoolean(conf));
-                options.allowSARGToFilter(
-                        
OrcConf.READER_ONLY_ALLOW_SARG_TO_FILTER.getBoolean(conf));
+            if (!conjunctPredicates.isEmpty() && !deletionVectorsEnabled) {
+                // This feature cannot be enabled together with deletion vectors, because
+                // getRowNumber would be changed.
+                
options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf));
+                
options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf));
             }
             // configure filters
             if (!conjunctPredicates.isEmpty()) {
diff --git 
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
 
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
index 1efd984965..63b391b44c 100644
--- 
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
+++ 
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
@@ -277,7 +277,8 @@ class OrcReaderFactoryTest {
                 new Configuration(),
                 Projection.of(selectedFields).project(formatType),
                 conjunctPredicates,
-                BATCH_SIZE);
+                BATCH_SIZE,
+                false);
     }
 
     private RecordReader<InternalRow> createReader(OrcReaderFactory format, 
Path split)

Reply via email to