This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new f0e4cd7cfd [orc] Enable READER_USE_SELECTED only when deletionVectors
is disabled
f0e4cd7cfd is described below
commit f0e4cd7cfdd9260753b068f5243134f6e9a3339e
Author: Jingsong <[email protected]>
AuthorDate: Tue Nov 12 15:40:10 2024 +0800
[orc] Enable READER_USE_SELECTED only when deletionVectors is disabled
---
.../src/main/java/org/apache/orc/OrcConf.java | 15 ------------
.../apache/paimon/format/orc/OrcFileFormat.java | 6 ++++-
.../apache/paimon/format/orc/OrcReaderFactory.java | 28 +++++++++++-----------
.../paimon/format/orc/OrcReaderFactoryTest.java | 3 ++-
4 files changed, 21 insertions(+), 31 deletions(-)
diff --git a/paimon-format/src/main/java/org/apache/orc/OrcConf.java
b/paimon-format/src/main/java/org/apache/orc/OrcConf.java
index ee07e45117..a7fa1a21bc 100644
--- a/paimon-format/src/main/java/org/apache/orc/OrcConf.java
+++ b/paimon-format/src/main/java/org/apache/orc/OrcConf.java
@@ -305,21 +305,6 @@ public enum OrcConf {
+ "must have the filter\n"
+ "reapplied to avoid using unset values in the unselected
rows.\n"
+ "If unsure please leave this as false."),
-
- READER_ONLY_ALLOW_SARG_TO_FILTER(
- "orc.reader.sarg.to.filter",
- "orc.reader.sarg.to.filter",
- false,
- "A boolean flag to determine if a SArg is allowed to become a
filter, only for reader."),
- READER_ONLY_USE_SELECTED(
- "orc.reader.filter.use.selected",
- "orc.reader.filter.use.selected",
- false,
- "A boolean flag to determine if the selected vector is supported
by\n"
- + "the reading application, only for reader. If false,
the output of the ORC reader "
- + "must have the filter\n"
- + "reapplied to avoid using unset values in the unselected
rows.\n"
- + "If unsure please leave this as false."),
ALLOW_PLUGIN_FILTER(
"orc.filter.plugin",
"orc.filter.plugin",
diff --git
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java
index 053f699580..c564b69409 100644
---
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java
+++
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java
@@ -56,6 +56,7 @@ import java.util.Optional;
import java.util.Properties;
import java.util.stream.Collectors;
+import static org.apache.paimon.CoreOptions.DELETION_VECTORS_ENABLED;
import static org.apache.paimon.types.DataTypeChecks.getFieldTypes;
/** Orc {@link FileFormat}. */
@@ -69,6 +70,7 @@ public class OrcFileFormat extends FileFormat {
private final org.apache.hadoop.conf.Configuration writerConf;
private final int readBatchSize;
private final int writeBatchSize;
+ private final boolean deletionVectorsEnabled;
public OrcFileFormat(FormatContext formatContext) {
super(IDENTIFIER);
@@ -79,6 +81,7 @@ public class OrcFileFormat extends FileFormat {
this.orcProperties.forEach((k, v) -> writerConf.set(k.toString(),
v.toString()));
this.readBatchSize = formatContext.readBatchSize();
this.writeBatchSize = formatContext.writeBatchSize();
+ this.deletionVectorsEnabled =
formatContext.options().get(DELETION_VECTORS_ENABLED);
}
@VisibleForTesting
@@ -113,7 +116,8 @@ public class OrcFileFormat extends FileFormat {
readerConf,
(RowType) refineDataType(projectedRowType),
orcPredicates,
- readBatchSize);
+ readBatchSize,
+ deletionVectorsEnabled);
}
@Override
diff --git
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
index 5543fd7910..dbc5de265c 100644
---
a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
+++
b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java
@@ -62,14 +62,11 @@ import static
org.apache.paimon.utils.Preconditions.checkNotNull;
public class OrcReaderFactory implements FormatReaderFactory {
protected final Configuration hadoopConfig;
-
protected final TypeDescription schema;
-
- private final RowType tableType;
-
+ protected final RowType tableType;
protected final List<OrcFilters.Predicate> conjunctPredicates;
-
protected final int batchSize;
+ protected final boolean deletionVectorsEnabled;
/**
* @param hadoopConfig the hadoop config for orc reader.
@@ -80,12 +77,14 @@ public class OrcReaderFactory implements
FormatReaderFactory {
final org.apache.hadoop.conf.Configuration hadoopConfig,
final RowType readType,
final List<OrcFilters.Predicate> conjunctPredicates,
- final int batchSize) {
+ final int batchSize,
+ final boolean deletionVectorsEnabled) {
this.hadoopConfig = checkNotNull(hadoopConfig);
this.schema = toOrcType(readType);
this.tableType = readType;
this.conjunctPredicates = checkNotNull(conjunctPredicates);
this.batchSize = batchSize;
+ this.deletionVectorsEnabled = deletionVectorsEnabled;
}
// ------------------------------------------------------------------------
@@ -108,7 +107,8 @@ public class OrcReaderFactory implements
FormatReaderFactory {
context.filePath(),
0,
context.fileSize(),
- context.fileIndex());
+ context.fileIndex(),
+ deletionVectorsEnabled);
return new OrcVectorizedReader(orcReader, poolOfBatches);
}
@@ -258,7 +258,8 @@ public class OrcReaderFactory implements
FormatReaderFactory {
org.apache.paimon.fs.Path path,
long splitStart,
long splitLength,
- FileIndexResult fileIndexResult)
+ FileIndexResult fileIndexResult,
+ boolean deletionVectorsEnabled)
throws IOException {
org.apache.orc.Reader orcReader = createReader(conf, fileIO, path,
fileIndexResult);
try {
@@ -275,12 +276,11 @@ public class OrcReaderFactory implements
FormatReaderFactory {
.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
.tolerateMissingSchema(
OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
- if (!conjunctPredicates.isEmpty()) {
- // TODO fix it , if open this option,future deletion vectors
would not work,
- // cased by getRowNumber would be changed .
-
options.useSelected(OrcConf.READER_ONLY_USE_SELECTED.getBoolean(conf));
- options.allowSARGToFilter(
-
OrcConf.READER_ONLY_ALLOW_SARG_TO_FILTER.getBoolean(conf));
+ if (!conjunctPredicates.isEmpty() && !deletionVectorsEnabled) {
+ // This feature cannot be enabled with deletion vectors, because
getRowNumber would be
+ // changed.
+
options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf));
+
options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf));
}
// configure filters
if (!conjunctPredicates.isEmpty()) {
diff --git
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
index 1efd984965..63b391b44c 100644
---
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
+++
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
@@ -277,7 +277,8 @@ class OrcReaderFactoryTest {
new Configuration(),
Projection.of(selectedFields).project(formatType),
conjunctPredicates,
- BATCH_SIZE);
+ BATCH_SIZE,
+ false);
}
private RecordReader<InternalRow> createReader(OrcReaderFactory format,
Path split)