This is an automated email from the ASF dual-hosted git repository.
junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new f2e6eb7201 [arrow] Strip paimon-core module and paimon-format module
off from paimon-arrow module (#5811)
f2e6eb7201 is described below
commit f2e6eb7201c0337d1c80189f82194ba4d58fb11f
Author: YeJunHao <[email protected]>
AuthorDate: Mon Jun 30 15:55:51 2025 +0800
[arrow] Strip paimon-core module and paimon-format module off from
paimon-arrow module (#5811)
---
paimon-arrow/pom.xml | 93 ----------------------
.../converter/Arrow2PaimonVectorConverter.java | 15 +++-
.../converter/ArrowVectorizedBatchConverter.java | 8 +-
.../paimon/arrow/vector/ArrowFormatCWriter.java | 7 +-
.../paimon/arrow/vector/ArrowFormatWriterTest.java | 5 +-
.../DeletionFileRecordIterator.java | 33 ++++++++
.../deletionvectors/DeletionVectorJudger.java | 30 +++++++
paimon-core/pom.xml | 7 ++
.../ApplyDeletionFileRecordIterator.java | 5 +-
.../paimon/deletionvectors/DeletionVector.java | 10 +--
.../arrow/converter/ArrowBatchConverterTest.java | 0
pom.xml | 1 +
12 files changed, 101 insertions(+), 113 deletions(-)
diff --git a/paimon-arrow/pom.xml b/paimon-arrow/pom.xml
index 5348ceb043..b35b6e8c39 100644
--- a/paimon-arrow/pom.xml
+++ b/paimon-arrow/pom.xml
@@ -33,19 +33,7 @@ under the License.
<packaging>jar</packaging>
- <properties>
- <arrow.version>14.0.0</arrow.version>
- </properties>
-
<dependencies>
-
- <dependency>
- <groupId>org.apache.paimon</groupId>
- <artifactId>paimon-core</artifactId>
- <version>${project.version}</version>
- <scope>provided</scope>
- </dependency>
-
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-common</artifactId>
@@ -57,105 +45,24 @@ under the License.
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.version}</version>
- <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-c-data</artifactId>
<version>${arrow.version}</version>
- <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-core</artifactId>
<version>${arrow.version}</version>
- <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-unsafe</artifactId>
<version>${arrow.version}</version>
- <scope>provided</scope>
- </dependency>
-
- <!-- test dependencies -->
-
- <dependency>
- <groupId>org.apache.paimon</groupId>
- <artifactId>paimon-format</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.paimon</groupId>
- <artifactId>paimon-test-utils</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- <scope>test</scope>
- <exclusions>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs-client</artifactId>
- <version>${hadoop.version}</version>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
- <scope>test</scope>
- <exclusions>
- <exclusion>
- <groupId>org.apache.curator</groupId>
- <artifactId>curator-test</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.codehaus.jackson</groupId>
- <artifactId>jackson-core-asl</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- <exclusion>
- <groupId>jdk.tools</groupId>
- <artifactId>jdk.tools</artifactId>
- </exclusion>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- </exclusions>
</dependency>
</dependencies>
</project>
diff --git
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/Arrow2PaimonVectorConverter.java
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/Arrow2PaimonVectorConverter.java
index b89f2a83d0..0f29ea5770 100644
---
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/Arrow2PaimonVectorConverter.java
+++
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/Arrow2PaimonVectorConverter.java
@@ -46,6 +46,7 @@ import org.apache.paimon.types.BigIntType;
import org.apache.paimon.types.BinaryType;
import org.apache.paimon.types.BooleanType;
import org.apache.paimon.types.CharType;
+import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeVisitor;
import org.apache.paimon.types.DateType;
@@ -528,8 +529,11 @@ public interface Arrow2PaimonVectorConverter {
@Override
public Arrow2PaimonVectorConverter visit(RowType rowType) {
final List<Arrow2PaimonVectorConverter> convertors = new
ArrayList<>();
+ final List<String> names = new ArrayList<>();
+ List<DataField> fields = rowType.getFields();
for (int i = 0; i < rowType.getFields().size(); i++) {
convertors.add(rowType.getTypeAt(i).accept(this));
+ names.add(fields.get(i).name());
}
return vector ->
@@ -542,10 +546,15 @@ public interface Arrow2PaimonVectorConverter {
if (!inited) {
List<FieldVector> children =
((StructVector)
vector).getChildrenFromFields();
- ColumnVector[] vectors = new
ColumnVector[children.size()];
- for (int i = 0; i < children.size(); i++) {
- vectors[i] =
convertors.get(i).convertVector(children.get(i));
+ ColumnVector[] vectors = new
ColumnVector[convertors.size()];
+
+ for (FieldVector child : children) {
+ int index = names.indexOf(child.getName());
+ if (index != -1) {
+ vectors[index] =
convertors.get(index).convertVector(child);
+ }
}
+
this.vectorizedColumnBatch = new
VectorizedColumnBatch(vectors);
inited = true;
}
diff --git
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/ArrowVectorizedBatchConverter.java
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/ArrowVectorizedBatchConverter.java
index cc7c6dc301..06dc8d6467 100644
---
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/ArrowVectorizedBatchConverter.java
+++
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/ArrowVectorizedBatchConverter.java
@@ -22,8 +22,8 @@ import org.apache.paimon.arrow.writer.ArrowFieldWriter;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.columnar.ColumnVector;
import org.apache.paimon.data.columnar.VectorizedColumnBatch;
-import org.apache.paimon.deletionvectors.ApplyDeletionFileRecordIterator;
-import org.apache.paimon.deletionvectors.DeletionVector;
+import org.apache.paimon.deletionvectors.DeletionFileRecordIterator;
+import org.apache.paimon.deletionvectors.DeletionVectorJudger;
import org.apache.paimon.reader.FileRecordIterator;
import org.apache.paimon.reader.VectorizedRecordIterator;
import org.apache.paimon.utils.IntArrayList;
@@ -61,14 +61,14 @@ public class ArrowVectorizedBatchConverter extends
ArrowBatchConverter {
}
}
- public void reset(ApplyDeletionFileRecordIterator iterator) {
+ public void reset(DeletionFileRecordIterator iterator) {
this.iterator = iterator;
FileRecordIterator<InternalRow> innerIterator = iterator.iterator();
this.batch = ((VectorizedRecordIterator) innerIterator).batch();
try {
- DeletionVector deletionVector = iterator.deletionVector();
+ DeletionVectorJudger deletionVector = iterator.deletionVector();
int originNumRows = this.batch.getNumRows();
IntArrayList picked = new IntArrayList(originNumRows);
for (int i = 0; i < originNumRows; i++) {
diff --git
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/vector/ArrowFormatCWriter.java
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/vector/ArrowFormatCWriter.java
index 9dfbbf5782..2f1e2f2a53 100644
---
a/paimon-arrow/src/main/java/org/apache/paimon/arrow/vector/ArrowFormatCWriter.java
+++
b/paimon-arrow/src/main/java/org/apache/paimon/arrow/vector/ArrowFormatCWriter.java
@@ -57,12 +57,15 @@ public class ArrowFormatCWriter implements AutoCloseable {
return realWriter.write(currentRow);
}
- public ArrowCStruct flush() {
- realWriter.flush();
+ public ArrowCStruct toCStruct() {
VectorSchemaRoot vectorSchemaRoot = realWriter.getVectorSchemaRoot();
return ArrowUtils.serializeToCStruct(vectorSchemaRoot, array, schema);
}
+ public void flush() {
+ realWriter.flush();
+ }
+
public void reset() {
realWriter.reset();
}
diff --git
a/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java
b/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java
index 63e53ca9b8..d7e857c111 100644
---
a/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java
+++
b/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java
@@ -118,6 +118,7 @@ public class ArrowFormatWriterTest {
.isEqualTo(fieldGetter.getFieldOrNull(expectec));
}
}
+ vectorSchemaRoot.close();
}
}
@@ -160,6 +161,7 @@ public class ArrowFormatWriterTest {
.isEqualTo(fieldGetter.getFieldOrNull(expectec));
}
}
+ vectorSchemaRoot.close();
}
}
@@ -192,6 +194,7 @@ public class ArrowFormatWriterTest {
.isEqualTo(fieldGetter.getFieldOrNull(expectec));
}
}
+ vectorSchemaRoot.close();
}
}
@@ -248,7 +251,7 @@ public class ArrowFormatWriterTest {
.isEqualTo(fieldGetter.getFieldOrNull(expectec));
}
}
- writer.release();
+ vectorSchemaRoot.close();
}
private Object[] randomRowValues(boolean[] nullable) {
diff --git
a/paimon-common/src/main/java/org/apache/paimon/deletionvectors/DeletionFileRecordIterator.java
b/paimon-common/src/main/java/org/apache/paimon/deletionvectors/DeletionFileRecordIterator.java
new file mode 100644
index 0000000000..10a3359c09
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/deletionvectors/DeletionFileRecordIterator.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.deletionvectors;
+
+import org.apache.paimon.data.InternalRow;
+import org.apache.paimon.reader.FileRecordIterator;
+import org.apache.paimon.reader.RecordReader;
+
+/**
+ * An iterator exposing the wrapped {@link FileRecordIterator} and its {@link
DeletionVectorJudger}.
+ */
+public interface DeletionFileRecordIterator extends
RecordReader.RecordIterator<InternalRow> {
+
+ FileRecordIterator<InternalRow> iterator();
+
+ DeletionVectorJudger deletionVector();
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/deletionvectors/DeletionVectorJudger.java
b/paimon-common/src/main/java/org/apache/paimon/deletionvectors/DeletionVectorJudger.java
new file mode 100644
index 0000000000..0b042c60d1
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/deletionvectors/DeletionVectorJudger.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.deletionvectors;
+
+/** Judges whether the row at a specified position is marked as deleted. */
+public interface DeletionVectorJudger {
+ /**
+ * Checks if the row at the specified position is marked as deleted.
+ *
+ * @param position The position of the row to check.
+ * @return true if the row is marked as deleted, false otherwise.
+ */
+ boolean isDeleted(long position);
+}
diff --git a/paimon-core/pom.xml b/paimon-core/pom.xml
index 7413457b11..bf0090a24f 100644
--- a/paimon-core/pom.xml
+++ b/paimon-core/pom.xml
@@ -90,6 +90,13 @@ under the License.
<!-- test dependencies -->
+ <dependency>
+ <groupId>org.apache.paimon</groupId>
+ <artifactId>paimon-arrow</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-common</artifactId>
diff --git
a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/ApplyDeletionFileRecordIterator.java
b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/ApplyDeletionFileRecordIterator.java
index 69997ab2ed..473b0fef53 100644
---
a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/ApplyDeletionFileRecordIterator.java
+++
b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/ApplyDeletionFileRecordIterator.java
@@ -27,7 +27,8 @@ import javax.annotation.Nullable;
import java.io.IOException;
/** A {@link FileRecordIterator} wraps a {@link FileRecordIterator} and {@link
DeletionVector}. */
-public class ApplyDeletionFileRecordIterator implements
FileRecordIterator<InternalRow> {
+public class ApplyDeletionFileRecordIterator
+ implements FileRecordIterator<InternalRow>, DeletionFileRecordIterator
{
private final FileRecordIterator<InternalRow> iterator;
private final DeletionVector deletionVector;
@@ -38,10 +39,12 @@ public class ApplyDeletionFileRecordIterator implements
FileRecordIterator<Inter
this.deletionVector = deletionVector;
}
+ @Override
public FileRecordIterator<InternalRow> iterator() {
return iterator;
}
+ @Override
public DeletionVector deletionVector() {
return deletionVector;
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
index 7766aba992..ff48c7d218 100644
---
a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
+++
b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
@@ -41,7 +41,7 @@ import static
org.apache.paimon.deletionvectors.Bitmap64DeletionVector.toLittleE
* The DeletionVector can efficiently record the positions of rows that are
deleted in a file, which
* can then be used to filter out deleted rows when processing the file.
*/
-public interface DeletionVector {
+public interface DeletionVector extends DeletionVectorJudger {
/**
* Marks the row at the specified position as deleted.
@@ -72,14 +72,6 @@ public interface DeletionVector {
}
}
- /**
- * Checks if the row at the specified position is marked as deleted.
- *
- * @param position The position of the row to check.
- * @return true if the row is marked as deleted, false otherwise.
- */
- boolean isDeleted(long position);
-
/**
* Determines if the deletion vector is empty, indicating no deletions.
*
diff --git
a/paimon-arrow/src/test/java/org/apache/paimon/arrow/converter/ArrowBatchConverterTest.java
b/paimon-core/src/test/java/org/apache/paimon/arrow/converter/ArrowBatchConverterTest.java
similarity index 100%
rename from
paimon-arrow/src/test/java/org/apache/paimon/arrow/converter/ArrowBatchConverterTest.java
rename to
paimon-core/src/test/java/org/apache/paimon/arrow/converter/ArrowBatchConverterTest.java
diff --git a/pom.xml b/pom.xml
index 798c1a6439..0b72831ceb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,6 +112,7 @@ under the License.
<orc.version>1.9.2</orc.version>
<protobuf-java.version>3.19.6</protobuf-java.version>
<roaringbitmap.version>1.2.1</roaringbitmap.version>
+ <arrow.version>15.0.0</arrow.version>
<!-- Can be set to any value to reproduce a specific build. -->
<test.randomization.seed/>