This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 762fd82256 [VL] Clarify the code objective of VeloxDataSourceJniWrapper#splitBlockByPartitionAndBucket (#10179)
762fd82256 is described below
commit 762fd822569335268e60737b2a0cdb22907f37e1
Author: Hongze Zhang <[email protected]>
AuthorDate: Mon Jul 14 17:52:33 2025 +0800
[VL] Clarify the code objective of VeloxDataSourceJniWrapper#splitBlockByPartitionAndBucket (#10179)
---
.../velox/VeloxFormatWriterInjects.scala | 4 +---
cpp/velox/jni/VeloxJniWrapper.cc | 22 ++++++++++++++++------
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala
index 1cf167bafa..06e9d91c0f 100644
--- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala
@@ -117,10 +117,8 @@ class VeloxRowSplitter extends GlutenRowSplitter {
val runtime =
Runtimes.contextInstance(BackendsApiManager.getBackendName,
"VeloxRowSplitter")
val datasourceJniWrapper = VeloxDataSourceJniWrapper.create(runtime)
- val originalColumns: Array[Int] = Array.range(0, batch.numCols())
- val dataColIndice =
originalColumns.filterNot(partitionColIndice.contains(_))
new VeloxBlockStripes(
datasourceJniWrapper
- .splitBlockByPartitionAndBucket(handler, dataColIndice, hasBucket))
+ .splitBlockByPartitionAndBucket(handler, partitionColIndice,
hasBucket))
}
}
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index c372f78d15..951564554b 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -407,16 +407,26 @@
Java_org_apache_gluten_datasource_VeloxDataSourceJniWrapper_splitBlockByPartitio
JNI_METHOD_START
auto ctx = gluten::getRuntime(env, wrapper);
auto batch = ObjectStore::retrieve<ColumnarBatch>(batchHandle);
- auto safeArray = gluten::getIntArrayElementsSafe(env, partitionColIndice);
- int size = env->GetArrayLength(partitionColIndice);
- std::vector<int32_t> partitionColIndiceVec;
- for (int i = 0; i < size; ++i) {
- partitionColIndiceVec.push_back(safeArray.elems()[i]);
+
+ std::vector<int32_t> dataColIndicesVec;
+ {
+ auto partitionKeyArray = gluten::getIntArrayElementsSafe(env,
partitionColIndice);
+ int numPartitionKeys = partitionKeyArray.length();
+ std::unordered_set<int32_t> partitionColIndiceVec;
+ for (int i = 0; i < numPartitionKeys; ++i) {
+ partitionColIndiceVec.emplace(partitionKeyArray.elems()[i]);
+ }
+ for (int i = 0; i < batch->numColumns(); ++i) {
+ if (partitionColIndiceVec.count(i) == 0) {
+ // The column is not a partition column. Add it to the data column
vector.
+ dataColIndicesVec.emplace_back(i);
+ }
+ }
}
auto result = batch->toUnsafeRow(0);
auto rowBytes = result.data();
- auto newBatchHandle = ctx->saveObject(ctx->select(batch,
partitionColIndiceVec));
+ auto newBatchHandle = ctx->saveObject(ctx->select(batch, dataColIndicesVec));
auto bytesSize = result.size();
jbyteArray bytesArray = env->NewByteArray(bytesSize);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]