(incubator-gluten) branch main updated: [VL] Clarify the code objective of VeloxDataSourceJniWrapper#splitBlockByPartitionAndBucket (#10179)

hongze Mon, 14 Jul 2025 02:53:02 -0700

This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 762fd82256 [VL] Clarify the code objective of VeloxDataSourceJniWrapper#splitBlockByPartitionAndBucket (#10179)
762fd82256 is described below

commit 762fd822569335268e60737b2a0cdb22907f37e1
Author: Hongze Zhang <[email protected]>
AuthorDate: Mon Jul 14 17:52:33 2025 +0800

    [VL] Clarify the code objective of VeloxDataSourceJniWrapper#splitBlockByPartitionAndBucket (#10179)
---
 .../velox/VeloxFormatWriterInjects.scala           |  4 +---
 cpp/velox/jni/VeloxJniWrapper.cc                   | 22 ++++++++++++++++------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala
index 1cf167bafa..06e9d91c0f 100644
--- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala
@@ -117,10 +117,8 @@ class VeloxRowSplitter extends GlutenRowSplitter {
     val runtime =
       Runtimes.contextInstance(BackendsApiManager.getBackendName, 
"VeloxRowSplitter")
     val datasourceJniWrapper = VeloxDataSourceJniWrapper.create(runtime)
-    val originalColumns: Array[Int] = Array.range(0, batch.numCols())
-    val dataColIndice = 
originalColumns.filterNot(partitionColIndice.contains(_))
     new VeloxBlockStripes(
       datasourceJniWrapper
-        .splitBlockByPartitionAndBucket(handler, dataColIndice, hasBucket))
+        .splitBlockByPartitionAndBucket(handler, partitionColIndice, 
hasBucket))
   }
 }
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index c372f78d15..951564554b 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -407,16 +407,26 @@ 
Java_org_apache_gluten_datasource_VeloxDataSourceJniWrapper_splitBlockByPartitio
   JNI_METHOD_START
   auto ctx = gluten::getRuntime(env, wrapper);
   auto batch = ObjectStore::retrieve<ColumnarBatch>(batchHandle);
-  auto safeArray = gluten::getIntArrayElementsSafe(env, partitionColIndice);
-  int size = env->GetArrayLength(partitionColIndice);
-  std::vector<int32_t> partitionColIndiceVec;
-  for (int i = 0; i < size; ++i) {
-    partitionColIndiceVec.push_back(safeArray.elems()[i]);
+
+  std::vector<int32_t> dataColIndicesVec;
+  {
+    auto partitionKeyArray = gluten::getIntArrayElementsSafe(env, 
partitionColIndice);
+    int numPartitionKeys = partitionKeyArray.length();
+    std::unordered_set<int32_t> partitionColIndiceVec;
+    for (int i = 0; i < numPartitionKeys; ++i) {
+      partitionColIndiceVec.emplace(partitionKeyArray.elems()[i]);
+    }
+    for (int i = 0; i < batch->numColumns(); ++i) {
+      if (partitionColIndiceVec.count(i) == 0) {
+        // The column is not a partition column. Add it to the data column 
vector.
+        dataColIndicesVec.emplace_back(i);
+      }
+    }
   }
 
   auto result = batch->toUnsafeRow(0);
   auto rowBytes = result.data();
-  auto newBatchHandle = ctx->saveObject(ctx->select(batch, 
partitionColIndiceVec));
+  auto newBatchHandle = ctx->saveObject(ctx->select(batch, dataColIndicesVec));
 
   auto bytesSize = result.size();
   jbyteArray bytesArray = env->NewByteArray(bytesSize);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL] Clarify the code objective of VeloxDataSourceJniWrapper#splitBlockByPartitionAndBucket (#10179)

Reply via email to