This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch data_col
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git

commit 61516da334bd77552dd64715966c75dcb953a737
Author: Rui Mo <[email protected]>
AuthorDate: Tue May 27 13:59:56 2025 +0800

    Use column names
---
 cpp/velox/substrait/SubstraitToVeloxPlan.cc | 39 +++++++++++++++++++----------
 ep/build-velox/src/get_velox.sh             |  4 +--
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index d4ec14d085..c6951fd8a9 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -583,17 +583,19 @@ std::shared_ptr<connector::hive::HiveInsertTableHandle> makeHiveInsertTableHandl
     }
     if (std::find(partitionedBy.cbegin(), partitionedBy.cend(), tableColumnNames.at(i)) != partitionedBy.cend()) {
       ++numPartitionColumns;
-      columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
-          tableColumnNames.at(i),
-          connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
-          tableColumnTypes.at(i),
-          tableColumnTypes.at(i)));
+      columnHandles.emplace_back(
+          std::make_shared<connector::hive::HiveColumnHandle>(
+              tableColumnNames.at(i),
+              connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
+              tableColumnTypes.at(i),
+              tableColumnTypes.at(i)));
     } else {
-      columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
-          tableColumnNames.at(i),
-          connector::hive::HiveColumnHandle::ColumnType::kRegular,
-          tableColumnTypes.at(i),
-          tableColumnTypes.at(i)));
+      columnHandles.emplace_back(
+          std::make_shared<connector::hive::HiveColumnHandle>(
+              tableColumnNames.at(i),
+              connector::hive::HiveColumnHandle::ColumnType::kRegular,
+              tableColumnTypes.at(i),
+              tableColumnTypes.at(i)));
     }
   }
   VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size());
@@ -1298,11 +1300,22 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
     SubstraitParser::parseColumnTypes(baseSchema, columnTypes);
   }
 
-  // Velox requires Filter Pushdown must being enabled.
-  bool filterPushdownEnabled = true;
+  // Data columns are used as requested type in Velox. To support reading binary as string, requested type needs to be
+  // provided. However, to work around the type check between element type and array type for unannotated array, for
+  // array type the requested type is set to nullptr.
   auto names = colNameList;
-  auto types = veloxTypeList;
+  std::vector<TypePtr> types;
+  for (const auto& type : veloxTypeList) {
+    if (type->kind() == TypeKind::ARRAY) {
+      types.push_back(nullptr);
+    } else {
+      types.emplace_back(type);
+    }
+  }
   auto dataColumns = ROW(std::move(names), std::move(types));
+
+  // Velox requires filter pushdown to be enabled.
+  bool filterPushdownEnabled = true;
   std::shared_ptr<connector::hive::HiveTableHandle> tableHandle;
   if (!readRel.has_filter()) {
     tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 4741355f3f..49bf2d4ae9 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -16,8 +16,8 @@
 
 set -exu
 
-VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2025_05_28
+VELOX_REPO=https://github.com/rui-mo/velox.git
+VELOX_BRANCH=test
 VELOX_HOME=""
 
 OS=`uname -s`


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to