This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch data_col in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
commit 61516da334bd77552dd64715966c75dcb953a737
Author: Rui Mo <[email protected]>
AuthorDate: Tue May 27 13:59:56 2025 +0800

    Use column names
---
 cpp/velox/substrait/SubstraitToVeloxPlan.cc | 39 +++++++++++++++++++----------
 ep/build-velox/src/get_velox.sh             |  4 +--
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index d4ec14d085..c6951fd8a9 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -583,17 +583,19 @@ std::shared_ptr<connector::hive::HiveInsertTableHandle> makeHiveInsertTableHandl
     }
     if (std::find(partitionedBy.cbegin(), partitionedBy.cend(), tableColumnNames.at(i)) != partitionedBy.cend()) {
       ++numPartitionColumns;
-      columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
-          tableColumnNames.at(i),
-          connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
-          tableColumnTypes.at(i),
-          tableColumnTypes.at(i)));
+      columnHandles.emplace_back(
+          std::make_shared<connector::hive::HiveColumnHandle>(
+              tableColumnNames.at(i),
+              connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
+              tableColumnTypes.at(i),
+              tableColumnTypes.at(i)));
     } else {
-      columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
-          tableColumnNames.at(i),
-          connector::hive::HiveColumnHandle::ColumnType::kRegular,
-          tableColumnTypes.at(i),
-          tableColumnTypes.at(i)));
+      columnHandles.emplace_back(
+          std::make_shared<connector::hive::HiveColumnHandle>(
+              tableColumnNames.at(i),
+              connector::hive::HiveColumnHandle::ColumnType::kRegular,
+              tableColumnTypes.at(i),
+              tableColumnTypes.at(i)));
     }
   }
   VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size());
@@ -1298,11 +1300,22 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
     SubstraitParser::parseColumnTypes(baseSchema, columnTypes);
   }
 
-  // Velox requires Filter Pushdown must being enabled.
-  bool filterPushdownEnabled = true;
+  // Data columns are used as requested type in Velox. To support reading binary as string, requested type needs to be
+  // provided. However, to workaround the type check between element type and array type for unannotated array, for
+  // array type the requested type is set to nullptr.
   auto names = colNameList;
-  auto types = veloxTypeList;
+  std::vector<TypePtr> types;
+  for (const auto& type : veloxTypeList) {
+    if (type->kind() == TypeKind::ARRAY) {
+      types.push_back(nullptr);
+    } else {
+      types.emplace_back(type);
+    }
+  }
   auto dataColumns = ROW(std::move(names), std::move(types));
+
+  // Velox requires Filter Pushdown must being enabled.
+  bool filterPushdownEnabled = true;
   std::shared_ptr<connector::hive::HiveTableHandle> tableHandle;
   if (!readRel.has_filter()) {
     tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 4741355f3f..49bf2d4ae9 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -16,8 +16,8 @@
 
 set -exu
 
-VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2025_05_28
+VELOX_REPO=https://github.com/rui-mo/velox.git
+VELOX_BRANCH=test
 VELOX_HOME=""
 OS=`uname -s`
 

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
