This is an automated email from the ASF dual-hosted git repository. mahongbin pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push: new 4a953a222 [Gluten-4912][CH] fix bug when a query has no shuffle (#5081) 4a953a222 is described below commit 4a953a22214f0d74757d5a412218d93dda03aca6 Author: Hongbin Ma <mahong...@apache.org> AuthorDate: Fri Mar 22 18:18:59 2024 +0800 [Gluten-4912][CH] fix bug when a query has no shuffle (#5081) --- .../execution/GlutenClickHouseMergeTreeWriteSuite.scala | 15 +++++++++++++-- cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp | 13 +++++++------ cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp | 2 +- cpp-ch/local-engine/tests/benchmark_local_engine.cpp | 2 +- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala index 6750e251c..0862fd41a 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala @@ -1239,7 +1239,7 @@ class GlutenClickHouseMergeTreeWriteSuite |) |USING clickhouse |LOCATION '$basePath/lineitem_mergetree_lowcard' - |TBLPROPERTIES('lowCardKey'='l_returnflag,L_LINESTATUS') + |TBLPROPERTIES('lowCardKey'='l_returnflag,L_LINESTATUS,l_quantity') |""".stripMargin) spark.sql(s""" @@ -1285,7 +1285,7 @@ class GlutenClickHouseMergeTreeWriteSuite val sqlStr2 = s""" |SELECT - | max(l_returnflag) + | max(l_returnflag), min(l_quantity) |FROM | lineitem_mergetree_lowcard |GROUP BY @@ -1298,6 +1298,17 @@ class GlutenClickHouseMergeTreeWriteSuite // total rows should remain unchanged spark.sql(sqlStr2).collect().apply(0).get(0) == "R" ) + + // test select * + val sqlStr3 = + s""" + |SELECT + | * + |FROM + | lineitem_mergetree_lowcard limit 1 + | + |""".stripMargin + spark.sql(sqlStr3).collect() } test("test mergetree with primary keys filter") { diff --git a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp index c3ff32a97..cb5f0111c 100644 --- a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp +++ b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp @@ -325,12 +325,12 @@ SparkRowInfo::SparkRowInfo( { const auto & col = cols[col_idx]; /// No need to calculate backing data length for fixed length types - const auto type_without_nullable = removeNullable(col.type); + const auto type_without_nullable = removeLowCardinalityAndNullable(col.type); if (BackingDataLengthCalculator::isVariableLengthDataType(type_without_nullable)) { if (BackingDataLengthCalculator::isDataTypeSupportRawData(type_without_nullable)) { - auto column = col.column->convertToFullColumnIfConst(); + auto column = col.column->convertToFullIfNeeded(); const auto * nullable_column = checkAndGetColumn<ColumnNullable>(*column); if (nullable_column) { @@ -348,13 +348,13 @@ SparkRowInfo::SparkRowInfo( for (size_t i = 0; i < num_rows; ++i) { size_t row_idx = masks == nullptr ? i : masks->at(i); - lengths[i] += roundNumberOfBytesToNearestWord(col.column->getDataAt(row_idx).size); + lengths[i] += roundNumberOfBytesToNearestWord(column->getDataAt(row_idx).size); } } } else { - BackingDataLengthCalculator calculator(col.type); + BackingDataLengthCalculator calculator(type_without_nullable); for (size_t i = 0; i < num_rows; ++i) { size_t row_idx = masks == nullptr ? i : masks->at(i); @@ -462,11 +462,12 @@ std::unique_ptr<SparkRowInfo> CHColumnToSparkRow::convertCHColumnToSparkRow(cons const auto & col = block.getByPosition(col_idx); int64_t field_offset = spark_row_info->getFieldOffset(col_idx); - ColumnWithTypeAndName col_not_const{col.column->convertToFullColumnIfConst(), col.type, col.name}; + ColumnWithTypeAndName col_full{col.column->convertToFullIfNeeded(), + removeLowCardinality(col.type), col.name}; writeValue( spark_row_info->getBufferAddress(), field_offset, - col_not_const, + col_full, col_idx, spark_row_info->getNumRows(), spark_row_info->getOffsets(), diff --git a/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp b/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp index 28c57c31f..9baf3c469 100644 --- a/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp +++ b/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp @@ -301,7 +301,7 @@ void ColumnsBuffer::appendSelective( accumulated_columns.reserve(source.columns()); for (size_t i = 0; i < source.columns(); i++) { - auto column = source.getColumns()[i]->convertToFullColumnIfConst()->convertToFullIfNeeded()->cloneEmpty(); + auto column = source.getColumns()[i]->convertToFullIfNeeded()->cloneEmpty(); column->reserve(prefer_buffer_size); accumulated_columns.emplace_back(std::move(column)); } diff --git a/cpp-ch/local-engine/tests/benchmark_local_engine.cpp b/cpp-ch/local-engine/tests/benchmark_local_engine.cpp index a482e4702..51ee7ad12 100644 --- a/cpp-ch/local-engine/tests/benchmark_local_engine.cpp +++ b/cpp-ch/local-engine/tests/benchmark_local_engine.cpp @@ -586,7 +586,7 @@ DB::ContextMutablePtr global_context; std::vector<ColumnData> columns(arguments.size() + 1); for (size_t i = 0; i < arguments.size(); ++i) { - auto column = block.getByPosition(i).column->convertToFullColumnIfConst(); + auto column = block.getByPosition(i).column->convertToFullIfNeeded(); columns[i] = getColumnData(column.get()); } for (auto _ : state) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org