This is an automated email from the ASF dual-hosted git repository. lgbo-ustc pushed a commit to branch bug_array_flatten_null_rows in repository https://gitbox.apache.org/repos/asf/gluten.git
commit 258a7cc365829d7d03009c213bce612ba251e122 Author: lgbo-ustc <[email protected]> AuthorDate: Thu May 28 20:20:42 2026 +0800 [CH] Fix flatten nullable inner array row handling SparkArrayFlatten handled Array(Nullable(Array(T))) by scanning all nested inner arrays and returning a fully-null result column as soon as any inner array was null. That made unrelated rows null, even though Spark flatten semantics only null the outer row that contains a null inner array. Build a result null map per outer row, mark only rows containing null inner arrays as null, and keep non-null rows using the flattened array offsets. Add a ClickHouse backend regression test where the first row contains a null inner array and the second row remains non-null. --- .../execution/GlutenFunctionValidateSuite.scala | 16 +++++++++++++++ .../local-engine/Functions/SparkArrayFlatten.cpp | 23 +++++++++++++++------- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala index 6a0196e2b1..40c5ab10bb 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala @@ -668,6 +668,22 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS } } + test("test flatten with nullable inner arrays") { + val sql = + """ + |select id, flatten(arr) + |from ( + | select id, + | if(id = 0, + | array(array(cast(id + 1 as int)), cast(null as array<int>)), + | array(array(cast(id + 1 as int)))) as arr + | from range(2) + |) + |order by id + |""".stripMargin + runQueryAndCompare(sql)(checkGlutenPlan[ProjectExecTransformer]) + } + test("test common subexpression eliminate") { def checkOperatorCount[T <: TransformSupport](count: Int)(df: 
DataFrame)(implicit tag: ClassTag[T]): Unit = { diff --git a/cpp-ch/local-engine/Functions/SparkArrayFlatten.cpp b/cpp-ch/local-engine/Functions/SparkArrayFlatten.cpp index 96faa9d1dc..7ead48cac1 100644 --- a/cpp-ch/local-engine/Functions/SparkArrayFlatten.cpp +++ b/cpp-ch/local-engine/Functions/SparkArrayFlatten.cpp @@ -16,6 +16,7 @@ */ #include <Columns/ColumnArray.h> #include <Columns/ColumnNullable.h> +#include <Columns/ColumnVector.h> #include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeNullable.h> #include <Functions/FunctionFactory.h> @@ -107,19 +108,27 @@ result: Row 1: [1, 2, 3], Row2: [4] const IColumn::Offsets * prev_offsets = &src_offsets; const IColumn * prev_data = &src_col->getData(); bool nullable = prev_data->isNullable(); - // when array has null element, return null + ColumnUInt8::MutablePtr result_null_map; + // When an inner array is null, only the corresponding outer row is null. if (nullable) { const ColumnNullable * nullable_column = checkAndGetColumn<ColumnNullable>(prev_data); prev_data = nullable_column->getNestedColumnPtr().get(); - for (size_t i = 0; i < nullable_column->size(); i++) + result_null_map = ColumnUInt8::create(input_rows_count, 0); + auto & result_null_map_data = result_null_map->getData(); + size_t prev_offset = 0; + for (size_t row = 0; row < input_rows_count; ++row) { - if (nullable_column->isNullAt(i)) + const auto current_offset = src_offsets[row]; + for (size_t i = prev_offset; i < current_offset; ++i) { - auto res= nullable_column->cloneEmpty(); - res->insertManyDefaults(input_rows_count); - return res; + if (nullable_column->isNullAt(i)) + { + result_null_map_data[row] = 1; + break; + } } + prev_offset = current_offset; } } if (isNothing(prev_data->getDataType())) @@ -142,7 +151,7 @@ result: Row 1: [1, 2, 3], Row2: [4] prev_data->getPtr(), result_offsets_column ? 
std::move(result_offsets_column) : src_col->getOffsetsPtr()); if (nullable) - return makeNullable(res); + return ColumnNullable::create(std::move(res), std::move(result_null_map)); return res; }
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@gluten.apache.org
For additional commands, e-mail: commits-help@gluten.apache.org
