Copilot commented on code in PR #56880:
URL: https://github.com/apache/doris/pull/56880#discussion_r3008301229
##########
be/src/exprs/function/array/function_array_element.h:
##########
@@ -350,24 +337,31 @@ class FunctionArrayElement : public IFunction {
auto left_element_type = remove_nullable(
assert_cast<const
DataTypeArray&>(*remove_nullable(arguments[0].type))
.get_nested_type());
- // because we impl use_default_implementation_for_nulls
- // we should handle array index column by-self, and array index should
not be nullable.
- auto idx_col = remove_nullable(arguments[1].column);
+ const UInt8* idx_null_map = nullptr;
+ auto idx_col_with_const = unpack_if_const(arguments[1].column);
+ if (idx_col_with_const.first->is_nullable()) {
+ const auto& idx_null_column =
+ reinterpret_cast<const
ColumnNullable&>(*idx_col_with_const.first);
+ idx_null_map =
idx_null_column.get_null_map_column().get_data().data();
+ }
+ const auto& idx_col = remove_nullable(idx_col_with_const.first);
+ bool is_const_index = idx_col_with_const.second;
Review Comment:
This change adds support for nullable (and const) index columns via
`idx_null_map`/`unpack_if_const`, but there are no corresponding test cases for
a NULL index (e.g. `element_at([1,2,3], NULL)` should return NULL). Please
extend the existing `be/test/exprs/function/function_array_element_test.cpp`
coverage to include nullable index inputs (both const and non-const) for
numeric and string arrays.
##########
be/src/exprs/function/array/function_array_element.h:
##########
@@ -292,31 +281,29 @@ class FunctionArrayElement : public IFunction {
ColumnPtr _execute_common(const ColumnArray::Offsets64& offsets, const
IColumn& nested_column,
const UInt8* arr_null_map, const IColumn&
indices,
- const UInt8* nested_null_map, UInt8*
dst_null_map) const {
- // prepare return data
+ const UInt8* nested_null_map, UInt8*
dst_null_map,
+ const UInt8* idx_null_map, bool is_const_index)
const {
+ const auto& index_data = reinterpret_cast<const
ColumnInt64&>(indices).get_data();
+
Review Comment:
Same issue as above: `indices` may be Int32/Int16/etc., but this code
unconditionally reinterprets it as `ColumnInt64`, which is undefined behavior.
Please read indices via a safe API or add a type-dispatch/cast for the index
column.
##########
be/src/exprs/function/array/function_array_element.h:
##########
@@ -174,52 +174,46 @@ class FunctionArrayElement : public IFunction {
template <typename ColumnType>
ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const
IColumn& nested_column,
const UInt8* arr_null_map, const IColumn&
indices,
- const UInt8* nested_null_map, UInt8*
dst_null_map) const {
+ const UInt8* nested_null_map, UInt8*
dst_null_map,
+ const UInt8* idx_null_map, bool is_const_index)
const {
const auto& nested_data = reinterpret_cast<const
ColumnType&>(nested_column).get_data();
+ const auto& index_data = reinterpret_cast<const
ColumnInt64&>(indices).get_data();
auto dst_column = nested_column.clone_empty();
auto& dst_data = reinterpret_cast<ColumnType&>(*dst_column).get_data();
dst_data.resize(offsets.size());
- // process
for (size_t row = 0; row < offsets.size(); ++row) {
size_t off = offsets[row - 1];
size_t len = offsets[row] - off;
- auto index = indices.get_int(row);
- // array is nullable
+ size_t idx = index_check_const(row, is_const_index);
+ auto index = (idx_null_map && idx_null_map[idx]) ? 0 :
index_data[idx];
bool null_flag = bool(arr_null_map && arr_null_map[row]);
- // calc index in nested column
if (!null_flag && index > 0 && index <= len) {
index += off - 1;
} else if (!null_flag && index < 0 && -index <= len) {
index += off + len;
} else {
null_flag = true;
}
- // nested column nullable check
if (!null_flag && nested_null_map && nested_null_map[index]) {
null_flag = true;
}
- // actual data copy
- if (null_flag) {
- dst_null_map[row] = true;
- dst_data[row] = typename ColumnType::value_type();
- } else {
- DCHECK(index >= 0 && index < nested_data.size());
- dst_null_map[row] = false;
- dst_data[row] = nested_data[index];
- }
+ dst_null_map[row] = null_flag;
+ dst_data[row] = !null_flag ? nested_data[index] : typename
ColumnType::value_type();
}
return dst_column;
}
ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const
IColumn& nested_column,
const UInt8* arr_null_map, const IColumn&
indices,
- const UInt8* nested_null_map, UInt8*
dst_null_map) const {
+ const UInt8* nested_null_map, UInt8*
dst_null_map,
+ const UInt8* idx_null_map, bool is_const_index)
const {
const auto& src_str_offs =
reinterpret_cast<const
ColumnString&>(nested_column).get_offsets();
const auto& src_str_chars =
reinterpret_cast<const
ColumnString&>(nested_column).get_chars();
+ const auto& index_data = reinterpret_cast<const
ColumnInt64&>(indices).get_data();
Review Comment:
`indices` is not guaranteed to be a `ColumnInt64` (the function accepts
multiple integer types, e.g. Int32). This `reinterpret_cast<const
ColumnInt64&>` is undefined behavior unless the caller always provides an Int64
index column. Use `get_int()` or type-dispatch the index column before
accessing its underlying container.
```suggestion
struct IndexDataView {
const IColumn& column;
Int64 operator[](size_t i) const { return column.get_int(i); }
size_t size() const { return column.size(); }
};
IndexDataView index_data{indices};
```
##########
be/src/exprs/function/array/function_array_element.h:
##########
@@ -350,24 +337,31 @@ class FunctionArrayElement : public IFunction {
auto left_element_type = remove_nullable(
assert_cast<const
DataTypeArray&>(*remove_nullable(arguments[0].type))
.get_nested_type());
- // because we impl use_default_implementation_for_nulls
- // we should handle array index column by-self, and array index should
not be nullable.
- auto idx_col = remove_nullable(arguments[1].column);
+ const UInt8* idx_null_map = nullptr;
+ auto idx_col_with_const = unpack_if_const(arguments[1].column);
+ if (idx_col_with_const.first->is_nullable()) {
+ const auto& idx_null_column =
+ reinterpret_cast<const
ColumnNullable&>(*idx_col_with_const.first);
+ idx_null_map =
idx_null_column.get_null_map_column().get_data().data();
+ }
+ const auto& idx_col = remove_nullable(idx_col_with_const.first);
+ bool is_const_index = idx_col_with_const.second;
Review Comment:
The PR title/description mention removing several "unless session
variables", but the changes in this hunk modify `element_at` array index
handling (nullable/const index support). If this is intentional, the PR
metadata should be updated; otherwise, the intended session-variable refactor
changes may be missing from this PR.
##########
be/src/exprs/function/array/function_array_element.h:
##########
@@ -174,52 +174,46 @@ class FunctionArrayElement : public IFunction {
template <typename ColumnType>
ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const
IColumn& nested_column,
const UInt8* arr_null_map, const IColumn&
indices,
- const UInt8* nested_null_map, UInt8*
dst_null_map) const {
+ const UInt8* nested_null_map, UInt8*
dst_null_map,
+ const UInt8* idx_null_map, bool is_const_index)
const {
const auto& nested_data = reinterpret_cast<const
ColumnType&>(nested_column).get_data();
+ const auto& index_data = reinterpret_cast<const
ColumnInt64&>(indices).get_data();
Review Comment:
`indices` is an `IColumn` whose physical type can be
Int8/Int16/Int32/Int64/Bool (per `get_return_type_impl`). Reinterpreting it as
`ColumnInt64` is undefined behavior and will break existing unit tests that
pass `Int32` indices (and potentially crash in production). Please either keep
using `indices.get_int(...)`, or dispatch/cast based on the actual index column
type before reading its data buffer.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]