Copilot commented on code in PR #19389:
URL: https://github.com/apache/datafusion/pull/19389#discussion_r2631910926
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -86,10 +88,139 @@ impl Default for GetFieldFunc {
}
}
+/// Extract a single field from a struct or map array
+fn extract_single_field(base: ColumnarValue, name: ScalarValue) ->
Result<ColumnarValue> {
+ let arrays = ColumnarValue::values_to_arrays(&[base])?;
+ let array = Arc::clone(&arrays[0]);
+
+ fn process_map_array(
+ array: &dyn Array,
+ key_array: Arc<dyn Array>,
+ ) -> Result<ColumnarValue> {
+ let map_array = as_map_array(array)?;
+ let keys = if key_array.data_type().is_nested() {
+ let comparator = make_comparator(
+ map_array.keys().as_ref(),
+ key_array.as_ref(),
+ SortOptions::default(),
+ )?;
+ let len = map_array.keys().len().min(key_array.len());
+ let values = (0..len).map(|i| comparator(i, i).is_eq()).collect();
+ let nulls = NullBuffer::union(map_array.keys().nulls(),
key_array.nulls());
+ BooleanArray::new(values, nulls)
+ } else {
+ let be_compared = Scalar::new(key_array);
+ arrow::compute::kernels::cmp::eq(&be_compared, map_array.keys())?
+ };
+
+ let original_data = map_array.entries().column(1).to_data();
+ let capacity = Capacities::Array(original_data.len());
+ let mut mutable =
+ MutableArrayData::with_capacities(vec![&original_data], true,
capacity);
+
+ for entry in 0..map_array.len() {
+ let start = map_array.value_offsets()[entry] as usize;
+ let end = map_array.value_offsets()[entry + 1] as usize;
+
+ let maybe_matched = keys
+ .slice(start, end - start)
+ .iter()
+ .enumerate()
+ .find(|(_, t)| t.unwrap());
+
+ if maybe_matched.is_none() {
+ mutable.extend_nulls(1);
+ continue;
+ }
+ let (match_offset, _) = maybe_matched.unwrap();
+ mutable.extend(0, start + match_offset, start + match_offset + 1);
+ }
+
+ let data = mutable.freeze();
+ let data = make_array(data);
+ Ok(ColumnarValue::Array(data))
+ }
+
+ fn process_map_with_nested_key(
+ array: &dyn Array,
+ key_array: &dyn Array,
+ ) -> Result<ColumnarValue> {
+ let map_array = as_map_array(array)?;
+
+ let comparator = make_comparator(
+ map_array.keys().as_ref(),
+ key_array,
+ SortOptions::default(),
+ )?;
+
+ let original_data = map_array.entries().column(1).to_data();
+ let capacity = Capacities::Array(original_data.len());
+ let mut mutable =
+ MutableArrayData::with_capacities(vec![&original_data], true,
capacity);
+
+ for entry in 0..map_array.len() {
+ let start = map_array.value_offsets()[entry] as usize;
+ let end = map_array.value_offsets()[entry + 1] as usize;
+
+ let mut found_match = false;
+ for i in start..end {
+ if comparator(i, 0).is_eq() {
+ mutable.extend(0, i, i + 1);
+ found_match = true;
+ break;
+ }
+ }
+
+ if !found_match {
+ mutable.extend_nulls(1);
+ }
+ }
+
+ let data = mutable.freeze();
+ let data = make_array(data);
+ Ok(ColumnarValue::Array(data))
+ }
+
+ let string_value = name.try_as_str().flatten().map(|s| s.to_string());
+
+ match (array.data_type(), name, string_value) {
+ (DataType::Map(_, _), ScalarValue::List(arr), _) => {
+ let key_array: Arc<dyn Array> = arr;
+ process_map_array(&array, key_array)
+ }
+ (DataType::Map(_, _), ScalarValue::Struct(arr), _) => {
+ process_map_array(&array, arr as Arc<dyn Array>)
+ }
+ (DataType::Map(_, _), other, _) => {
+ let data_type = other.data_type();
+ if data_type.is_nested() {
+ process_map_with_nested_key(&array, &other.to_array()?)
+ } else {
+ process_map_array(&array, other.to_array()?)
+ }
+ }
+ (DataType::Struct(_), _, Some(k)) => {
+ let as_struct_array = as_struct_array(&array)?;
+ match as_struct_array.column_by_name(&k) {
+ None => exec_err!("get indexed field {k} not found in struct"),
+ Some(col) => Ok(ColumnarValue::Array(Arc::clone(col))),
+ }
+ }
+ (DataType::Struct(_), name, _) => exec_err!(
+ "get_field is only possible on struct with utf8 indexes. \
+ Received with {name:?} index"
+ ),
+ (DataType::Null, _, _) => Ok(ColumnarValue::Scalar(ScalarValue::Null)),
+ (dt, name, _) => exec_err!(
+ "get_field is only possible on maps or structs. Received {dt} with
{name:?} index"
+ ),
Review Comment:
The `extract_single_field` function only handles `ScalarValue::Utf8` for
struct field access (line 200), but doesn't support other string types like
`Utf8View` or `LargeUtf8`. This is inconsistent with the test added at line 452
which uses `ScalarValue::Utf8View`. The match statement should use
`name.try_as_str().flatten()` to extract string values from any string scalar
type, similar to how it's done in `return_field_from_args` at line 328.
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -86,10 +88,139 @@ impl Default for GetFieldFunc {
}
}
+/// Extract a single field from a struct or map array
+fn extract_single_field(base: ColumnarValue, name: ScalarValue) ->
Result<ColumnarValue> {
+ let arrays = ColumnarValue::values_to_arrays(&[base])?;
+ let array = Arc::clone(&arrays[0]);
+
+ fn process_map_array(
+ array: &dyn Array,
+ key_array: Arc<dyn Array>,
+ ) -> Result<ColumnarValue> {
+ let map_array = as_map_array(array)?;
+ let keys = if key_array.data_type().is_nested() {
+ let comparator = make_comparator(
+ map_array.keys().as_ref(),
+ key_array.as_ref(),
+ SortOptions::default(),
+ )?;
+ let len = map_array.keys().len().min(key_array.len());
+ let values = (0..len).map(|i| comparator(i, i).is_eq()).collect();
+ let nulls = NullBuffer::union(map_array.keys().nulls(),
key_array.nulls());
+ BooleanArray::new(values, nulls)
+ } else {
+ let be_compared = Scalar::new(key_array);
+ arrow::compute::kernels::cmp::eq(&be_compared, map_array.keys())?
+ };
+
+ let original_data = map_array.entries().column(1).to_data();
+ let capacity = Capacities::Array(original_data.len());
+ let mut mutable =
+ MutableArrayData::with_capacities(vec![&original_data], true,
capacity);
+
+ for entry in 0..map_array.len() {
+ let start = map_array.value_offsets()[entry] as usize;
+ let end = map_array.value_offsets()[entry + 1] as usize;
+
+ let maybe_matched = keys
+ .slice(start, end - start)
+ .iter()
+ .enumerate()
+ .find(|(_, t)| t.unwrap());
+
+ if maybe_matched.is_none() {
+ mutable.extend_nulls(1);
+ continue;
+ }
+ let (match_offset, _) = maybe_matched.unwrap();
+ mutable.extend(0, start + match_offset, start + match_offset + 1);
+ }
+
+ let data = mutable.freeze();
+ let data = make_array(data);
+ Ok(ColumnarValue::Array(data))
+ }
+
+ fn process_map_with_nested_key(
+ array: &dyn Array,
+ key_array: &dyn Array,
+ ) -> Result<ColumnarValue> {
+ let map_array = as_map_array(array)?;
+
+ let comparator = make_comparator(
+ map_array.keys().as_ref(),
+ key_array,
+ SortOptions::default(),
+ )?;
+
+ let original_data = map_array.entries().column(1).to_data();
+ let capacity = Capacities::Array(original_data.len());
+ let mut mutable =
+ MutableArrayData::with_capacities(vec![&original_data], true,
capacity);
+
+ for entry in 0..map_array.len() {
+ let start = map_array.value_offsets()[entry] as usize;
+ let end = map_array.value_offsets()[entry + 1] as usize;
+
+ let mut found_match = false;
+ for i in start..end {
+ if comparator(i, 0).is_eq() {
+ mutable.extend(0, i, i + 1);
+ found_match = true;
+ break;
+ }
+ }
+
+ if !found_match {
+ mutable.extend_nulls(1);
+ }
+ }
+
+ let data = mutable.freeze();
+ let data = make_array(data);
+ Ok(ColumnarValue::Array(data))
+ }
+
+ let string_value = name.try_as_str().flatten().map(|s| s.to_string());
+
+ match (array.data_type(), name, string_value) {
+ (DataType::Map(_, _), ScalarValue::List(arr), _) => {
+ let key_array: Arc<dyn Array> = arr;
+ process_map_array(&array, key_array)
+ }
+ (DataType::Map(_, _), ScalarValue::Struct(arr), _) => {
+ process_map_array(&array, arr as Arc<dyn Array>)
+ }
+ (DataType::Map(_, _), other, _) => {
+ let data_type = other.data_type();
+ if data_type.is_nested() {
+ process_map_with_nested_key(&array, &other.to_array()?)
+ } else {
+ process_map_array(&array, other.to_array()?)
+ }
+ }
+ (DataType::Struct(_), _, Some(k)) => {
+ let as_struct_array = as_struct_array(&array)?;
+ match as_struct_array.column_by_name(&k) {
+ None => exec_err!("get indexed field {k} not found in struct"),
Review Comment:
The error message "get indexed field {k} not found in struct" is
inconsistent with the error format used in `return_field_from_args` at line
340, which says "Field {field_name} not found in struct". For consistency
across the codebase, this should be updated to match the format "Field {k} not
found in struct".
```suggestion
None => exec_err!("Field {k} not found in struct"),
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]