martin-g commented on code in PR #21115:
URL: https://github.com/apache/datafusion/pull/21115#discussion_r2987569999
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -199,6 +199,53 @@ fn extract_single_field(base: ColumnarValue, name:
ScalarValue) -> Result<Column
let string_value = name.try_as_str().flatten().map(|s| s.to_string());
match (array.data_type(), name, string_value) {
+ // Dictionary-encoded struct: extract the field from the dictionary's
+ // values (the deduplicated struct array) and rebuild a dictionary with
+ // the same keys. This preserves dictionary encoding without expanding.
+ (DataType::Dictionary(key_type, value_type), _, Some(field_name))
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ // Downcast to DictionaryArray to access keys and values without
+ // materializing the dictionary.
+ macro_rules! extract_dict_field {
+ ($key_ty:ty) => {{
+ let dict = array
+ .as_any()
+
.downcast_ref::<arrow::array::DictionaryArray<$key_ty>>()
+ .ok_or_else(|| {
+
datafusion_common::DataFusionError::Internal(format!(
+ "Failed to downcast dictionary with key type
{}",
+ key_type
+ ))
+ })?;
+ let values_struct = as_struct_array(dict.values())?;
+ let field_col =
+
values_struct.column_by_name(&field_name).ok_or_else(|| {
+
datafusion_common::DataFusionError::Execution(format!(
+ "Field {field_name} not found in dictionary
struct"
+ ))
Review Comment:
```suggestion
exec_err!("Field {field_name} not found in
dictionary struct")
```
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -199,6 +199,53 @@ fn extract_single_field(base: ColumnarValue, name:
ScalarValue) -> Result<Column
let string_value = name.try_as_str().flatten().map(|s| s.to_string());
match (array.data_type(), name, string_value) {
+ // Dictionary-encoded struct: extract the field from the dictionary's
+ // values (the deduplicated struct array) and rebuild a dictionary with
+ // the same keys. This preserves dictionary encoding without expanding.
+ (DataType::Dictionary(key_type, value_type), _, Some(field_name))
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ // Downcast to DictionaryArray to access keys and values without
+ // materializing the dictionary.
+ macro_rules! extract_dict_field {
+ ($key_ty:ty) => {{
+ let dict = array
+ .as_any()
+
.downcast_ref::<arrow::array::DictionaryArray<$key_ty>>()
+ .ok_or_else(|| {
+
datafusion_common::DataFusionError::Internal(format!(
+ "Failed to downcast dictionary with key type
{}",
+ key_type
+ ))
+ })?;
+ let values_struct = as_struct_array(dict.values())?;
+ let field_col =
+
values_struct.column_by_name(&field_name).ok_or_else(|| {
+
datafusion_common::DataFusionError::Execution(format!(
+ "Field {field_name} not found in dictionary
struct"
+ ))
+ })?;
+ // Rebuild dictionary: same keys, extracted field as
values.
+ let new_dict =
arrow::array::DictionaryArray::<$key_ty>::try_new(
+ dict.keys().clone(),
+ Arc::clone(field_col),
+ )?;
+ Ok(ColumnarValue::Array(Arc::new(new_dict)))
+ }};
+ }
+
+ match key_type.as_ref() {
+ DataType::Int8 =>
extract_dict_field!(arrow::datatypes::Int8Type),
Review Comment:
```suggestion
DataType::Int8 => extract_dict_field!(Int8Type),
```
and import them with `use arrow::datatypes::...`
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -199,6 +199,53 @@ fn extract_single_field(base: ColumnarValue, name:
ScalarValue) -> Result<Column
let string_value = name.try_as_str().flatten().map(|s| s.to_string());
match (array.data_type(), name, string_value) {
+ // Dictionary-encoded struct: extract the field from the dictionary's
+ // values (the deduplicated struct array) and rebuild a dictionary with
+ // the same keys. This preserves dictionary encoding without expanding.
+ (DataType::Dictionary(key_type, value_type), _, Some(field_name))
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ // Downcast to DictionaryArray to access keys and values without
+ // materializing the dictionary.
+ macro_rules! extract_dict_field {
+ ($key_ty:ty) => {{
+ let dict = array
+ .as_any()
+
.downcast_ref::<arrow::array::DictionaryArray<$key_ty>>()
Review Comment:
nit: `use arrow::array::DictionaryArray` and
```suggestion
.downcast_ref::<DictionaryArray<$key_ty>>()
```
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -338,6 +385,42 @@ impl ScalarUDFImpl for GetFieldFunc {
}
}
}
+ // Dictionary-encoded struct: resolve the child field from
+ // the underlying struct, then wrap the result back in the
+ // same Dictionary type so the promised type matches execution.
+ DataType::Dictionary(key_type, value_type)
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ let DataType::Struct(fields) = value_type.as_ref() else {
+ unreachable!()
+ };
+ let field_name = sv
+ .as_ref()
+ .and_then(|sv| {
+ sv.try_as_str().flatten().filter(|s| !s.is_empty())
+ })
+ .ok_or_else(|| {
+ datafusion_common::DataFusionError::Execution(
+ "Field name must be a non-empty
string".to_string(),
+ )
Review Comment:
```suggestion
exec_err!("Field name must be a non-empty
string")
```
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -199,6 +199,53 @@ fn extract_single_field(base: ColumnarValue, name:
ScalarValue) -> Result<Column
let string_value = name.try_as_str().flatten().map(|s| s.to_string());
match (array.data_type(), name, string_value) {
+ // Dictionary-encoded struct: extract the field from the dictionary's
+ // values (the deduplicated struct array) and rebuild a dictionary with
+ // the same keys. This preserves dictionary encoding without expanding.
+ (DataType::Dictionary(key_type, value_type), _, Some(field_name))
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ // Downcast to DictionaryArray to access keys and values without
+ // materializing the dictionary.
+ macro_rules! extract_dict_field {
+ ($key_ty:ty) => {{
+ let dict = array
+ .as_any()
+
.downcast_ref::<arrow::array::DictionaryArray<$key_ty>>()
+ .ok_or_else(|| {
+
datafusion_common::DataFusionError::Internal(format!(
+ "Failed to downcast dictionary with key type
{}",
+ key_type
+ ))
+ })?;
+ let values_struct = as_struct_array(dict.values())?;
+ let field_col =
+
values_struct.column_by_name(&field_name).ok_or_else(|| {
+
datafusion_common::DataFusionError::Execution(format!(
+ "Field {field_name} not found in dictionary
struct"
+ ))
+ })?;
+ // Rebuild dictionary: same keys, extracted field as
values.
+ let new_dict =
arrow::array::DictionaryArray::<$key_ty>::try_new(
Review Comment:
```suggestion
let new_dict = DictionaryArray::<$key_ty>::try_new(
```
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -199,6 +199,53 @@ fn extract_single_field(base: ColumnarValue, name:
ScalarValue) -> Result<Column
let string_value = name.try_as_str().flatten().map(|s| s.to_string());
match (array.data_type(), name, string_value) {
+ // Dictionary-encoded struct: extract the field from the dictionary's
+ // values (the deduplicated struct array) and rebuild a dictionary with
+ // the same keys. This preserves dictionary encoding without expanding.
+ (DataType::Dictionary(key_type, value_type), _, Some(field_name))
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ // Downcast to DictionaryArray to access keys and values without
+ // materializing the dictionary.
+ macro_rules! extract_dict_field {
+ ($key_ty:ty) => {{
+ let dict = array
+ .as_any()
+
.downcast_ref::<arrow::array::DictionaryArray<$key_ty>>()
+ .ok_or_else(|| {
+
datafusion_common::DataFusionError::Internal(format!(
+ "Failed to downcast dictionary with key type
{}",
+ key_type
+ ))
Review Comment:
```suggestion
internal_err!("Failed to downcast dictionary
with key type {key_type}")
```
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -338,6 +385,42 @@ impl ScalarUDFImpl for GetFieldFunc {
}
}
}
+ // Dictionary-encoded struct: resolve the child field from
+ // the underlying struct, then wrap the result back in the
+ // same Dictionary type so the promised type matches execution.
+ DataType::Dictionary(key_type, value_type)
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ let DataType::Struct(fields) = value_type.as_ref() else {
+ unreachable!()
+ };
+ let field_name = sv
+ .as_ref()
+ .and_then(|sv| {
+ sv.try_as_str().flatten().filter(|s| !s.is_empty())
+ })
+ .ok_or_else(|| {
+ datafusion_common::DataFusionError::Execution(
+ "Field name must be a non-empty
string".to_string(),
+ )
+ })?;
+
+ let child_field = fields
+ .iter()
+ .find(|f| f.name() == field_name)
+ .ok_or_else(|| {
+ plan_datafusion_err!("Field {field_name} not found
in struct")
+ })?;
+
+ let nullable =
+ current_field.is_nullable() ||
child_field.is_nullable();
+ let dict_type = DataType::Dictionary(
+ key_type.clone(),
+ Box::new(child_field.data_type().clone()),
+ );
+ current_field =
+ Arc::new(Field::new(child_field.name(), dict_type,
nullable));
Review Comment:
Wouldn't it be better to clone the `child_field` as the Map/Struct arms do ?
This would preserve any metadata of the field.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]