This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6ee019efb7 Enforce unique names for `is_set` on `first_value` and
`last_value` (#18303)
6ee019efb7 is described below
commit 6ee019efb79c828814c28612c2c1c17b52e833d8
Author: Marc Brinkmann <[email protected]>
AuthorDate: Tue Oct 28 03:20:54 2025 +0100
Enforce unique names for `is_set` on `first_value` and `last_value` (#18303)
## Which issue does this PR close?
- Closes #18302
## Rationale for this change
As described in the issue, this is a low-effort quality-of-life (QoL) fix for now.
## What changes are included in this PR?
Uses the existing state-field naming helper (`format_state_name`) to replace
the hardcoded `"is_set"` with a field-dependent name. Example output:
```
Field {
name: "first_value(records_partitioned.trace_id)[first_value]",
data_type: Utf8View,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: {},
},
Field {
name: "first_value(records_partitioned.trace_id)[first_value_is_set]",
data_type: Boolean,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: {},
},
Field {
name: "first_value(records_partitioned.value)[first_value]",
data_type: Int32,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: {},
},
Field {
name: "first_value(records_partitioned.value)[first_value_is_set]",
data_type: Boolean,
nullable: true,
dict_id: 0,
dict_is_ordered: false,
metadata: {},
},
```
## Are these changes tested?
No new tests have been added; the change should be covered by existing
tests, whose expected state field names are updated in this PR.
## Are there any user-facing changes?
There should not be any; I assume `is_set` is never user-visible.
---
datafusion/core/tests/dataframe/mod.rs | 4 ++--
datafusion/functions-aggregate/src/first_last.rs | 18 ++++++++++++++++--
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/datafusion/core/tests/dataframe/mod.rs
b/datafusion/core/tests/dataframe/mod.rs
index 17d1695478..043f42b18c 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -6459,10 +6459,10 @@ async fn
test_duplicate_state_fields_for_dfschema_construct() -> Result<()> {
"ticker",
"first_value(value)[first_value]",
"timestamp@0",
- "is_set",
+ "first_value(value)[first_value_is_set]",
"last_value(value)[last_value]",
"timestamp@0",
- "is_set",
+ "last_value(value)[last_value_is_set]",
];
let binding = partial_agg.schema();
diff --git a/datafusion/functions-aggregate/src/first_last.rs
b/datafusion/functions-aggregate/src/first_last.rs
index 28755427c7..b2a40ff50b 100644
--- a/datafusion/functions-aggregate/src/first_last.rs
+++ b/datafusion/functions-aggregate/src/first_last.rs
@@ -166,7 +166,14 @@ impl AggregateUDFImpl for FirstValue {
)
.into()];
fields.extend(args.ordering_fields.iter().cloned());
- fields.push(Field::new("is_set", DataType::Boolean, true).into());
+ fields.push(
+ Field::new(
+ format_state_name(args.name, "first_value_is_set"),
+ DataType::Boolean,
+ true,
+ )
+ .into(),
+ );
Ok(fields)
}
@@ -1087,7 +1094,14 @@ impl AggregateUDFImpl for LastValue {
)
.into()];
fields.extend(args.ordering_fields.iter().cloned());
- fields.push(Field::new("is_set", DataType::Boolean, true).into());
+ fields.push(
+ Field::new(
+ format_state_name(args.name, "last_value_is_set"),
+ DataType::Boolean,
+ true,
+ )
+ .into(),
+ );
Ok(fields)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]