zhuqi-lucas opened a new issue, #17662: URL: https://github.com/apache/datafusion/issues/17662
### Describe the bug After the PR since apache datafusion 50.0.0, we will have name check for DFSchema construct: https://github.com/apache/datafusion/pull/17189/files#r2277001021 But Partial AggregateMode will generate duplicate field names which will fail DFSchema construct, in our production we use lower level to do this, so we need to make Partial AggregateMode not generate duplicate field names. ### To Reproduce Reproduced code: ```rust #[test] fn test_duplicate_state_fields_fails_for_dfschema_construct() -> Result<()> { let ctx = SessionContext::new(); // Simple schema with just the fields we need let file_schema = Arc::new(Schema::new(vec![ Field::new( "timestamp", DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true, ), Field::new("ticker", DataType::Utf8, true), Field::new("value", DataType::Float64, true), Field::new("date", DataType::Utf8, false), ])); let df_schema = DFSchema::try_from(file_schema.clone())?; let timestamp = col("timestamp"); let value = col("value"); let ticker = col("ticker"); let date = col("date"); // Create a simple mock execution plan (you can replace this with EmptyExec if needed) let mock_exec = Arc::new(EmptyExec::new(file_schema.clone())); // Build first_value aggregate let first_value = Arc::new( AggregateExprBuilder::new( datafusion_functions_aggregate::first_last::first_value_udaf(), vec![ctx.create_physical_expr(value.clone(), &df_schema)?], ) .alias("first_value(value)") .order_by(vec![PhysicalSortExpr::new( ctx.create_physical_expr(timestamp.clone(), &df_schema)?, SortOptions::new(false, false), )]) .schema(file_schema.clone()) .build() .expect("Failed to build first_value"), ); // Build last_value aggregate let last_value = Arc::new( AggregateExprBuilder::new( datafusion_functions_aggregate::first_last::last_value_udaf(), vec![ctx.create_physical_expr(value.clone(), &df_schema)?], ) .alias("last_value(value)") .order_by(vec![PhysicalSortExpr::new( ctx.create_physical_expr(timestamp.clone(), &df_schema)?, SortOptions::new(false, false), )]) .schema(file_schema.clone()) .build() .expect("Failed to build last_value"), ); let partial_agg = AggregateExec::try_new( AggregateMode::Partial, PhysicalGroupBy::new_single(vec![ ( ctx.create_physical_expr(date.clone(), &df_schema)?, "date".to_string(), ), ( ctx.create_physical_expr(ticker.clone(), &df_schema)?, "ticker".to_string(), ), ]), vec![first_value, last_value], // Both aggregates together vec![None, None], mock_exec, file_schema, ).expect("Failed to build partial agg"); println!("{:?}", partial_agg.schema()); for field in partial_agg.schema().fields() { println!("Field: {}", field.name()); } let partial_agg_exec_schema = DFSchema::try_from(partial_agg.schema()); // This should fail due to duplicate state field names assert!( partial_agg_exec_schema.is_err(), "Expected get AggregateExec to fail due to duplicate state field names" ); if let Err(e) = partial_agg_exec_schema { println!("Expected error due to duplicate state fields: {}", e); // Verify it's the specific duplicate field error we expect assert!(e.to_string().contains("duplicate") || e.to_string().contains("Duplicate")); } Ok(()) } ``` Schema { fields: [Field { name: "date", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "ticker", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "first_value(value)[first_value]", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "timestamp@0", data_type: Timestamp(Nanosecond, Some("UTC")), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "is_set", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "last_value(value)[last_value]", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "timestamp@0", data_type: Timestamp(Nanosecond, Some("UTC")), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "is_set", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} } Field: date Field: ticker Field: first_value(value)[first_value] Field: timestamp@0 Field: is_set Field: last_value(value)[last_value] Field: timestamp@0 Field: is_set Expected error due to duplicate state fields: Schema error: Schema contains duplicate unqualified field name "timestamp@0" ### Expected behavior _No response_ ### Additional context _No response_ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org