rgehan commented on code in PR #18352:
URL: https://github.com/apache/datafusion/pull/18352#discussion_r2472752391
##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2996,6 +2997,119 @@ async fn test_count_wildcard_on_window() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn reproducer_e2e_with_repartition_sorts_false() -> Result<()> {
+ reproducer_e2e_impl(false).await?;
+
+ // 💥 Doesn't pass, and generates this plan:
+ //
+ // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted
+ // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false]
+ // CoalescePartitionsExec
+ // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[]
+ // UnionExec
+ // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+ // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn reproducer_e2e_with_repartition_sorts_true() -> Result<()> {
+ reproducer_e2e_impl(true).await?;
+
+ // 💥 Doesn't pass, and generates this plan:
+ //
+ // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted
+ // SortPreservingMergeExec: [id@0 ASC NULLS LAST]
+ // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true]
+ // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[]
+ // UnionExec
+ // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+ // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+
+ Ok(())
+}
+
+async fn reproducer_e2e_impl(repartition_sorts: bool) -> Result<()> {
+ let config = SessionConfig::default()
+ .with_target_partitions(1)
+ .with_repartition_sorts(repartition_sorts);
+ let ctx = SessionContext::new_with_config(config);
+
+ let testdata = parquet_test_data();
+
+ // Register "sorted" table, that is sorted
+ ctx.register_parquet(
+ "sorted",
+ &format!("{testdata}/alltypes_tiny_pages.parquet"),
+ ParquetReadOptions::default()
+ .file_sort_order(vec![vec![col("id").sort(true, false)]]),
Review Comment:
(Side note: Interestingly, with `nulls_first: true` (L3074 too), even with
the fixes from #9867, the plan includes an extra `SortExec` node that re-sorts
with nulls last. I'm not sure whether that's intentional or whether there's
another issue.)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]