rgehan commented on code in PR #18352:
URL: https://github.com/apache/datafusion/pull/18352#discussion_r2476859655


##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -2996,6 +2997,119 @@ async fn test_count_wildcard_on_window() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn reproducer_e2e_with_repartition_sorts_false() -> Result<()> {
+    reproducer_e2e_impl(false).await?;
+
+    // 💥 Doesn't pass, and generates this plan:
+    //
+    // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], 
ordering_mode=Sorted
+    //   SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false]
+    //     CoalescePartitionsExec
+    //       AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[]
+    //         UnionExec
+    //           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], 
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+    //           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn reproducer_e2e_with_repartition_sorts_true() -> Result<()> {
+    reproducer_e2e_impl(true).await?;
+
+    // 💥 Doesn't pass, and generates this plan:
+    //
+    // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], 
ordering_mode=Sorted
+    //   SortPreservingMergeExec: [id@0 ASC NULLS LAST]
+    //     SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true]
+    //       AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[]
+    //         UnionExec
+    //           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], 
output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
+    //           DataSourceExec: file_groups={1 group: 
[[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet
+
+    Ok(())
+}
+
+async fn reproducer_e2e_impl(repartition_sorts: bool) -> Result<()> {
+    let config = SessionConfig::default()
+        .with_target_partitions(1)
+        .with_repartition_sorts(repartition_sorts);
+    let ctx = SessionContext::new_with_config(config);
+
+    let testdata = parquet_test_data();
+
+    // Register "sorted" table, that is sorted
+    ctx.register_parquet(
+        "sorted",
+        &format!("{testdata}/alltypes_tiny_pages.parquet"),
+        ParquetReadOptions::default()
+            .file_sort_order(vec![vec![col("id").sort(true, false)]]),

Review Comment:
   No, the file is not actually sorted, but I was hoping this was a valid way of 
making the planner think it is and plan accordingly.
   
   Can this cause issues?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to