alamb commented on code in PR #14157: URL: https://github.com/apache/datafusion/pull/14157#discussion_r1921049964
########## datafusion/core/benches/sql_planner.rs: ########## @@ -147,6 +149,77 @@ fn benchmark_with_param_values_many_columns(ctx: &SessionContext, b: &mut Benche }); } +/// Registers a table like this: +/// c0,c1,c2...,c99 +/// 0,100...9900 +/// 0,200...19800 +/// 0,300...29700 +fn register_union_order_table(ctx: &SessionContext, num_columns: usize, num_rows: usize) { + // ("c0", [0, 0, ...]) + // ("c1": [100, 200, ...]) + // etc + let iter = (0..num_columns).map(|i| i as u64).map(|i| { + let array: ArrayRef = Arc::new(arrow::array::UInt64Array::from_iter_values( + (0..num_rows) + .map(|j| j as u64 * 100 + i) + .collect::<Vec<_>>(), + )); + (format!("c{}", i), array) + }); + let batch = RecordBatch::try_from_iter(iter).unwrap(); + let schema = batch.schema(); + let partitions = vec![vec![batch]]; + + // tell DataFusion that the table is sorted by all columns + let sort_order = (0..num_columns) + .map(|i| col(format!("c{}", i)).sort(true, true)) + .collect::<Vec<_>>(); + + // create the table + let table = MemTable::try_new(schema, partitions) + .unwrap() + .with_sort_order(vec![sort_order]); + + ctx.register_table("t", Arc::new(table)).unwrap(); +} + +/// return a query like +/// ```sql +/// select c1, null as c2, ... null as cn from t ORDER BY c1 +/// UNION ALL +/// select null as c1, c2, ... null as cn from t ORDER BY c2 +/// ... +/// select null as c1, null as c2, ... cn from t ORDER BY cn Review Comment: > I probably get it, the problem is with the planning of such query not the execution Yes, exactly > do we really need inner `ORDER BY` if the query got the outer one? 🤔 Shouldn't be inner sorting ignored? Yes, indeed. 
I think the way it is ignored is that the sort equivalence code determines that the inner sorts aren't needed (or, in this case, that they are all equivalent, so the top-level `ORDER BY` can be a merge rather than a sort). The sort equivalence code (`OrderEquivalenceProperties` in particular) is what is consuming all this time. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org