adriangb commented on issue #14406: URL: https://github.com/apache/datafusion/issues/14406#issuecomment-2629269661
Here's another angle of attack. If I disable the optimizer
(`SET datafusion.optimizer.max_passes = 0`), I end up with 12 partitions
and... 12 output rows, even though the CTE has `limit 1`.
```
DataFusion CLI v44.0.0
> with selection as (
select *
from 'parquet_files/*'
limit 1
)
select 1 as foo
from selection
order by duration
limit 1000;
+-----+
| foo |
+-----+
| 1 |
| 1 |
+-----+
2 row(s) fetched.
Elapsed 0.027 seconds.
> explain analyze
with selection as (
select *
from 'parquet_files/*'
limit 1
)
select 1 as foo
from selection
order by duration
limit 1000;

| plan_type | plan
|

| Plan with Metrics | ProjectionExec: expr=[foo@0 as foo],
metrics=[output_rows=2, elapsed_compute=166ns]
|
| | SortExec: TopK(fetch=1000), expr=[duration@1 ASC
NULLS LAST], preserve_partitioning=[false], metrics=[output_rows=2,
elapsed_compute=10.332µs, row_replacements=2]
|
| | ProjectionExec: expr=[1 as foo, duration@0 as
duration], metrics=[output_rows=2, elapsed_compute=2.583µs]
|
| | CoalescePartitionsExec, metrics=[output_rows=2,
elapsed_compute=29.666µs]
|
| | ParquetExec: file_groups={2 groups:
[[Users/adriangb/GitHub/platform/parquet_files/day=1970-01-01/file_0.parquet],
[Users/adriangb/GitHub/platform/parquet_files/day=1970-01-02/file_1.parquet]]},
projection=[duration], limit=1, metrics=[output_rows=2, elapsed_compute=2ns,
bytes_scanned=128, file_open_errors=0, file_scan_errors=0,
num_predicate_creation_errors=0, page_index_rows_matched=0,
page_index_rows_pruned=0, predicate_evaluation_errors=0,
pushdown_rows_matched=0, pushdown_rows_pruned=0,
row_groups_matched_bloom_filter=0, row_groups_matched_statistics=0,
row_groups_pruned_bloom_filter=0, row_groups_pruned_statistics=0,
bloom_filter_eval_time=4ns, metadata_load_time=199.836µs,
page_index_eval_time=4ns, row_pushdown_eval_time=4ns, statistics_eval_time=4ns,
time_elapsed_opening=225.792µs, time_elapsed_processing=198.625µs,
time_elapsed_scanning_total=134.75µs,
time_elapsed_scanning_until_data=134.75µs] |
| |
|

1 row(s) fetched.
Elapsed 0.024 seconds.
> SET datafusion.optimizer.max_passes = 0;
0 row(s) fetched.
Elapsed 0.000 seconds.
> with selection as (
select *
from 'parquet_files/*'
limit 1
)
select 1 as foo
from selection
order by duration
limit 1000;
+-----+
| foo |
+-----+
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
+-----+
12 row(s) fetched.
Elapsed 0.030 seconds.
> explain analyze
with selection as (
select *
from 'parquet_files/*'
limit 1
)
select 1 as foo
from selection
order by duration
limit 1000;


------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan
|
+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


| Plan with Metrics | ProjectionExec: expr=[foo@0 as foo],
metrics=[output_rows=12, elapsed_compute=167ns]
|
| | SortExec: TopK(fetch=1000), expr=[duration@1 ASC
NULLS LAST], preserve_partitioning=[false], metrics=[output_rows=12,
elapsed_compute=25.96µs, row_replacements=12]
|
| | ProjectionExec: expr=[1 as foo, duration@0 as
duration], metrics=[output_rows=12, elapsed_compute=8.584µs]
|
| | CoalescePartitionsExec, metrics=[output_rows=12,
elapsed_compute=24.209µs]
|
| | ParquetExec: file_groups={12 groups:
[[Users/adriangb/GitHub/platform/parquet_files/day=1970-01-01/file_0.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-02/file_1.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-03/file_2.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-04/file_3.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-05/file_4.parquet,
...],
[Users/adriangb/GitHub/platform/parquet_files/day=1970-01-10/file_9.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-11/file_10.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-12/file_11.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-13/file_12.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-14/file_13.parquet,
...],
[Users/adriangb/GitHub/platform/parquet_files/day=1970-01-19/file_18.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-20/file_19.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-21/file_20.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-22/file_21.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-23/file_22.parquet,
...],
[Users/adriangb/GitHub/platform/parquet_files/day=1970-01-28/file_27.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-29/file_28.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-30/file_29.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-01-31/file_30.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-02-01/file_31.parquet,
...],
[Users/adriangb/GitHub/platform/parquet_files/day=1970-02-06/file_36.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-02-07/file_37.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-02-08/file_38.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-02-09/file_39.parquet,
Users/adriangb/GitHub/platform/parquet_files/day=1970-02-10/file_40.parquet,
...], ...]}, projection=[duration, day], limit=1,
metrics=[output_rows=12, elapsed_compute=12ns, bytes_scanned=768,
file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0,
page_index_rows_matched=0, page_index_rows_pruned=0,
predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0,
row_groups_matched_bloom_filter=0, row_groups_matched_statistics=0,
row_groups_pruned_bloom_filter=0, row_groups_pruned_statistics=0,
bloom_filter_eval_time=46ns, metadata_load_time=4.195443ms,
page_index_eval_time=46ns, row_pushdown_eval_time=46ns,
statistics_eval_time=46ns, time_elapsed_opening=2.685875ms,
time_elapsed_processing=1.65742ms, time_elapsed_scanning_total=1.8065ms,
time_elapsed_scanning_until_data=1.805166ms] |
| |
|



1 row(s) fetched.
Elapsed 0.025 seconds.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
