xudong963 commented on code in PR #21637:
URL: https://github.com/apache/datafusion/pull/21637#discussion_r3192608315
##########
datafusion/sqllogictest/test_files/limit_pruning.slt:
##########
@@ -63,7 +63,55 @@ set datafusion.explain.analyze_level = summary;
query TT
explain analyze select * from tracking_data where species > 'M' AND s >= 50
limit 3;
----
-Plan with Metrics DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit_pruning/data.parquet]]},
projection=[species, s], limit=3, file_type=parquet, predicate=species@0 > M
AND s@1 >= 50, pruning_predicate=species_null_count@1 != row_count@2 AND
species_max@0 > M AND s_null_count@4 != row_count@2 AND s_max@3 >= 50,
required_guarantees=[], metrics=[output_rows=3, elapsed_compute=<slt:ignore>,
output_bytes=<slt:ignore>, files_ranges_pruned_statistics=1 total → 1 matched,
row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched,
row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_pages_pruned=2
total → 2 matched, limit_pruned_row_groups=2 total → 0 matched,
bytes_scanned=<slt:ignore>, metadata_load_time=<slt:ignore>,
scan_efficiency_ratio=<slt:ignore> (171/2.35 K)]
+Plan with Metrics DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit_pruning/data.parquet]]},
projection=[species, s], limit=3, file_type=parquet, predicate=species@0 > M
AND s@1 >= 50, pruning_predicate=species_null_count@1 != row_count@2 AND
species_max@0 > M AND s_null_count@4 != row_count@2 AND s_max@3 >= 50,
required_guarantees=[], metrics=[output_rows=3, elapsed_compute=<slt:ignore>,
output_bytes=<slt:ignore>, files_ranges_pruned_statistics=1 total → 1 matched,
row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched,
row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_pages_pruned=0
total → 0 matched, limit_pruned_row_groups=2 total → 0 matched,
bytes_scanned=<slt:ignore>, metadata_load_time=<slt:ignore>,
scan_efficiency_ratio=<slt:ignore> (171/2.35 K)]
+
+statement ok
+CREATE TABLE fully_matched_limit_source AS VALUES
+ (1),
+ (2),
+ (3),
+ (4),
+ (5),
+ (6),
+ (7),
+ (1),
+ (2);
+
+query I
+COPY (SELECT column1 as a FROM fully_matched_limit_source)
+TO 'test_files/scratch/limit_pruning/fully_matched_limit.parquet'
+STORED AS PARQUET
+OPTIONS (
+ 'format.max_row_group_size' '3'
+);
+----
+9
+
+statement ok
+drop table fully_matched_limit_source;
+
+statement ok
+CREATE EXTERNAL TABLE fully_matched_limit
+STORED AS PARQUET
+LOCATION 'test_files/scratch/limit_pruning/fully_matched_limit.parquet';
+
+# One fully matched row group sits between two filtered row groups.
+# LIMIT must apply across the entire scan, not once per decoder run.
+query TT
+explain analyze select a from fully_matched_limit where a >= 3 limit 4;
+----
+Plan with Metrics DataSourceExec: <slt:ignore>metrics=[output_rows=4,
<slt:ignore>row_groups_pruned_statistics=3 total → 3 matched -> 1 fully
matched<slt:ignore>]
+
+query I
+select a from fully_matched_limit where a >= 3 limit 4;
Review Comment:
done
https://github.com/apache/datafusion/pull/21637/changes/3aa4a4700e7a876f3db4686ae906f0db05ddfc99
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]