This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 4b7b5ab11a Minor: add runtime asserts to RowGroup (#10641)
4b7b5ab11a is described below

commit 4b7b5ab11a868c0069245f411f0d9b08e1b2744e
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu May 23 23:04:09 2024 -0400

    Minor: add runtime asserts to RowGroup (#10641)
---
 .../core/src/datasource/physical_plan/parquet/row_groups.rs  | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs
index 2da3cb3072..0a0ca4369d 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs
@@ -93,7 +93,11 @@ impl RowGroupSet {
     /// Prune remaining row groups to only those  within the specified range.
     ///
     /// Updates this set to mark row groups that should not be scanned
+    ///
+    /// # Panics
+    /// if `groups.len() != self.len()`
     pub fn prune_by_range(&mut self, groups: &[RowGroupMetaData], range: &FileRange) {
+        assert_eq!(groups.len(), self.len());
         for (idx, metadata) in groups.iter().enumerate() {
             if !self.should_scan(idx) {
                 continue;
@@ -120,6 +124,9 @@ impl RowGroupSet {
     ///
     /// Note: This method currently ignores ColumnOrder
     /// <https://github.com/apache/datafusion/issues/8335>
+    ///
+    /// # Panics
+    /// if `groups.len() != self.len()`
     pub fn prune_by_statistics(
         &mut self,
         arrow_schema: &Schema,
@@ -128,6 +135,7 @@ impl RowGroupSet {
         predicate: &PruningPredicate,
         metrics: &ParquetFileMetrics,
     ) {
+        assert_eq!(groups.len(), self.len());
         for (idx, metadata) in groups.iter().enumerate() {
             if !self.should_scan(idx) {
                 continue;
@@ -161,6 +169,9 @@ impl RowGroupSet {
     /// [`PruningPredicate`].
     ///
     /// Updates this set with row groups that should not be scanned
+    ///
+    /// # Panics
+    /// if the builder does not have the same number of row groups as this set
     pub async fn prune_by_bloom_filters<T: AsyncFileReader + Send + 'static>(
         &mut self,
         arrow_schema: &Schema,
@@ -168,6 +179,7 @@ impl RowGroupSet {
         predicate: &PruningPredicate,
         metrics: &ParquetFileMetrics,
     ) {
+        assert_eq!(builder.metadata().num_row_groups(), self.len());
         for idx in 0..self.len() {
             if !self.should_scan(idx) {
                 continue;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to