NGA-TRAN commented on code in PR #10802:
URL: https://github.com/apache/datafusion/pull/10802#discussion_r1628162150


##########
datafusion-examples/examples/parquet_index.rs:
##########
@@ -518,21 +518,17 @@ impl ParquetMetadataIndexBuilder {
 
         // extract the parquet statistics from the file's footer
         let metadata = reader.metadata();
+        let row_groups = metadata.row_groups();
 
         // Extract the min/max values for each row group from the statistics
-        let row_counts = StatisticsConverter::row_counts(reader.metadata())?;
-        let value_column_mins = StatisticsConverter::try_new(
+        let converter = StatisticsConverter::try_new(
             "value",
-            RequestedStatistics::Min,
             reader.schema(),
-        )?
-        .extract(reader.metadata())?;
-        let value_column_maxes = StatisticsConverter::try_new(
-            "value",
-            RequestedStatistics::Max,
-            reader.schema(),
-        )?
-        .extract(reader.metadata())?;
+            reader.parquet_schema(),
+        )?;
+        let row_counts = StatisticsConverter::row_group_row_counts(row_groups.iter())?;
+        let value_column_mins = converter.row_group_mins(row_groups.iter())?;
+        let value_column_maxes = converter.row_group_maxes(row_groups.iter())?;

Review Comment:
   ❤️ 



##########
datafusion/core/src/datasource/physical_plan/parquet/statistics.rs:
##########
@@ -323,17 +324,6 @@ fn collect_scalars<I: Iterator<Item = Option<ScalarValue>>>(
     }
 }
 
-/// What type of statistics should be extracted?
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum RequestedStatistics {
-    /// Minimum Value
-    Min,
-    /// Maximum Value
-    Max,
-    /// Null Count, returned as a [`UInt64Array`])
-    NullCount,
-}
-

Review Comment:
   I agree we do not need this. We store each min/max/row_count as an array 
instead. :thumbsup:



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to