NGA-TRAN commented on code in PR #10802: URL: https://github.com/apache/datafusion/pull/10802#discussion_r1628162150
########## datafusion-examples/examples/parquet_index.rs: ########## @@ -518,21 +518,17 @@ impl ParquetMetadataIndexBuilder { // extract the parquet statistics from the file's footer let metadata = reader.metadata(); + let row_groups = metadata.row_groups(); // Extract the min/max values for each row group from the statistics - let row_counts = StatisticsConverter::row_counts(reader.metadata())?; - let value_column_mins = StatisticsConverter::try_new( + let converter = StatisticsConverter::try_new( "value", - RequestedStatistics::Min, reader.schema(), - )? - .extract(reader.metadata())?; - let value_column_maxes = StatisticsConverter::try_new( - "value", - RequestedStatistics::Max, - reader.schema(), - )? - .extract(reader.metadata())?; + reader.parquet_schema(), + )?; + let row_counts = StatisticsConverter::row_group_row_counts(row_groups.iter())?; + let value_column_mins = converter.row_group_mins(row_groups.iter())?; + let value_column_maxes = converter.row_group_maxes(row_groups.iter())?; Review Comment: ❤️ ########## datafusion/core/src/datasource/physical_plan/parquet/statistics.rs: ########## @@ -323,17 +324,6 @@ fn collect_scalars<I: Iterator<Item = Option<ScalarValue>>>( } } -/// What type of statistics should be extracted? -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum RequestedStatistics { - /// Minimum Value - Min, - /// Maximum Value - Max, - /// Null Count, returned as a [`UInt64Array`]) - NullCount, -} - Review Comment: I agree we do not need this. We store each min/max/row_count as an array instead :thus -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org