xinlifoobar commented on code in PR #10802: URL: https://github.com/apache/datafusion/pull/10802#discussion_r1627832066
########## datafusion/core/benches/parquet_statistic.rs: ########## @@ -159,41 +157,26 @@ fn criterion_benchmark(c: &mut Criterion) { let file = file.reopen().unwrap(); let reader = ArrowReaderBuilder::try_new(file).unwrap(); let metadata = reader.metadata(); + let row_groups = metadata.row_groups(); let mut group = c.benchmark_group(format!("Extract statistics for {}", dtype.clone())); group.bench_function( BenchmarkId::new("extract_statistics", dtype.clone()), |b| { b.iter(|| { - let _ = StatisticsConverter::try_new( - "col", - RequestedStatistics::Min, - reader.schema(), - ) - .unwrap() - .extract(metadata) - .unwrap(); - - let _ = StatisticsConverter::try_new( - "col", - RequestedStatistics::Max, - reader.schema(), - ) - .unwrap() - .extract(reader.metadata()) - .unwrap(); - - let _ = StatisticsConverter::try_new( + let converter = StatisticsConverter::try_new( "col", - RequestedStatistics::NullCount, reader.schema(), + reader.parquet_schema(), ) - .unwrap() - .extract(reader.metadata()) .unwrap(); - let _ = StatisticsConverter::row_counts(reader.metadata()).unwrap(); + let _ = converter.row_group_mins(row_groups.iter()).unwrap(); Review Comment: This is clearer than using an enum, IMO :) ########## datafusion-examples/examples/parquet_index.rs: ########## @@ -518,21 +518,17 @@ impl ParquetMetadataIndexBuilder { // extract the parquet statistics from the file's footer let metadata = reader.metadata(); + let row_groups = metadata.row_groups(); // Extract the min/max values for each row group from the statistics - let row_counts = StatisticsConverter::row_counts(reader.metadata())?; - let value_column_mins = StatisticsConverter::try_new( + let converter = StatisticsConverter::try_new( "value", - RequestedStatistics::Min, reader.schema(), - )? - .extract(reader.metadata())?; - let value_column_maxes = StatisticsConverter::try_new( - "value", - RequestedStatistics::Max, - reader.schema(), - )?
- .extract(reader.metadata())?; + reader.parquet_schema(), + )?; + let row_counts = StatisticsConverter::row_group_row_counts(row_groups.iter())?; Review Comment: This looks like a user-facing change; should that be OK at this stage? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org