xinlifoobar commented on code in PR #10802:
URL: https://github.com/apache/datafusion/pull/10802#discussion_r1627832066


##########
datafusion/core/benches/parquet_statistic.rs:
##########
@@ -159,41 +157,26 @@ fn criterion_benchmark(c: &mut Criterion) {
         let file = file.reopen().unwrap();
         let reader = ArrowReaderBuilder::try_new(file).unwrap();
         let metadata = reader.metadata();
+        let row_groups = metadata.row_groups();
 
         let mut group =
             c.benchmark_group(format!("Extract statistics for {}", 
dtype.clone()));
         group.bench_function(
             BenchmarkId::new("extract_statistics", dtype.clone()),
             |b| {
                 b.iter(|| {
-                    let _ = StatisticsConverter::try_new(
-                        "col",
-                        RequestedStatistics::Min,
-                        reader.schema(),
-                    )
-                    .unwrap()
-                    .extract(metadata)
-                    .unwrap();
-
-                    let _ = StatisticsConverter::try_new(
-                        "col",
-                        RequestedStatistics::Max,
-                        reader.schema(),
-                    )
-                    .unwrap()
-                    .extract(reader.metadata())
-                    .unwrap();
-
-                    let _ = StatisticsConverter::try_new(
+                    let converter = StatisticsConverter::try_new(
                         "col",
-                        RequestedStatistics::NullCount,
                         reader.schema(),
+                        reader.parquet_schema(),
                     )
-                    .unwrap()
-                    .extract(reader.metadata())
                     .unwrap();
 
-                    let _ = 
StatisticsConverter::row_counts(reader.metadata()).unwrap();
+                    let _ = 
converter.row_group_mins(row_groups.iter()).unwrap();

Review Comment:
   This is clearer than using an enum, IMO :)



##########
datafusion-examples/examples/parquet_index.rs:
##########
@@ -518,21 +518,17 @@ impl ParquetMetadataIndexBuilder {
 
         // extract the parquet statistics from the file's footer
         let metadata = reader.metadata();
+        let row_groups = metadata.row_groups();
 
         // Extract the min/max values for each row group from the statistics
-        let row_counts = StatisticsConverter::row_counts(reader.metadata())?;
-        let value_column_mins = StatisticsConverter::try_new(
+        let converter = StatisticsConverter::try_new(
             "value",
-            RequestedStatistics::Min,
             reader.schema(),
-        )?
-        .extract(reader.metadata())?;
-        let value_column_maxes = StatisticsConverter::try_new(
-            "value",
-            RequestedStatistics::Max,
-            reader.schema(),
-        )?
-        .extract(reader.metadata())?;
+            reader.parquet_schema(),
+        )?;
+        let row_counts = 
StatisticsConverter::row_group_row_counts(row_groups.iter())?;

Review Comment:
   This looks like a user-facing change; should that be OK at this stage?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to