alamb commented on code in PR #10973: URL: https://github.com/apache/datafusion/pull/10973#discussion_r1644578233
##########
datafusion/core/tests/parquet/arrow_statistics.rs:
##########
@@ -2027,7 +2010,7 @@ async fn test_column_non_existent() {
         // row counts are [5, 5, 5, 5]
         expected_row_counts: None,
         column_name: "i_do_not_exist",
-        check: Check::DataPage,
+        check: Check::Both,

Review Comment:
   👍

##########
datafusion/core/src/datasource/physical_plan/parquet/statistics.rs:
##########
@@ -718,21 +718,33 @@ impl<'a> StatisticsConverter<'a> {
     ///
     /// # Example
     /// ```no_run
+    /// # use arrow::datatypes::Schema;
+    /// # use arrow_array::ArrayRef;
     /// # use parquet::file::metadata::ParquetMetaData;
     /// # use datafusion::datasource::physical_plan::parquet::StatisticsConverter;
     /// # fn get_parquet_metadata() -> ParquetMetaData { unimplemented!() }
-    /// // Given the metadata for a parquet file
+    /// # fn get_arrow_schema() -> Schema { unimplemented!() }
+    /// // Given the metadata for a parquet file and the arrow schema
     /// let metadata: ParquetMetaData = get_parquet_metadata();
+    /// let arrow_schema: Schema = get_arrow_schema();
+    /// let parquet_schema = metadata.file_metadata().schema_descr();
+    /// // create a converter
+    /// let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)
+    ///   .unwrap();
     /// // get the row counts for each row group
-    /// let row_counts = StatisticsConverter::row_group_row_counts(metadata
+    /// let row_counts = converter.row_group_row_counts(metadata
     ///   .row_groups()
     ///   .iter()
     /// );
     /// ```
-    pub fn row_group_row_counts<I>(metadatas: I) -> Result<UInt64Array>
+    pub fn row_group_row_counts<I>(&self, metadatas: I) -> Result<Option<UInt64Array>>

Review Comment:
   I think this is a cleaner and more consistent interface

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org