Ted-Jiang commented on code in PR #5268: URL: https://github.com/apache/arrow-datafusion/pull/5268#discussion_r1105328492
########## datafusion/core/src/datasource/file_format/parquet.rs: ########## @@ -550,50 +550,63 @@ pub(crate) mod test_util { use parquet::file::properties::WriterProperties; use tempfile::NamedTempFile; + /// How many rows per page should be written + const ROWS_PER_PAGE: usize = 2; + /// Writes `batches` to a temporary parquet file /// - /// If multi_page is set to `true`, all batches are written into - /// one temporary parquet file and the parquet file is written + /// If multi_page is set to `true`, the parquet file(s) are written /// with 2 rows per data page (used to test page filtering and /// boundaries). pub async fn store_parquet( batches: Vec<RecordBatch>, multi_page: bool, ) -> Result<(Vec<ObjectMeta>, Vec<NamedTempFile>)> { - if multi_page { - // All batches write in to one file, each batch must have same schema. - let mut output = NamedTempFile::new().expect("creating temp file"); - let mut builder = WriterProperties::builder(); - builder = builder.set_data_page_row_count_limit(2); - let proper = builder.build(); - let mut writer = - ArrowWriter::try_new(&mut output, batches[0].schema(), Some(proper)) - .expect("creating writer"); - for b in batches { - writer.write(&b).expect("Writing batch"); - } - writer.close().unwrap(); - Ok((vec![local_unpartitioned_file(&output)], vec![output])) - } else { - // Each batch writes to their own file - let files: Vec<_> = batches - .into_iter() - .map(|batch| { - let mut output = NamedTempFile::new().expect("creating temp file"); + // Each batch writes to their own file + let files: Vec<_> = batches + .into_iter() + .map(|batch| { + let mut output = NamedTempFile::new().expect("creating temp file"); + + let builder = WriterProperties::builder(); + let props = if multi_page { + builder.set_data_page_row_count_limit(2) Review Comment: ```suggestion builder.set_data_page_row_count_limit(ROWS_PER_PAGE) ``` I think this should use the `ROWS_PER_PAGE` constant here instead of the hard-coded `2`. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org