tustvold commented on code in PR #3447:
URL: https://github.com/apache/arrow-rs/pull/3447#discussion_r1061473039
##########
parquet/src/arrow/arrow_writer/mod.rs:
##########

```diff
@@ -1838,6 +1851,47 @@ mod tests {
         one_column_roundtrip(values, false);
     }
 
+    #[test]
+    fn fallback_flush_data_page() {
+        // tests if the Fallback::flush_data_page clears all buffers correctly
+        let raw_values: Vec<_> = (0..MEDIUM_SIZE).map(|i| i.to_string()).collect();
+        let values = Arc::new(StringArray::from(raw_values));
+        let encodings = vec![
+            Encoding::DELTA_BYTE_ARRAY,
+            Encoding::DELTA_LENGTH_BYTE_ARRAY,
+        ];
+        let data_type = values.data_type().clone();
+        let schema = Arc::new(Schema::new(vec![Field::new("col", data_type, false)]));
+        let expected_batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+
+        let row_group_sizes = [1024, SMALL_SIZE, SMALL_SIZE / 2, SMALL_SIZE / 2 + 1, 10];
+        let data_pagesize_limit: usize = 32;
+        let write_batch_size: usize = 16;
+
+        for encoding in &encodings {
+            for row_group_size in row_group_sizes {
+                let props = WriterProperties::builder()
+                    .set_writer_version(WriterVersion::PARQUET_2_0)
+                    .set_max_row_group_size(row_group_size)
+                    .set_dictionary_enabled(false)
+                    .set_encoding(*encoding)
+                    .set_data_pagesize_limit(data_pagesize_limit)
+                    .set_write_batch_size(write_batch_size)
+                    .build();
+
+                roundtrip_opts_with_array_validation(&expected_batch, props, |a, b| {
+                    let string_array_a = StringArray::from(a.clone());
```

Review Comment:
   Perhaps this could be simplified to `assert_eq(string_array_a, string_array_b)` then?
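A minimal sketch of the simplification the reviewer is suggesting, assuming (as the diff above implies) that the validation closure receives the written and read arrays as `&ArrayData` and reusing the names from the diff; in Rust the assertion is the `assert_eq!` macro, and `StringArray`'s `PartialEq` impl compares the arrays' contents directly:

```rust
// Hypothetical simplification of the validation closure from the diff above:
// instead of comparing the two arrays field by field, convert both sides to
// StringArray and compare them with a single assertion.
roundtrip_opts_with_array_validation(&expected_batch, props, |a, b| {
    let string_array_a = StringArray::from(a.clone());
    let string_array_b = StringArray::from(b.clone());
    // PartialEq on StringArray checks lengths, validity, and values in one shot
    assert_eq!(string_array_a, string_array_b);
});
```

On mismatch, `assert_eq!` also prints both arrays via their `Debug` impls, which tends to make test failures easier to diagnose than a hand-rolled element-wise check.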