alamb commented on code in PR #6319:
URL: https://github.com/apache/arrow-rs/pull/6319#discussion_r1739708945


##########
parquet/src/file/metadata/writer.rs:
##########
@@ -450,19 +451,47 @@ mod tests {
             true,
         )]));
 
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, 
Some(2)]));
+        // build row groups / pages that exercise different combinations of 
nulls and values
+        // note that below we set the row group and page sizes to 4 and 2 
respectively
+        // so that these "groupings" make sense
+        let a: ArrayRef = Arc::new(Int32Array::from(vec![
+            // a row group that has all values
+            Some(i32::MIN), Some(-1),
+            Some(1), Some(i32::MAX),
+            // a row group with a page of all nulls and a page of all values
+            None, None,
+            Some(2), Some(3),
+            // a row group that has all null pages
+            None, None,
+            None, None,
+            // a row group having 1 page with all values and 1 page with some 
nulls
+            Some(4), Some(5),
+            None, Some(6),
+            // a row group having 1 page with all nulls and 1 page with some 
nulls
+            None, None,
+            Some(7), None,
+            // a row group having all pages with some nulls
+            None, Some(8),
+            Some(9), None,
+        ]));
 
         let batch = RecordBatch::try_from_iter(vec![("a", a)]).unwrap();
 
-        let writer_props = match write_page_index {
+        let writer_props_builder = match write_page_index {
             true => WriterProperties::builder()
-                .set_statistics_enabled(EnabledStatistics::Page)
-                .build(),
+                .set_statistics_enabled(EnabledStatistics::Page),
             false => WriterProperties::builder()
-                .set_statistics_enabled(EnabledStatistics::Chunk)
-                .build(),
+                .set_statistics_enabled(EnabledStatistics::Chunk),
         };
 
+        // tune the size or pages to the data above
+        // to make sure we exercise code paths where all items in a page are 
null, etc.
+        let writer_props = writer_props_builder
+            .set_max_row_group_size(4)
+            .set_data_page_row_count_limit(2)
+            .set_write_batch_size(2)

Review Comment:
   I thik this is reasonable



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to