Rich-T-kid commented on code in PR #10064:
URL: https://github.com/apache/arrow-rs/pull/10064#discussion_r3353884084


##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -5193,4 +5193,159 @@ mod tests {
         let cc = file_meta.row_group(0).column(0);
         assert!(cc.column_index_range().is_none());
     }
+
+    /// Writes `ree` and `flat` to separate Parquet buffers and asserts:
+    /// 1. Reading `ree` bytes back (with the flat schema) produces values 
equal to `flat`.
+    fn write_column_to_bytes(array: ArrayRef) -> Bytes {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "col",
+            array.data_type().clone(),
+            true,
+        )]));
+        let mut buf = Vec::new();
+        let mut writer =
+            ArrowWriter::try_new(&mut buf, schema.clone(), 
None).expect("create writer");
+        writer
+            .write(&RecordBatch::try_new(schema, vec![array]).unwrap())
+            .unwrap();
+        writer.close().unwrap();
+        Bytes::from(buf)
+    }
+
+    /// 2. Reading `ree` bytes back equals reading `flat` bytes back (same 
output).
+    fn read_column_with_schema(bytes: Bytes, schema: SchemaRef) -> ArrayRef {
+        let opts = 
crate::arrow::arrow_reader::ArrowReaderOptions::new().with_schema(schema);
+        ParquetRecordBatchReaderBuilder::try_new_with_options(bytes, opts)
+            .unwrap()
+            .build()
+            .unwrap()
+            .next()
+            .unwrap()
+            .unwrap()
+            .column(0)
+            .clone()
+    }
+
+    fn ree_write_read_roundtrip(ree: ArrayRef, flat: ArrayRef) {
+        let flat_schema = Arc::new(Schema::new(vec![Field::new(
+            "col",
+            flat.data_type().clone(),
+            true,
+        )]));
+        let ree_bytes = write_column_to_bytes(ree);
+        let flat_bytes = write_column_to_bytes(flat.clone());
+
+        let from_ree = read_column_with_schema(ree_bytes, flat_schema.clone());
+        let from_flat = read_column_with_schema(flat_bytes, flat_schema);
+
+        assert_eq!(from_ree.as_ref(), flat.as_ref());
+        assert_eq!(from_ree.as_ref(), from_flat.as_ref());
+    }
+
+    #[test]
+    fn ree_string_column_required() {
+        let ree: ArrayRef = Arc::new(
+            ["alpha", "alpha", "beta", "gamma", "gamma", "gamma"]
+                .into_iter()
+                .collect::<Int32RunArray>(),
+        );
+        let flat: ArrayRef = Arc::new(StringArray::from(vec![
+            "alpha", "alpha", "beta", "gamma", "gamma", "gamma",
+        ]));
+        ree_write_read_roundtrip(ree, flat);
+    }
+
+    #[test]
+    fn ree_string_column_nullable() {
+        let ree: ArrayRef = Arc::new(
+            [
+                Some("alpha"),
+                Some("alpha"),
+                None,

Review Comment:
   Nulls don't need to be their own separate test. Including nulls in tests that 
test other things should be good enough.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to