This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new a81428240e Add check for empty schema in `parquet::schema::types::from_thrift_helper` (#6990)
a81428240e is described below
commit a81428240e87f85f12ea8d1bc160598d9af1ba35
Author: Ed Seidl <[email protected]>
AuthorDate: Sun Jan 19 04:43:43 2025 -0800
Add check for empty schema in `parquet::schema::types::from_thrift_helper` (#6990)
* fix for empty schema
* use Schema::empty per review comment
* check schema and number of batches as well (per suggestion from @alamb)
---
parquet/src/arrow/arrow_writer/mod.rs | 30 ++++++++++++++++++++++++++++++
parquet/src/schema/types.rs | 7 +++++++
2 files changed, 37 insertions(+)
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index 41f15569fd..5e0d318322 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -3409,4 +3409,34 @@ mod tests {
"Arrow: Incompatible type. Field 'temperature' has type Float64, array has type Int32"
);
}
+
+ #[test]
+ // https://github.com/apache/arrow-rs/issues/6988
+ fn test_roundtrip_empty_schema() {
+ // create empty record batch with empty schema
+ let empty_batch = RecordBatch::try_new_with_options(
+ Arc::new(Schema::empty()),
+ vec![],
+ &RecordBatchOptions::default().with_row_count(Some(0)),
+ )
+ .unwrap();
+
+ // write to parquet
+ let mut parquet_bytes: Vec<u8> = Vec::new();
+ let mut writer =
+ ArrowWriter::try_new(&mut parquet_bytes, empty_batch.schema(), None).unwrap();
+ writer.write(&empty_batch).unwrap();
+ writer.close().unwrap();
+
+ // read from parquet
+ let bytes = Bytes::from(parquet_bytes);
+ let reader = ParquetRecordBatchReaderBuilder::try_new(bytes).unwrap();
+ assert_eq!(reader.schema(), &empty_batch.schema());
+ let batches: Vec<_> = reader
+ .build()
+ .unwrap()
+ .collect::<ArrowResult<Vec<_>>>()
+ .unwrap();
+ assert_eq!(batches.len(), 0);
+ }
}
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index d9e9b22e80..68492e19f4 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -1211,6 +1211,13 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize
));
}
let element = &elements[index];
+
+ // Check for empty schema
+ if let (true, None | Some(0)) = (is_root_node, element.num_children) {
+ let builder = Type::group_type_builder(&element.name);
+ return Ok((index + 1, Arc::new(builder.build().unwrap())));
+ }
+
let converted_type = ConvertedType::try_from(element.converted_type)?;
// LogicalType is only present in v2 Parquet files. ConvertedType is always
// populated, regardless of the version of the file (v1 or v2).