mzabaluev commented on code in PR #9237:
URL: https://github.com/apache/arrow-rs/pull/9237#discussion_r2712165072
##########
arrow-avro/src/reader/mod.rs:
##########
@@ -9081,4 +9081,46 @@ mod test {
"entire RecordBatch mismatch (schema, all columns, all rows)"
);
}
+
+ #[test]
+ fn test_bad_varint_bug_nullable_array_items() {
+ use flate2::read::GzDecoder;
+ use std::io::Read;
+ let manifest_dir = env!("CARGO_MANIFEST_DIR");
+ let gz_path =
format!("{manifest_dir}/test/data/bad-varint-bug.avro.gz");
+ let gz_file = File::open(&gz_path).expect("test file should exist");
+ let mut decoder = GzDecoder::new(gz_file);
Review Comment:
I actually had the file unpacked in my test suite branch; it's quite small.
I only gzipped it to be able to post it to GitHub. But if there's already a
dependency on `flate2`, why not use it?
##########
arrow-avro/src/codec.rs:
##########
@@ -1529,23 +1529,77 @@ impl<'a> Maker<'a> {
Ok(dt)
}
(writer_non_union, Schema::Union(reader_variants)) => {
- let promo = self.find_best_promotion(
- writer_non_union,
- reader_variants.as_slice(),
- namespace,
- );
- let Some((reader_index, promotion)) = promo else {
- return Err(ArrowError::SchemaError(
- "Writer schema does not match any reader union
branch".to_string(),
- ));
- };
- let mut dt = self.parse_type(reader_schema, namespace)?;
- dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion {
- writer_to_reader: Arc::from(vec![Some((reader_index,
promotion))]),
- writer_is_union: false,
- reader_is_union: true,
- }));
- Ok(dt)
+ let null_position = reader_variants
+ .iter()
+ .position(|x| x ==
&Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)));
+ if let (2, Some(null_idx)) = (reader_variants.len(),
null_position) {
Review Comment:
Would it be good to reuse the `nullable_union_variants` helper here?
##########
arrow-avro/src/codec.rs:
##########
@@ -1529,23 +1529,77 @@ impl<'a> Maker<'a> {
Ok(dt)
}
(writer_non_union, Schema::Union(reader_variants)) => {
- let promo = self.find_best_promotion(
- writer_non_union,
- reader_variants.as_slice(),
- namespace,
- );
- let Some((reader_index, promotion)) = promo else {
- return Err(ArrowError::SchemaError(
- "Writer schema does not match any reader union
branch".to_string(),
- ));
- };
- let mut dt = self.parse_type(reader_schema, namespace)?;
- dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion {
- writer_to_reader: Arc::from(vec![Some((reader_index,
promotion))]),
- writer_is_union: false,
- reader_is_union: true,
- }));
- Ok(dt)
+ let null_position = reader_variants
+ .iter()
+ .position(|x| x ==
&Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)));
+ if let (2, Some(null_idx)) = (reader_variants.len(),
null_position) {
+ let non_null_idx = 1 - null_idx;
+ let non_null_branch = &reader_variants[non_null_idx];
+ let mut dt =
+ self.make_data_type(writer_non_union,
Some(non_null_branch), namespace)?;
Review Comment:
I call `resolve_type` directly here, because that's what this `make_data_type` call always amounts to.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]