jecsand838 commented on code in PR #8349:
URL: https://github.com/apache/arrow-rs/pull/8349#discussion_r2365873073


##########
arrow-avro/src/reader/record.rs:
##########
@@ -486,6 +734,70 @@ impl Decoder {
             Self::Decimal256(_, _, _, builder) => 
builder.append_value(i256::ZERO),
             Self::Enum(indices, _, _) => indices.push(0),
             Self::Duration(builder) => builder.append_null(),
+            Self::Union(fields, type_ids, offsets, encodings, encoding_counts, 
None) => {
+                let mut chosen = None;
+                for (i, ch) in encodings.iter().enumerate() {
+                    if matches!(ch, Decoder::Null(_)) {
+                        chosen = Some(i);
+                        break;
+                    }
+                }
+                let idx = chosen.unwrap_or(0);
+                let type_id = fields
+                    .iter()
+                    .nth(idx)
+                    .map(|(type_id, _)| type_id)
+                    .unwrap_or_else(|| i8::try_from(idx).unwrap_or(0));
+                type_ids.push(type_id);
+                offsets.push(encoding_counts[idx]);
+                encodings[idx].append_null();
+                encoding_counts[idx] += 1;
+            }
+            Self::Union(
+                fields,
+                type_ids,
+                offsets,
+                encodings,
+                encoding_counts,
+                Some(union_resolution),
+            ) => match &mut union_resolution.kind {
+                UnionResolvedKind::Both { .. } => {
+                    let mut chosen = None;
+                    for (i, ch) in encodings.iter().enumerate() {
+                        if matches!(ch, Decoder::Null(_)) {
+                            chosen = Some(i);
+                            break;
+                        }
+                    }
+                    let idx = chosen.unwrap_or(0);
+                    let type_id = fields
+                        .iter()
+                        .nth(idx)
+                        .map(|(type_id, _)| type_id)
+                        .unwrap_or_else(|| i8::try_from(idx).unwrap_or(0));

Review Comment:
   > I don't understand why it's safe to interpret a field index as a type id? 
AFAIK type ids are not required to be contiguous, nor ordered?
   
   As I understand it, Arrow type ids are arbitrary per‑union codes and are not 
required to be contiguous or ordered. Our current fallback to 
`i8::try_from(idx)` is implicitly conflating the Avro branch index with an 
Arrow type code.
   
   In `reader/record.rs` I’m removing that fallback everywhere and will always 
fetch the actual type code either from `UnionFields` or from the 
`reader_type_codes` we already carry in `UnionResolution`.
   
   For context, `arrow‑avro` currently assigns codes `0..n-1` 
(`build_union_fields`), which is why this hadn’t surfaced, but the we shouldn’t 
depend on that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to