jecsand838 commented on code in PR #8349:
URL: https://github.com/apache/arrow-rs/pull/8349#discussion_r2365858841
##########
arrow-avro/src/reader/record.rs:
##########
@@ -214,6 +254,148 @@ struct EnumResolution {
default_index: i32,
}
+#[derive(Debug, Clone, Copy)]
+enum BranchDispatch {
+ NoMatch,
+ ToReader {
+ reader_idx: usize,
+ promotion: Promotion,
+ },
+}
+
+#[derive(Debug)]
+struct UnionResolution {
+ dispatch: Option<Arc<[BranchDispatch]>>,
+ kind: UnionResolvedKind,
+}
+
+#[derive(Debug)]
+enum UnionResolvedKind {
+ Both {
+ reader_type_codes: Arc<[i8]>,
+ },
+ ToSingle {
+ target: Box<Decoder>,
+ },
+ FromSingle {
+ reader_type_codes: Arc<[i8]>,
+ target_reader_index: usize,
+ promotion: Promotion,
+ },
+}
+
+#[derive(Debug, Default)]
+struct UnionResolutionBuilder {
+ fields: Option<UnionFields>,
+ resolved: Option<ResolvedUnion>,
+}
+
+impl UnionResolutionBuilder {
+ #[inline]
+ fn new() -> Self {
+ Self {
+ fields: None,
+ resolved: None,
+ }
+ }
+
+ #[inline]
+ fn with_fields(mut self, fields: UnionFields) -> Self {
+ self.fields = Some(fields);
+ self
+ }
+
+ #[inline]
+ fn with_resolved_union(mut self, resolved_union: &ResolvedUnion) -> Self {
+ self.resolved = Some(resolved_union.clone());
+ self
+ }
+
+ fn build(self) -> Result<UnionResolution, ArrowError> {
+ let info = self.resolved.ok_or_else(|| {
+ ArrowError::InvalidArgumentError(
+ "UnionResolutionBuilder requires resolved_union to be
provided".to_string(),
+ )
+ })?;
+ match (info.writer_is_union, info.reader_is_union) {
+ (true, true) => {
+ let fields = self.fields.ok_or_else(|| {
+ ArrowError::InvalidArgumentError(
+ "UnionResolutionBuilder for reader union requires
fields".to_string(),
+ )
+ })?;
+ let reader_type_codes: Vec<i8> =
+ fields.iter().map(|(tid, _)| tid).collect::<Vec<_>>();
+ let dispatch: Vec<BranchDispatch> = info
+ .writer_to_reader
+ .iter()
+ .map(|m| match m {
+ Some((reader_index, promotion)) =>
BranchDispatch::ToReader {
+ reader_idx: *reader_index,
+ promotion: *promotion,
+ },
+ None => BranchDispatch::NoMatch,
+ })
+ .collect();
+ Ok(UnionResolution {
+ dispatch: Some(Arc::from(dispatch)),
+ kind: UnionResolvedKind::Both {
+ reader_type_codes: Arc::from(reader_type_codes),
+ },
+ })
+ }
+ (false, true) => {
+ let fields = self.fields.ok_or_else(|| {
+ ArrowError::InvalidArgumentError(
+ "UnionResolutionBuilder for reader union requires
fields".to_string(),
+ )
+ })?;
+ let reader_type_codes: Vec<i8> =
+ fields.iter().map(|(tid, _)| tid).collect::<Vec<_>>();
+ let (target_reader_index, promotion) =
+ match info.writer_to_reader.first().and_then(|x| *x) {
+ Some(pair) => pair,
+ None => {
+ return Err(ArrowError::SchemaError(
+ "Writer schema does not match any reader union
branch".to_string(),
Review Comment:
Good catch on the wording. This code path handles the “writer is not a union,
reader is a union” case. `ResolvedUnion::writer_to_reader` is defined as
“for each writer branch index, the reader branch index and how to read it.”
Since the writer is not a union, there is exactly one writer branch, so
`writer_to_reader` has length 1 here. Using `.first()` simply fetches that
single entry.
Per the [Avro
spec](https://avro.apache.org/docs/1.11.1/specification/#schema-resolution),
when the reader is a union and the writer is not, the reader must pick the
first branch that matches the writer’s schema; if none match, signal an error.
That “no branch matches” condition is what the error text “does not match any
reader union branch” refers to.
I'll clean up the wording, though.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]