martin-g commented on code in PR #420:
URL: https://github.com/apache/avro-rs/pull/420#discussion_r2712608952
##########
avro/src/schema/mod.rs:
##########
@@ -6453,4 +6476,43 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn avro_rs_420_independent_canonical_form() -> TestResult {
+ let record = Schema::Record(
Review Comment:
nit: I think using Schema::parse_str("...") would be more readable then
constructing the schemas with code.
##########
avro/src/schema/mod.rs:
##########
@@ -870,36 +870,59 @@ impl Schema {
UnionSchema::new(schemas).map(Schema::Union)
}
- fn denormalize(&mut self, schemata: &[Schema]) -> AvroResult<()> {
+ fn denormalize(
+ &mut self,
+ schemata: &[Schema],
+ defined_names: &mut HashSet<Name>,
+ ) -> AvroResult<()> {
+ // If this name already exists in this schema we can reference it.
+ // This makes the denormalized form as small as possible and prevent
infinite loops for recursive types.
+ if let Some(name) = self.name()
+ && defined_names.contains(name)
+ {
+ *self = Schema::Ref { name: name.clone() };
+ return Ok(());
+ }
match self {
Schema::Ref { name } => {
let replacement_schema = schemata
.iter()
.find(|s| s.name().map(|n| *n == *name).unwrap_or(false));
if let Some(schema) = replacement_schema {
let mut denorm = schema.clone();
- denorm.denormalize(schemata)?;
+ denorm.denormalize(schemata, defined_names)?;
*self = denorm;
} else {
return
Err(Details::SchemaResolutionError(name.clone()).into());
}
}
Schema::Record(record_schema) => {
+ defined_names.insert(record_schema.name.clone());
for field in &mut record_schema.fields {
- field.schema.denormalize(schemata)?;
+ field.schema.denormalize(schemata, defined_names)?;
}
}
Schema::Array(array_schema) => {
- array_schema.items.denormalize(schemata)?;
+ array_schema.items.denormalize(schemata, defined_names)?;
}
Schema::Map(map_schema) => {
- map_schema.types.denormalize(schemata)?;
+ map_schema.types.denormalize(schemata, defined_names)?;
}
Schema::Union(union_schema) => {
for schema in &mut union_schema.schemas {
- schema.denormalize(schemata)?;
+ schema.denormalize(schemata, defined_names)?;
}
}
+ Schema::Enum(EnumSchema { name, .. })
+ | Schema::Fixed(FixedSchema { name, .. })
+ | Schema::Decimal(DecimalSchema {
+ inner: InnerDecimalSchema::Fixed(FixedSchema { name, .. }),
+ ..
+ })
+ | Schema::Uuid(UuidSchema::Fixed(FixedSchema { name, .. }))
+ | Schema::Duration(FixedSchema { name, .. }) => {
+ defined_names.insert(name.clone());
Review Comment:
I think this would be more future-proof if redone as:
```rust
schema if schema.is_named() => defined_names.insert(schema.name().clone())
```
This way if we add yet another named schema in the future we will have to
add it only in `Schema::is_named()` and `Schema::name()` but no need to add it
here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]