This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch avro-3495-fields-order-should-not-matter in repository https://gitbox.apache.org/repos/asf/avro.git
commit 4413d579be8d8e0ebb7add27c66149c4aec2f27b Author: Martin Tzvetanov Grigorov <[email protected]> AuthorDate: Tue Apr 19 00:10:00 2022 +0300 AVRO-3495: The order of the struct's fields and schema's fields should not matter Signed-off-by: Martin Tzvetanov Grigorov <[email protected]> --- lang/rust/avro/src/encode.rs | 27 ++++++++++++----- lang/rust/avro/src/error.rs | 3 ++ lang/rust/avro/src/types.rs | 66 +++++++++++++++++++++++++++--------------- lang/rust/avro/tests/schema.rs | 32 ++++++++++++++++++++ 4 files changed, 96 insertions(+), 32 deletions(-) diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs index c4c0bd3fe..04d220276 100644 --- a/lang/rust/avro/src/encode.rs +++ b/lang/rust/avro/src/encode.rs @@ -188,18 +188,29 @@ pub(crate) fn encode_internal( if let Schema::Record { ref name, fields: ref schema_fields, + ref lookup, .. } = *schema { let record_namespace = name.fully_qualified_name(enclosing_namespace).namespace; - for (i, &(_, ref value)) in fields.iter().enumerate() { - encode_internal( - value, - &schema_fields[i].schema, - names, - &record_namespace, - buffer, - )?; + for &(ref name, ref value) in fields.iter() { + match lookup.get(name) { + Some(idx) => { + encode_internal( + value, + &schema_fields[*idx].schema, + names, + &record_namespace, + buffer, + )?; + } + None => { + return Err(Error::NoEntryInLookupTable( + name.clone(), + format!("{:?}", lookup), + )); + } + } } } else { error!("invalid schema type for Record: {:?}", schema); diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs index 908e040e9..d7483ea91 100644 --- a/lang/rust/avro/src/error.rs +++ b/lang/rust/avro/src/error.rs @@ -403,6 +403,9 @@ pub enum Error { #[error("Signed decimal bytes length {0} not equal to fixed schema size {1}.")] EncodeDecimalAsFixedError(usize, usize), + #[error("There is no entry for {0} in the lookup table: {1}.")] + NoEntryInLookupTable(String, String), + #[error("Can only encode value type {value_kind:?} as one of {supported_schema:?}")] EncodeValueAsSchemaError { value_kind: ValueKind, diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs index 25d968120..4016b84dd 100644 --- a/lang/rust/avro/src/types.rs +++ b/lang/rust/avro/src/types.rs @@ -457,7 +457,14 @@ impl Value { Value::accumulate(acc, value.validate_internal(inner, names)) }) } - (&Value::Record(ref record_fields), &Schema::Record { ref fields, .. }) => { + ( + &Value::Record(ref record_fields), + &Schema::Record { + ref fields, + ref lookup, + .. + }, + ) => { if fields.len() != record_fields.len() { return Some(format!( "The value's records length ({}) is different than the schema's ({})", @@ -466,19 +473,37 @@ impl Value { )); } - fields.iter().zip(record_fields.iter()).fold( - None, - |acc, (field, &(ref name, ref value))| { - if field.name != *name { - return Some(format!( - "Value's name '{}' does not match the expected field's name '{}'", - name, field.name - )); - } - let res = value.validate_internal(&field.schema, names); - Value::accumulate(acc, res) - }, - ) + let record_fields_by_name = record_fields + .iter() + .map(|(name, record_field)| (name.clone(), record_field.clone())) + .collect::<HashMap<String, Value>>(); + + fields.iter().fold(None, |acc, field| { + match record_fields_by_name.get(&field.name) { + Some(record_field) => Value::accumulate( + acc, + record_field.validate_internal(&field.schema, names), + ), + None => Value::accumulate( + acc, + Some(format!("There is no value for field '{}'", field.name)), + ), + } + }) + + // fields.iter().zip(record_fields.iter()).fold( + // None, + // |acc, (field, &(ref name, ref value))| { + // if field.name != *name { + // return Some(format!( + // "Value's name '{}' does not match the expected field's name '{}'", + // name, field.name + // )); + // } + // let res = value.validate_internal(&field.schema, names); + // Value::accumulate(acc, res) + // }, + // ) } (&Value::Map(ref items), &Schema::Record { ref fields, .. }) => { fields.iter().fold(None, |acc, field| { @@ -1054,7 +1079,7 @@ mod tests { lookup: Default::default(), }, false, - "Invalid value: Record([(\"unknown_field_name\", Null)]) for schema: Record { name: Name { name: \"record_name\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"field_name\", doc: None, default: None, schema: Int, order: Ignore, position: 0 }], lookup: {} }. Reason: Value's name 'unknown_field_name' does not match the expected field's name 'field_name'", + "Invalid value: Record([(\"unknown_field_name\", Null)]) for schema: Record { name: Name { name: \"record_name\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"field_name\", doc: None, default: None, schema: Int, order: Ignore, position: 0 }], lookup: {} }. Reason: There is no value for field 'field_name'", ), ( Value::Record(vec![("field_name".to_string(), Value::Null)]), @@ -1247,14 +1272,7 @@ mod tests { ("b".to_string(), Value::String("foo".to_string())), ("a".to_string(), Value::Long(42i64)), ]); - assert!(!value.validate(&schema)); - assert_log_message( - format!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - value, schema, "Value's name 'a' does not match the expected field's name 'b'" - ) - .as_str(), - ); + assert!(value.validate(&schema)); let value = Value::Record(vec![ ("a".to_string(), Value::Boolean(false)), @@ -1269,7 +1287,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_log_message( - "Invalid value: Record([(\"a\", Long(42)), (\"c\", String(\"foo\"))]) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {} }. Reason: Value's name 'c' does not match the expected field's name 'b'" + "Invalid value: Record([(\"a\", Long(42)), (\"c\", String(\"foo\"))]) for schema: Record { name: Name { name: \"some_record\", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: \"a\", doc: None, default: None, schema: Long, order: Ascending, position: 0 }, RecordField { name: \"b\", doc: None, default: None, schema: String, order: Ascending, position: 1 }], lookup: {} }. Reason: There is no value for field 'b'" ); let value = Value::Record(vec![ diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs index f58b62f9e..33f62893c 100644 --- a/lang/rust/avro/tests/schema.rs +++ b/lang/rust/avro/tests/schema.rs @@ -17,6 +17,7 @@ use apache_avro::{ schema::{Name, RecordField}, + to_avro_datum, to_value, types::{Record, Value}, Codec, Error, Reader, Schema, Writer, }; @@ -1311,3 +1312,34 @@ fn test_decimal_valid_type_attributes() { assert_eq!(0, bytes_decimal.get_attribute("scale")); } */ + +#[test] +fn avro_old_issue_47() { + init(); + let schema_str = r#" + { + "type": "record", + "name": "my_record", + "fields": [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] + }"#; + let schema = Schema::parse_str(schema_str).unwrap(); + + use serde::{Deserialize, Serialize}; + + #[derive(Deserialize, Serialize)] + pub struct MyRecord { + b: String, + a: i64, + } + + let record = MyRecord { + b: "hello".to_string(), + a: 1, + }; + + let res = to_avro_datum(&schema, to_value(record).unwrap()).unwrap(); + dbg!(res); +}
