This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch branch-1.11 in repository https://gitbox.apache.org/repos/asf/avro.git
commit 2cb26f6bfc638f741e918f4ad0a42d99b2c2e832 Author: Martin Grigorov <[email protected]> AuthorDate: Wed Jan 5 13:15:24 2022 +0200 AVRO-3197 Fallback to the 'type' when the logical type does not support the type (#1340) * AVRO-3197 Fallback to the 'type' when the logical type does not support the type * AVRO-3197 Better formatting * AVRO-3197 Allow only when the "type" is "string" * AVRO-3197 Handle problematic complex type for date/time logical types Read the complex type recursively. It seems Avro Java may produce {"type": {"type": "string", "avro.java.string": "String"}, "logicalType": "timestamp-millis"}}, i.e. logicalType is on the same level as the outer "type" * AVRO-3197 Make Clippy happy * AVRO-3197 Log a warning when a logical type is not supported by the Schema type This is how Avro Java behaves. (cherry picked from commit bce386623c79b204957f1c37c43079d1c51c2038) --- lang/rust/Cargo.toml | 2 ++ lang/rust/src/error.rs | 2 +- lang/rust/src/lib.rs | 3 ++ lang/rust/src/schema.rs | 92 +++++++++++++++++++++++++++++++++++++++++------ lang/rust/tests/schema.rs | 46 +++++++++++++++++++++--- 5 files changed, 128 insertions(+), 17 deletions(-) diff --git a/lang/rust/Cargo.toml b/lang/rust/Cargo.toml index 7d85220..3da92dd 100644 --- a/lang/rust/Cargo.toml +++ b/lang/rust/Cargo.toml @@ -69,6 +69,7 @@ typed-builder = "0.9.1" uuid = { version = "0.8.2", features = ["serde", "v4"] } zerocopy = "0.3.0" lazy_static = "1.1.1" +log = "0.4.14" zstd = { version = "0.9.0+zstd.1.5.0" , optional = true } [dev-dependencies] @@ -77,3 +78,4 @@ sha2 = "0.9.8" criterion = "0.3.5" anyhow = "1.0.44" hex-literal = "0.3.3" +env_logger = "0.9.0" diff --git a/lang/rust/src/error.rs b/lang/rust/src/error.rs index b690bfb..f4b1fb5 100644 --- a/lang/rust/src/error.rs +++ b/lang/rust/src/error.rs @@ -240,7 +240,7 @@ pub enum Error { #[error("Must be a JSON string, object or array")] ParseSchemaFromValidJson, - #[error("Unknown primitiive type: {0}")] + #[error("Unknown primitive type: {0}")] ParsePrimitive(String), #[error("invalid JSON for {key:?}: {precision:?}")] diff --git a/lang/rust/src/lib.rs b/lang/rust/src/lib.rs index 18d9230..af6e3cf 100644 --- a/lang/rust/src/lib.rs +++ b/lang/rust/src/lib.rs @@ -751,6 +751,9 @@ pub use ser::to_value; pub use util::max_allocation_bytes; pub use writer::{to_avro_datum, Writer}; +#[macro_use] +extern crate log; + /// A convenience type alias for `Result`s with `Error`s. pub type AvroResult<T> = Result<T, Error>; diff --git a/lang/rust/src/schema.rs b/lang/rust/src/schema.rs index 8e15cd4..bab99d4 100644 --- a/lang/rust/src/schema.rs +++ b/lang/rust/src/schema.rs @@ -608,18 +608,63 @@ impl Parser { match complex.get("type") { Some(value) => { let ty = parser.parse(value)?; + if kinds .iter() .any(|&kind| SchemaKind::from(ty.clone()) == kind) { Ok(ty) } else { - Err(Error::GetLogicalTypeVariant(value.clone())) + match get_type_rec(value.clone()) { + Ok(v) => Err(Error::GetLogicalTypeVariant(v)), + Err(err) => Err(err), + } } } None => Err(Error::GetLogicalTypeField), } } + + fn get_type_rec(json_value: Value) -> AvroResult<Value> { + match json_value { + typ @ Value::String(_) => Ok(typ), + Value::Object(ref complex) => match complex.get("type") { + Some(v) => get_type_rec(v.clone()), + None => Err(Error::GetComplexTypeField), + }, + _ => Err(Error::GetComplexTypeField), + } + } + + // checks whether the logicalType is supported by the type + fn try_logical_type( + logical_type: &str, + complex: &Map<String, Value>, + kinds: &[SchemaKind], + ok_schema: Schema, + parser: &mut Parser, + ) -> AvroResult<Schema> { + match logical_verify_type(complex, kinds, parser) { + // type and logicalType match! + Ok(_) => Ok(ok_schema), + // the logicalType is not expected for this type! + Err(Error::GetLogicalTypeVariant(json_value)) => match json_value { + Value::String(_) => match parser.parse(&json_value) { + Ok(schema) => { + warn!( + "Ignoring invalid logical type '{}' for schema of type: {:?}!", + logical_type, schema + ); + Ok(schema) + } + Err(parse_err) => Err(parse_err), + }, + _ => Err(Error::GetLogicalTypeVariant(json_value)), + }, + err => err, + } + } + match complex.get("logicalType") { Some(&Value::String(ref t)) => match t.as_str() { "decimal" => { @@ -642,24 +687,49 @@ impl Parser { return Ok(Schema::Uuid); } "date" => { - logical_verify_type(complex, &[SchemaKind::Int], self)?; - return Ok(Schema::Date); + return try_logical_type( + "date", + complex, + &[SchemaKind::Int], + Schema::Date, + self, + ); } "time-millis" => { - logical_verify_type(complex, &[SchemaKind::Int], self)?; - return Ok(Schema::TimeMillis); + return try_logical_type( + "time-millis", + complex, + &[SchemaKind::Int], + Schema::TimeMillis, + self, + ); } "time-micros" => { - logical_verify_type(complex, &[SchemaKind::Long], self)?; - return Ok(Schema::TimeMicros); + return try_logical_type( + "time-micros", + complex, + &[SchemaKind::Long], + Schema::TimeMicros, + self, + ); } "timestamp-millis" => { - logical_verify_type(complex, &[SchemaKind::Long], self)?; - return Ok(Schema::TimestampMillis); + return try_logical_type( + "timestamp-millis", + complex, + &[SchemaKind::Long], + Schema::TimestampMillis, + self, + ); } "timestamp-micros" => { - logical_verify_type(complex, &[SchemaKind::Long], self)?; - return Ok(Schema::TimestampMicros); + return try_logical_type( + "timestamp-micros", + complex, + &[SchemaKind::Long], + Schema::TimestampMicros, + self, + ); } "duration" => { logical_verify_type(complex, &[SchemaKind::Fixed], self)?; diff --git a/lang/rust/tests/schema.rs b/lang/rust/tests/schema.rs index 14edc0f..efd9df2 100644 --- a/lang/rust/tests/schema.rs +++ b/lang/rust/tests/schema.rs @@ -19,6 +19,13 @@ use avro_rs::{schema::Name, Error, Schema}; use lazy_static::lazy_static; +fn init() { + let _ = env_logger::builder() + .filter_level(log::LevelFilter::Trace) + .is_test(true) + .try_init(); +} + const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[ (r#""null""#, true), (r#"{"type": "null"}"#, true), @@ -476,7 +483,8 @@ const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[ // this is valid even though its logical type is "date1", because unknown logical types are // ignored (r#"{"type": "int", "logicalType": "date1"}"#, true), - (r#"{"type": "long", "logicalType": "date"}"#, false), + // this is still valid because unknown logicalType should be ignored + (r#"{"type": "long", "logicalType": "date"}"#, true), ]; const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ @@ -484,7 +492,8 @@ const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ // this is valid even though its logical type is "time-milis" (missing the second "l"), // because unknown logical types are ignored (r#"{"type": "int", "logicalType": "time-milis"}"#, true), - (r#"{"type": "long", "logicalType": "time-millis"}"#, false), + // this is still valid because unknown logicalType should be ignored + (r#"{"type": "long", "logicalType": "time-millis"}"#, true), ]; const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ @@ -492,7 +501,8 @@ const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ // this is valid even though its logical type is "time-micro" (missing the last "s"), because // unknown logical types are ignored (r#"{"type": "long", "logicalType": "time-micro"}"#, true), - (r#"{"type": "int", "logicalType": "time-micros"}"#, false), + // this is still valid because unknown logicalType should be ignored + (r#"{"type": "int", "logicalType": "time-micros"}"#, true), ]; const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ @@ -507,8 +517,9 @@ const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ true, ), ( + // this is still valid because unknown logicalType should be ignored r#"{"type": "int", "logicalType": "timestamp-millis"}"#, - false, + true, ), ]; @@ -524,8 +535,9 @@ const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ true, ), ( + // this is still valid because unknown logicalType should be ignored r#"{"type": "int", "logicalType": "timestamp-micros"}"#, - false, + true, ), ]; @@ -562,6 +574,7 @@ that recursive types are not properly supported. #[test] fn test_correct_recursive_extraction() { + init(); let raw_outer_schema = r#"{ "type": "record", "name": "X", @@ -600,6 +613,8 @@ fn test_correct_recursive_extraction() { #[test] fn test_parse() { + init(); + for (raw_schema, valid) in EXAMPLES.iter() { let schema = Schema::parse_str(raw_schema); if *valid { @@ -622,6 +637,7 @@ fn test_parse() { #[test] /// Test that the string generated by an Avro Schema object is, in fact, a valid Avro schema. fn test_valid_cast_to_string_after_parse() { + init(); for (raw_schema, _) in VALID_EXAMPLES.iter() { let schema = Schema::parse_str(raw_schema).unwrap(); Schema::parse_str(schema.canonical_form().as_str()).unwrap(); @@ -633,6 +649,7 @@ fn test_valid_cast_to_string_after_parse() { /// 2. Serialize "original" to a string and parse that string to generate Avro schema "round trip". /// 3. Ensure "original" and "round trip" schemas are equivalent. fn test_equivalence_after_round_trip() { + init(); for (raw_schema, _) in VALID_EXAMPLES.iter() { let original_schema = Schema::parse_str(raw_schema).unwrap(); let round_trip_schema = @@ -645,6 +662,7 @@ fn test_equivalence_after_round_trip() { /// Test that a list of schemas whose definitions do not depend on each other produces the same /// result as parsing each element of the list individually fn test_parse_list_without_cross_deps() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -672,6 +690,7 @@ fn test_parse_list_without_cross_deps() { /// the schemas are input. /// However, the output order is guaranteed to be the same as the input order. fn test_parse_list_with_cross_deps_basic() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -718,6 +737,7 @@ fn test_parse_list_with_cross_deps_basic() { /// and returns an error. N.B. In the future, when recursive types are supported, this should be /// revisited. fn test_parse_list_recursive_type_error() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -741,6 +761,7 @@ fn test_parse_list_recursive_type_error() { #[test] /// Test that schema composition resolves namespaces. fn test_parse_list_with_cross_deps_and_namespaces() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -786,6 +807,7 @@ fn test_parse_list_with_cross_deps_and_namespaces() { #[test] /// Test that schema composition fails on namespace errors. fn test_parse_list_with_cross_deps_and_namespaces_error() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -849,6 +871,7 @@ fn permutation_indices(indices: Vec<usize>) -> Vec<Vec<usize>> { /// Test that a type that depends on more than one other type is parsed correctly when all /// definitions are passed in as a list. This should work regardless of the ordering of the list. fn test_parse_list_multiple_dependencies() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -913,6 +936,7 @@ fn test_parse_list_multiple_dependencies() { /// Test that a type that is depended on by more than one other type is parsed correctly when all /// definitions are passed in as a list. This should work regardless of the ordering of the list. fn test_parse_list_shared_dependency() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -988,6 +1012,7 @@ fn test_parse_list_shared_dependency() { #[test] /// Test that trying to parse two schemas with the same fullname returns an Error fn test_name_collision_error() { + init(); let schema_str_1 = r#"{ "name": "foo.A", "type": "record", @@ -1010,6 +1035,7 @@ fn test_name_collision_error() { #[test] /// Test that having the same name but different fullnames does not return an error fn test_namespace_prevents_collisions() { + init(); let schema_str_1 = r#"{ "name": "A", "type": "record", @@ -1059,6 +1085,7 @@ fn test_namespace_prevents_collisions() { #[test] fn test_fullname_name_and_namespace_specified() { + init(); let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": "o.a.h", "aliases": null}"#).unwrap(); let fullname = name.fullname(None); @@ -1067,6 +1094,7 @@ fn test_fullname_name_and_namespace_specified() { #[test] fn test_fullname_fullname_and_namespace_specified() { + init(); let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h", "aliases": null}"#) .unwrap(); @@ -1076,6 +1104,7 @@ fn test_fullname_fullname_and_namespace_specified() { #[test] fn test_fullname_name_and_default_namespace_specified() { + init(); let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": null, "aliases": null}"#).unwrap(); let fullname = name.fullname(Some("b.c.d")); @@ -1084,6 +1113,7 @@ fn test_fullname_name_and_default_namespace_specified() { #[test] fn test_fullname_fullname_and_default_namespace_specified() { + init(); let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null, "aliases": null}"#).unwrap(); let fullname = name.fullname(Some("o.a.h")); @@ -1092,6 +1122,7 @@ fn test_fullname_fullname_and_default_namespace_specified() { #[test] fn test_fullname_fullname_namespace_and_default_namespace_specified() { + init(); let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", "aliases": null}"#) .unwrap(); @@ -1101,6 +1132,7 @@ fn test_fullname_fullname_namespace_and_default_namespace_specified() { #[test] fn test_fullname_name_namespace_and_default_namespace_specified() { + init(); let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": null}"#).unwrap(); let fullname = name.fullname(Some("o.a.h")); @@ -1109,6 +1141,8 @@ fn test_fullname_name_namespace_and_default_namespace_specified() { #[test] fn test_doc_attributes() { + init(); + fn assert_doc(schema: &Schema) { match schema { Schema::Enum { doc, .. } => assert!(doc.is_some()), @@ -1164,6 +1198,8 @@ fn test_other_attributes() { #[test] fn test_root_error_is_not_swallowed_on_parse_error() -> Result<(), String> { + init(); + let raw_schema = r#"/not/a/real/file"#; let error = Schema::parse_str(raw_schema).unwrap_err();
