[avro] 09/30: AVRO-3197 Fallback to the 'type' when the logical type does not support the type (#1340)

mgrigorov Tue, 01 Feb 2022 03:51:37 -0800

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


commit 2cb26f6bfc638f741e918f4ad0a42d99b2c2e832
Author: Martin Grigorov <[email protected]>
AuthorDate: Wed Jan 5 13:15:24 2022 +0200

    AVRO-3197 Fallback to the 'type' when the logical type does not support the 
type (#1340)
    
    * AVRO-3197 Fallback to the 'type' when the logical type does not support 
the type
    
    * AVRO-3197 Better formatting
    
    * AVRO-3197 Allow only when the "type" is "string"
    
    * AVRO-3197 Handle problematic complex type for date/time logical types
    
    Read the complex type recursively. It seems Avro Java may produce {"type": 
{"type": "string", "avro.java.string": "String"}, "logicalType": 
"timestamp-millis"}}, i.e. logicalType is on the same level as the outer "type"
    
    * AVRO-3197 Make Clippy happy
    
    * AVRO-3197 Log a warning when a logical type is not supported by the 
Schema type
    
    This is how Avro Java behaves.
    
    (cherry picked from commit bce386623c79b204957f1c37c43079d1c51c2038)
---
 lang/rust/Cargo.toml      |  2 ++
 lang/rust/src/error.rs    |  2 +-
 lang/rust/src/lib.rs      |  3 ++
 lang/rust/src/schema.rs   | 92 +++++++++++++++++++++++++++++++++++++++++------
 lang/rust/tests/schema.rs | 46 +++++++++++++++++++++---
 5 files changed, 128 insertions(+), 17 deletions(-)

diff --git a/lang/rust/Cargo.toml b/lang/rust/Cargo.toml
index 7d85220..3da92dd 100644
--- a/lang/rust/Cargo.toml
+++ b/lang/rust/Cargo.toml
@@ -69,6 +69,7 @@ typed-builder = "0.9.1"
 uuid = { version = "0.8.2", features = ["serde", "v4"] }
 zerocopy = "0.3.0"
 lazy_static = "1.1.1"
+log = "0.4.14"
 zstd = { version = "0.9.0+zstd.1.5.0" , optional = true }
 
 [dev-dependencies]
@@ -77,3 +78,4 @@ sha2 = "0.9.8"
 criterion = "0.3.5"
 anyhow = "1.0.44"
 hex-literal = "0.3.3"
+env_logger = "0.9.0"
diff --git a/lang/rust/src/error.rs b/lang/rust/src/error.rs
index b690bfb..f4b1fb5 100644
--- a/lang/rust/src/error.rs
+++ b/lang/rust/src/error.rs
@@ -240,7 +240,7 @@ pub enum Error {
     #[error("Must be a JSON string, object or array")]
     ParseSchemaFromValidJson,
 
-    #[error("Unknown primitiive type: {0}")]
+    #[error("Unknown primitive type: {0}")]
     ParsePrimitive(String),
 
     #[error("invalid JSON for {key:?}: {precision:?}")]
diff --git a/lang/rust/src/lib.rs b/lang/rust/src/lib.rs
index 18d9230..af6e3cf 100644
--- a/lang/rust/src/lib.rs
+++ b/lang/rust/src/lib.rs
@@ -751,6 +751,9 @@ pub use ser::to_value;
 pub use util::max_allocation_bytes;
 pub use writer::{to_avro_datum, Writer};
 
+#[macro_use]
+extern crate log;
+
 /// A convenience type alias for `Result`s with `Error`s.
 pub type AvroResult<T> = Result<T, Error>;
 
diff --git a/lang/rust/src/schema.rs b/lang/rust/src/schema.rs
index 8e15cd4..bab99d4 100644
--- a/lang/rust/src/schema.rs
+++ b/lang/rust/src/schema.rs
@@ -608,18 +608,63 @@ impl Parser {
             match complex.get("type") {
                 Some(value) => {
                     let ty = parser.parse(value)?;
+
                     if kinds
                         .iter()
                         .any(|&kind| SchemaKind::from(ty.clone()) == kind)
                     {
                         Ok(ty)
                     } else {
-                        Err(Error::GetLogicalTypeVariant(value.clone()))
+                        match get_type_rec(value.clone()) {
+                            Ok(v) => Err(Error::GetLogicalTypeVariant(v)),
+                            Err(err) => Err(err),
+                        }
                     }
                 }
                 None => Err(Error::GetLogicalTypeField),
             }
         }
+
+        fn get_type_rec(json_value: Value) -> AvroResult<Value> {
+            match json_value {
+                typ @ Value::String(_) => Ok(typ),
+                Value::Object(ref complex) => match complex.get("type") {
+                    Some(v) => get_type_rec(v.clone()),
+                    None => Err(Error::GetComplexTypeField),
+                },
+                _ => Err(Error::GetComplexTypeField),
+            }
+        }
+
+        // checks whether the logicalType is supported by the type
+        fn try_logical_type(
+            logical_type: &str,
+            complex: &Map<String, Value>,
+            kinds: &[SchemaKind],
+            ok_schema: Schema,
+            parser: &mut Parser,
+        ) -> AvroResult<Schema> {
+            match logical_verify_type(complex, kinds, parser) {
+                // type and logicalType match!
+                Ok(_) => Ok(ok_schema),
+                // the logicalType is not expected for this type!
+                Err(Error::GetLogicalTypeVariant(json_value)) => match 
json_value {
+                    Value::String(_) => match parser.parse(&json_value) {
+                        Ok(schema) => {
+                            warn!(
+                                "Ignoring invalid logical type '{}' for schema 
of type: {:?}!",
+                                logical_type, schema
+                            );
+                            Ok(schema)
+                        }
+                        Err(parse_err) => Err(parse_err),
+                    },
+                    _ => Err(Error::GetLogicalTypeVariant(json_value)),
+                },
+                err => err,
+            }
+        }
+
         match complex.get("logicalType") {
             Some(&Value::String(ref t)) => match t.as_str() {
                 "decimal" => {
@@ -642,24 +687,49 @@ impl Parser {
                     return Ok(Schema::Uuid);
                 }
                 "date" => {
-                    logical_verify_type(complex, &[SchemaKind::Int], self)?;
-                    return Ok(Schema::Date);
+                    return try_logical_type(
+                        "date",
+                        complex,
+                        &[SchemaKind::Int],
+                        Schema::Date,
+                        self,
+                    );
                 }
                 "time-millis" => {
-                    logical_verify_type(complex, &[SchemaKind::Int], self)?;
-                    return Ok(Schema::TimeMillis);
+                    return try_logical_type(
+                        "time-millis",
+                        complex,
+                        &[SchemaKind::Int],
+                        Schema::TimeMillis,
+                        self,
+                    );
                 }
                 "time-micros" => {
-                    logical_verify_type(complex, &[SchemaKind::Long], self)?;
-                    return Ok(Schema::TimeMicros);
+                    return try_logical_type(
+                        "time-micros",
+                        complex,
+                        &[SchemaKind::Long],
+                        Schema::TimeMicros,
+                        self,
+                    );
                 }
                 "timestamp-millis" => {
-                    logical_verify_type(complex, &[SchemaKind::Long], self)?;
-                    return Ok(Schema::TimestampMillis);
+                    return try_logical_type(
+                        "timestamp-millis",
+                        complex,
+                        &[SchemaKind::Long],
+                        Schema::TimestampMillis,
+                        self,
+                    );
                 }
                 "timestamp-micros" => {
-                    logical_verify_type(complex, &[SchemaKind::Long], self)?;
-                    return Ok(Schema::TimestampMicros);
+                    return try_logical_type(
+                        "timestamp-micros",
+                        complex,
+                        &[SchemaKind::Long],
+                        Schema::TimestampMicros,
+                        self,
+                    );
                 }
                 "duration" => {
                     logical_verify_type(complex, &[SchemaKind::Fixed], self)?;
diff --git a/lang/rust/tests/schema.rs b/lang/rust/tests/schema.rs
index 14edc0f..efd9df2 100644
--- a/lang/rust/tests/schema.rs
+++ b/lang/rust/tests/schema.rs
@@ -19,6 +19,13 @@
 use avro_rs::{schema::Name, Error, Schema};
 use lazy_static::lazy_static;
 
+fn init() {
+    let _ = env_logger::builder()
+        .filter_level(log::LevelFilter::Trace)
+        .is_test(true)
+        .try_init();
+}
+
 const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[
     (r#""null""#, true),
     (r#"{"type": "null"}"#, true),
@@ -476,7 +483,8 @@ const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[
     // this is valid even though its logical type is "date1", because unknown 
logical types are
     // ignored
     (r#"{"type": "int", "logicalType": "date1"}"#, true),
-    (r#"{"type": "long", "logicalType": "date"}"#, false),
+    // this is still valid because unknown logicalType should be ignored
+    (r#"{"type": "long", "logicalType": "date"}"#, true),
 ];
 
 const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
@@ -484,7 +492,8 @@ const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
     // this is valid even though its logical type is "time-milis" (missing the 
second "l"),
     // because unknown logical types are ignored
     (r#"{"type": "int", "logicalType": "time-milis"}"#, true),
-    (r#"{"type": "long", "logicalType": "time-millis"}"#, false),
+    // this is still valid because unknown logicalType should be ignored
+    (r#"{"type": "long", "logicalType": "time-millis"}"#, true),
 ];
 
 const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
@@ -492,7 +501,8 @@ const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
     // this is valid even though its logical type is "time-micro" (missing the 
last "s"), because
     // unknown logical types are ignored
     (r#"{"type": "long", "logicalType": "time-micro"}"#, true),
-    (r#"{"type": "int", "logicalType": "time-micros"}"#, false),
+    // this is still valid because unknown logicalType should be ignored
+    (r#"{"type": "int", "logicalType": "time-micros"}"#, true),
 ];
 
 const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
@@ -507,8 +517,9 @@ const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
         true,
     ),
     (
+        // this is still valid because unknown logicalType should be ignored
         r#"{"type": "int", "logicalType": "timestamp-millis"}"#,
-        false,
+        true,
     ),
 ];
 
@@ -524,8 +535,9 @@ const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
         true,
     ),
     (
+        // this is still valid because unknown logicalType should be ignored
         r#"{"type": "int", "logicalType": "timestamp-micros"}"#,
-        false,
+        true,
     ),
 ];
 
@@ -562,6 +574,7 @@ that recursive types are not properly supported.
 
 #[test]
 fn test_correct_recursive_extraction() {
+    init();
     let raw_outer_schema = r#"{
         "type": "record",
         "name": "X",
@@ -600,6 +613,8 @@ fn test_correct_recursive_extraction() {
 
 #[test]
 fn test_parse() {
+    init();
+
     for (raw_schema, valid) in EXAMPLES.iter() {
         let schema = Schema::parse_str(raw_schema);
         if *valid {
@@ -622,6 +637,7 @@ fn test_parse() {
 #[test]
 /// Test that the string generated by an Avro Schema object is, in fact, a 
valid Avro schema.
 fn test_valid_cast_to_string_after_parse() {
+    init();
     for (raw_schema, _) in VALID_EXAMPLES.iter() {
         let schema = Schema::parse_str(raw_schema).unwrap();
         Schema::parse_str(schema.canonical_form().as_str()).unwrap();
@@ -633,6 +649,7 @@ fn test_valid_cast_to_string_after_parse() {
 /// 2. Serialize "original" to a string and parse that string to generate Avro 
schema "round trip".
 /// 3. Ensure "original" and "round trip" schemas are equivalent.
 fn test_equivalence_after_round_trip() {
+    init();
     for (raw_schema, _) in VALID_EXAMPLES.iter() {
         let original_schema = Schema::parse_str(raw_schema).unwrap();
         let round_trip_schema =
@@ -645,6 +662,7 @@ fn test_equivalence_after_round_trip() {
 /// Test that a list of schemas whose definitions do not depend on each other 
produces the same
 /// result as parsing each element of the list individually
 fn test_parse_list_without_cross_deps() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -672,6 +690,7 @@ fn test_parse_list_without_cross_deps() {
 /// the schemas are input.
 /// However, the output order is guaranteed to be the same as the input order.
 fn test_parse_list_with_cross_deps_basic() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -718,6 +737,7 @@ fn test_parse_list_with_cross_deps_basic() {
 /// and returns an error. N.B. In the future, when recursive types are 
supported, this should be
 /// revisited.
 fn test_parse_list_recursive_type_error() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -741,6 +761,7 @@ fn test_parse_list_recursive_type_error() {
 #[test]
 /// Test that schema composition resolves namespaces.
 fn test_parse_list_with_cross_deps_and_namespaces() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -786,6 +807,7 @@ fn test_parse_list_with_cross_deps_and_namespaces() {
 #[test]
 /// Test that schema composition fails on namespace errors.
 fn test_parse_list_with_cross_deps_and_namespaces_error() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -849,6 +871,7 @@ fn permutation_indices(indices: Vec<usize>) -> 
Vec<Vec<usize>> {
 /// Test that a type that depends on more than one other type is parsed 
correctly when all
 /// definitions are passed in as a list. This should work regardless of the 
ordering of the list.
 fn test_parse_list_multiple_dependencies() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -913,6 +936,7 @@ fn test_parse_list_multiple_dependencies() {
 /// Test that a type that is depended on by more than one other type is parsed 
correctly when all
 /// definitions are passed in as a list. This should work regardless of the 
ordering of the list.
 fn test_parse_list_shared_dependency() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -988,6 +1012,7 @@ fn test_parse_list_shared_dependency() {
 #[test]
 /// Test that trying to parse two schemas with the same fullname returns an 
Error
 fn test_name_collision_error() {
+    init();
     let schema_str_1 = r#"{
         "name": "foo.A",
         "type": "record",
@@ -1010,6 +1035,7 @@ fn test_name_collision_error() {
 #[test]
 /// Test that having the same name but different fullnames does not return an 
error
 fn test_namespace_prevents_collisions() {
+    init();
     let schema_str_1 = r#"{
         "name": "A",
         "type": "record",
@@ -1059,6 +1085,7 @@ fn test_namespace_prevents_collisions() {
 
 #[test]
 fn test_fullname_name_and_namespace_specified() {
+    init();
     let name: Name =
         serde_json::from_str(r#"{"name": "a", "namespace": "o.a.h", "aliases": 
null}"#).unwrap();
     let fullname = name.fullname(None);
@@ -1067,6 +1094,7 @@ fn test_fullname_name_and_namespace_specified() {
 
 #[test]
 fn test_fullname_fullname_and_namespace_specified() {
+    init();
     let name: Name =
         serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h", 
"aliases": null}"#)
             .unwrap();
@@ -1076,6 +1104,7 @@ fn test_fullname_fullname_and_namespace_specified() {
 
 #[test]
 fn test_fullname_name_and_default_namespace_specified() {
+    init();
     let name: Name =
         serde_json::from_str(r#"{"name": "a", "namespace": null, "aliases": 
null}"#).unwrap();
     let fullname = name.fullname(Some("b.c.d"));
@@ -1084,6 +1113,7 @@ fn test_fullname_name_and_default_namespace_specified() {
 
 #[test]
 fn test_fullname_fullname_and_default_namespace_specified() {
+    init();
     let name: Name =
         serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null, 
"aliases": null}"#).unwrap();
     let fullname = name.fullname(Some("o.a.h"));
@@ -1092,6 +1122,7 @@ fn 
test_fullname_fullname_and_default_namespace_specified() {
 
 #[test]
 fn test_fullname_fullname_namespace_and_default_namespace_specified() {
+    init();
     let name: Name =
         serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", 
"aliases": null}"#)
             .unwrap();
@@ -1101,6 +1132,7 @@ fn 
test_fullname_fullname_namespace_and_default_namespace_specified() {
 
 #[test]
 fn test_fullname_name_namespace_and_default_namespace_specified() {
+    init();
     let name: Name =
         serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": 
null}"#).unwrap();
     let fullname = name.fullname(Some("o.a.h"));
@@ -1109,6 +1141,8 @@ fn 
test_fullname_name_namespace_and_default_namespace_specified() {
 
 #[test]
 fn test_doc_attributes() {
+    init();
+
     fn assert_doc(schema: &Schema) {
         match schema {
             Schema::Enum { doc, .. } => assert!(doc.is_some()),
@@ -1164,6 +1198,8 @@ fn test_other_attributes() {
 
 #[test]
 fn test_root_error_is_not_swallowed_on_parse_error() -> Result<(), String> {
+    init();
+
     let raw_schema = r#"/not/a/real/file"#;
     let error = Schema::parse_str(raw_schema).unwrap_err();

[avro] 09/30: AVRO-3197 Fallback to the 'type' when the logical type does not support the type (#1340)

Reply via email to