This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.11 by this push:
     new 29cd96ac0 AVRO-3526: RUST: Improve schema resolution related to byte types (#1706)
29cd96ac0 is described below

commit 29cd96ac06ec4a83af8de2725ad875b29557d28c
Author: Martin Grigorov <[email protected]>
AuthorDate: Tue May 31 08:41:54 2022 +0300

    AVRO-3526: RUST: Improve schema resolution related to byte types (#1706)
    
    * Remove obsolete TODO in Rust code
    
    It was addressed 2 years ago in https://github.com/flavray/avro-rs/pull/146/files#diff-cfb402317f9a1267396cd28c72ab83f4236dca6ebbf2f87ed850f2b82da5862dR385
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * AVRO-3526: Rust: Improve resolving Bytes and Fixed from string
    
    Unicode-related documentation: https://en.wikipedia.org/wiki/UTF-8#Invalid_sequences_and_error_handling
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    (cherry picked from commit 9738a759f01e267772092c2299d8f0d7047942a8)
---
 lang/rust/avro/src/schema.rs |  1 -
 lang/rust/avro/src/types.rs  | 21 ++++++++++++++++++++-
 lang/rust/avro/tests/io.rs   |  8 ++++----
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index fa698b10c..dfb9b146e 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -787,7 +787,6 @@ impl Schema {
 impl Parser {
     /// Create a `Schema` from a string representing a JSON Avro schema.
     fn parse_str(&mut self, input: &str) -> Result<Schema, Error> {
-        // TODO: (#82) this should be a ParseSchemaError wrapping the JSON error
         let value = serde_json::from_str(input).map_err(Error::ParseSchemaJson)?;
         self.parse(&value, &None)
     }
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index f9e472227..045694509 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -762,7 +762,7 @@ impl Value {
     fn resolve_string(self) -> Result<Self, Error> {
         match self {
             Value::String(s) => Ok(Value::String(s)),
-            Value::Bytes(bytes) => Ok(Value::String(
+            Value::Bytes(bytes) | Value::Fixed(_, bytes) => Ok(Value::String(
                 String::from_utf8(bytes).map_err(Error::ConvertToUtf8)?,
             )),
             other => Err(Error::GetString(other.into())),
@@ -778,6 +778,7 @@ impl Value {
                     Err(Error::CompareFixedSizes { size, n })
                 }
             }
+            Value::String(s) => Ok(Value::Fixed(s.len(), s.into_bytes())),
             other => Err(Error::GetStringForFixed(other.into())),
         }
     }
@@ -1312,6 +1313,24 @@ Field with name '"b"' is not a member of the map items"#,
         );
     }
 
+    #[test]
+    fn resolve_string_from_bytes() {
+        let value = Value::Bytes(vec![97, 98, 99]);
+        assert_eq!(
+            value.resolve(&Schema::String).unwrap(),
+            Value::String("abc".to_string())
+        );
+    }
+
+    #[test]
+    fn resolve_string_from_fixed() {
+        let value = Value::Fixed(3, vec![97, 98, 99]);
+        assert_eq!(
+            value.resolve(&Schema::String).unwrap(),
+            Value::String("abc".to_string())
+        );
+    }
+
     #[test]
     fn resolve_bytes_failure() {
         let value = Value::Array(vec![Value::Int(2000), Value::Int(-42)]);
diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs
index 193c18140..e3d8a6836 100644
--- a/lang/rust/avro/tests/io.rs
+++ b/lang/rust/avro/tests/io.rs
@@ -55,14 +55,14 @@ lazy_static! {
         (r#""null""#, "null", Value::Null),
         (r#""boolean""#, "true", Value::Boolean(true)),
         (r#""string""#, r#""foo""#, Value::String("foo".to_string())),
-        // TODO: (#96) investigate why this is failing
-        //(r#""bytes""#, r#""\u00FF\u00FF""#, Value::Bytes(vec![0xff, 0xff])),
+        (r#""bytes""#, r#""a""#, Value::Bytes(vec![97])), // ASCII 'a' => one byte
+        (r#""bytes""#, r#""\u00FF""#, Value::Bytes(vec![195, 191])), // The value is between U+0080 and U+07FF => two bytes
         (r#""int""#, "5", Value::Int(5)),
         (r#""long""#, "5", Value::Long(5)),
         (r#""float""#, "1.1", Value::Float(1.1)),
         (r#""double""#, "1.1", Value::Double(1.1)),
-        // TODO: (#96) investigate why this is failing
-        //(r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""\u00FF\u00FF""#, Value::Bytes(vec![0xff, 0xff])),
+        (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""a""#, Value::Fixed(1, vec![97])), // ASCII 'a' => one byte
+        (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""\u00FF""#, Value::Fixed(2, vec![195, 191])), // The value is between U+0080 and U+07FF => two bytes
         (r#"{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}"#, r#""FOO""#, Value::Enum(0, "FOO".to_string())),
         (r#"{"type": "array", "items": "int"}"#, "[1, 2, 3]", Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)])),
         (r#"{"type": "map", "values": "int"}"#, r#"{"a": 1, "b": 2}"#, Value::Map([("a".to_string(), Value::Int(1)), ("b".to_string(), Value::Int(2))].iter().cloned().collect())),

Reply via email to