This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch avro-3526-improve-schema-resolution-related-to-byte-types
in repository https://gitbox.apache.org/repos/asf/avro.git
commit e578056291461428cdb18bf7ab5c4a0a4648087e
Author: Martin Tzvetanov Grigorov <[email protected]>
AuthorDate: Mon May 30 11:28:54 2022 +0300

    AVRO-3526: Rust: Improve resolving Bytes and Fixed from string

    Unicode related documentation: https://en.wikipedia.org/wiki/UTF-8#Invalid_sequences_and_error_handling

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---
 lang/rust/avro/src/types.rs | 21 ++++++++++++++++++++-
 lang/rust/avro/tests/io.rs  |  8 ++++----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index f9e472227..045694509 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -762,7 +762,7 @@ impl Value {
     fn resolve_string(self) -> Result<Self, Error> {
         match self {
             Value::String(s) => Ok(Value::String(s)),
-            Value::Bytes(bytes) => Ok(Value::String(
+            Value::Bytes(bytes) | Value::Fixed(_, bytes) => Ok(Value::String(
                 String::from_utf8(bytes).map_err(Error::ConvertToUtf8)?,
             )),
             other => Err(Error::GetString(other.into())),
@@ -778,6 +778,7 @@ impl Value {
                     Err(Error::CompareFixedSizes { size, n })
                 }
             }
+            Value::String(s) => Ok(Value::Fixed(s.len(), s.into_bytes())),
             other => Err(Error::GetStringForFixed(other.into())),
         }
     }
@@ -1312,6 +1313,24 @@ Field with name '"b"' is not a member of the map items"#,
         );
     }
 
+    #[test]
+    fn resolve_string_from_bytes() {
+        let value = Value::Bytes(vec![97, 98, 99]);
+        assert_eq!(
+            value.resolve(&Schema::String).unwrap(),
+            Value::String("abc".to_string())
+        );
+    }
+
+    #[test]
+    fn resolve_string_from_fixed() {
+        let value = Value::Fixed(3, vec![97, 98, 99]);
+        assert_eq!(
+            value.resolve(&Schema::String).unwrap(),
+            Value::String("abc".to_string())
+        );
+    }
+
     #[test]
     fn resolve_bytes_failure() {
         let value = Value::Array(vec![Value::Int(2000), Value::Int(-42)]);
diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs
index 193c18140..e3d8a6836 100644
--- a/lang/rust/avro/tests/io.rs
+++ b/lang/rust/avro/tests/io.rs
@@ -55,14 +55,14 @@ lazy_static! {
         (r#""null""#, "null", Value::Null),
         (r#""boolean""#, "true", Value::Boolean(true)),
         (r#""string""#, r#""foo""#, Value::String("foo".to_string())),
-        // TODO: (#96) investigate why this is failing
-        //(r#""bytes""#, r#""\u00FF\u00FF""#, Value::Bytes(vec![0xff, 0xff])),
+        (r#""bytes""#, r#""a""#, Value::Bytes(vec![97])), // ASCII 'a' => one byte
+        (r#""bytes""#, r#""\u00FF""#, Value::Bytes(vec![195, 191])), // The value is between U+0080 and U+07FF => two bytes
         (r#""int""#, "5", Value::Int(5)),
         (r#""long""#, "5", Value::Long(5)),
         (r#""float""#, "1.1", Value::Float(1.1)),
         (r#""double""#, "1.1", Value::Double(1.1)),
-        // TODO: (#96) investigate why this is failing
-        //(r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""\u00FF\u00FF""#, Value::Bytes(vec![0xff, 0xff])),
+        (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""a""#, Value::Fixed(1, vec![97])), // ASCII 'a' => one byte
+        (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""\u00FF""#, Value::Fixed(2, vec![195, 191])), // The value is between U+0080 and U+07FF => two bytes
         (r#"{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}"#, r#""FOO""#, Value::Enum(0, "FOO".to_string())),
         (r#"{"type": "array", "items": "int"}"#, "[1, 2, 3]", Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)])),
         (r#"{"type": "map", "values": "int"}"#, r#"{"a": 1, "b": 2}"#, Value::Map([("a".to_string(), Value::Int(1)), ("b".to_string(), Value::Int(2))].iter().cloned().collect())),
