This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/branch-1.11 by this push:
new 29cd96ac0 AVRO-3526: RUST: Improve schema resolution related to byte types (#1706)
29cd96ac0 is described below
commit 29cd96ac06ec4a83af8de2725ad875b29557d28c
Author: Martin Grigorov <[email protected]>
AuthorDate: Tue May 31 08:41:54 2022 +0300
AVRO-3526: RUST: Improve schema resolution related to byte types (#1706)
* Remove obsolete TODO in Rust code
It was addressed 2 years ago in
https://github.com/flavray/avro-rs/pull/146/files#diff-cfb402317f9a1267396cd28c72ab83f4236dca6ebbf2f87ed850f2b82da5862dR385
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* AVRO-3526: Rust: Improve resolving Bytes and Fixed from string
Unicode related documentation:
https://en.wikipedia.org/wiki/UTF-8#Invalid_sequences_and_error_handling
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
(cherry picked from commit 9738a759f01e267772092c2299d8f0d7047942a8)
---
lang/rust/avro/src/schema.rs | 1 -
lang/rust/avro/src/types.rs | 21 ++++++++++++++++++++-
lang/rust/avro/tests/io.rs | 8 ++++----
3 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index fa698b10c..dfb9b146e 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -787,7 +787,6 @@ impl Schema {
impl Parser {
/// Create a `Schema` from a string representing a JSON Avro schema.
fn parse_str(&mut self, input: &str) -> Result<Schema, Error> {
- // TODO: (#82) this should be a ParseSchemaError wrapping the JSON error
let value =
serde_json::from_str(input).map_err(Error::ParseSchemaJson)?;
self.parse(&value, &None)
}
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index f9e472227..045694509 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -762,7 +762,7 @@ impl Value {
fn resolve_string(self) -> Result<Self, Error> {
match self {
Value::String(s) => Ok(Value::String(s)),
- Value::Bytes(bytes) => Ok(Value::String(
+ Value::Bytes(bytes) | Value::Fixed(_, bytes) => Ok(Value::String(
String::from_utf8(bytes).map_err(Error::ConvertToUtf8)?,
)),
other => Err(Error::GetString(other.into())),
@@ -778,6 +778,7 @@ impl Value {
Err(Error::CompareFixedSizes { size, n })
}
}
+ Value::String(s) => Ok(Value::Fixed(s.len(), s.into_bytes())),
other => Err(Error::GetStringForFixed(other.into())),
}
}
@@ -1312,6 +1313,24 @@ Field with name '"b"' is not a member of the map items"#,
);
}
+ #[test]
+ fn resolve_string_from_bytes() {
+ let value = Value::Bytes(vec![97, 98, 99]);
+ assert_eq!(
+ value.resolve(&Schema::String).unwrap(),
+ Value::String("abc".to_string())
+ );
+ }
+
+ #[test]
+ fn resolve_string_from_fixed() {
+ let value = Value::Fixed(3, vec![97, 98, 99]);
+ assert_eq!(
+ value.resolve(&Schema::String).unwrap(),
+ Value::String("abc".to_string())
+ );
+ }
+
#[test]
fn resolve_bytes_failure() {
let value = Value::Array(vec![Value::Int(2000), Value::Int(-42)]);
diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs
index 193c18140..e3d8a6836 100644
--- a/lang/rust/avro/tests/io.rs
+++ b/lang/rust/avro/tests/io.rs
@@ -55,14 +55,14 @@ lazy_static! {
(r#""null""#, "null", Value::Null),
(r#""boolean""#, "true", Value::Boolean(true)),
(r#""string""#, r#""foo""#, Value::String("foo".to_string())),
- // TODO: (#96) investigate why this is failing
- //(r#""bytes""#, r#""\u00FF\u00FF""#, Value::Bytes(vec![0xff, 0xff])),
+ (r#""bytes""#, r#""a""#, Value::Bytes(vec![97])), // ASCII 'a' => one byte
+ (r#""bytes""#, r#""\u00FF""#, Value::Bytes(vec![195, 191])), // The value is between U+0080 and U+07FF => two bytes
(r#""int""#, "5", Value::Int(5)),
(r#""long""#, "5", Value::Long(5)),
(r#""float""#, "1.1", Value::Float(1.1)),
(r#""double""#, "1.1", Value::Double(1.1)),
- // TODO: (#96) investigate why this is failing
- //(r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""\u00FF\u00FF""#, Value::Bytes(vec![0xff, 0xff])),
+ (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""a""#, Value::Fixed(1, vec![97])), // ASCII 'a' => one byte
+ (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""\u00FF""#, Value::Fixed(2, vec![195, 191])), // The value is between U+0080 and U+07FF => two bytes
(r#"{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}"#,
r#""FOO""#, Value::Enum(0, "FOO".to_string())),
(r#"{"type": "array", "items": "int"}"#, "[1, 2, 3]",
Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)])),
(r#"{"type": "map", "values": "int"}"#, r#"{"a": 1, "b": 2}"#,
Value::Map([("a".to_string(), Value::Int(1)), ("b".to_string(),
Value::Int(2))].iter().cloned().collect())),