This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/generic_datum_reader in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 25fe8a2d58f9175ff9f28097dbb3de7b2c5a523c Author: default <[email protected]> AuthorDate: Tue Mar 3 13:52:59 2026 +0000 feat: Replace `from_avro_datum*` functions with `GenericDatumReader` --- avro/src/lib.rs | 21 +- avro/src/reader/datum.rs | 351 ++++++++++++++++++++++++++++++ avro/src/reader/mod.rs | 174 +-------------- avro/src/schema/mod.rs | 12 +- avro/src/schema/resolve.rs | 2 +- avro/src/serde/de.rs | 6 +- avro/src/writer.rs | 5 +- avro/tests/avro-3786.rs | 32 ++- avro/tests/avro-3787.rs | 12 +- avro/tests/io.rs | 65 +++--- avro/tests/schema.rs | 18 +- avro/tests/to_from_avro_datum_schemata.rs | 22 +- 12 files changed, 474 insertions(+), 246 deletions(-) diff --git a/avro/src/lib.rs b/avro/src/lib.rs index a56aaad..e4b97bd 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -56,7 +56,6 @@ mod decimal; mod decode; mod duration; mod encode; -mod reader; mod writer; #[cfg(doc)] @@ -64,6 +63,7 @@ pub mod documentation; pub mod error; pub mod headers; pub mod rabin; +pub mod reader; pub mod schema; pub mod schema_compatibility; pub mod schema_equality; @@ -90,8 +90,13 @@ pub use codec::{Codec, DeflateSettings}; pub use decimal::Decimal; pub use duration::{Days, Duration, Millis, Months}; pub use error::Error; +#[expect( + deprecated, + reason = "Still need to export it until we remove it completely" +)] pub use reader::{ - Reader, from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata, + Reader, + datum::{from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata}, read_marker, single_object::{GenericSingleObjectReader, SpecificSingleObjectReader}, }; @@ -149,9 +154,11 @@ pub fn set_serde_human_readable(human_readable: bool) -> bool { #[cfg(test)] mod tests { use crate::{ - Codec, Reader, Schema, Writer, from_avro_datum, + Codec, Reader, Schema, Writer, + reader::datum::GenericDatumReader, types::{Record, Value}, }; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; //TODO: move where it fits 
better @@ -296,7 +303,7 @@ mod tests { } #[test] - fn test_illformed_length() { + fn test_illformed_length() -> TestResult { let raw_schema = r#" { "type": "record", @@ -313,7 +320,11 @@ mod tests { // Would allocate 18446744073709551605 bytes let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff]; - let value = from_avro_datum(&schema, &mut &*illformed, None); + let value = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut &*illformed); assert!(value.is_err()); + + Ok(()) } } diff --git a/avro/src/reader/datum.rs b/avro/src/reader/datum.rs new file mode 100644 index 0000000..070b53b --- /dev/null +++ b/avro/src/reader/datum.rs @@ -0,0 +1,351 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::io::Read; + +use bon::bon; + +use crate::{AvroResult, Schema, decode::decode_internal, schema::ResolvedSchema, types::Value}; + +/// Reader for reading raw Avro data. +/// +/// This is most likely not what you need. Most users should use [`Reader`][crate::Reader], +/// [`GenericSingleObjectReader`][crate::GenericSingleObjectReader], or +/// [`SpecificSingleObjectReader`][crate::SpecificSingleObjectReader] instead. 
+pub struct GenericDatumReader<'s> { + writer: &'s Schema, + resolved: ResolvedSchema<'s>, + reader: Option<(&'s Schema, ResolvedSchema<'s>)>, +} + +#[bon] +impl<'s> GenericDatumReader<'s> { + /// Build a [`GenericDatumReader`]. + /// + /// This is most likely not what you need. Most users should use [`Reader`][crate::Reader], + /// [`GenericSingleObjectReader`][crate::GenericSingleObjectReader], or + /// [`SpecificSingleObjectReader`][crate::SpecificSingleObjectReader] instead. + #[builder] + pub fn new( + /// The schema that was used to write the Avro datum. + #[builder(start_fn)] + writer_schema: &'s Schema, + /// Already resolved schemata that will be used to resolve references in the writer's schema. + resolved_writer_schemata: Option<ResolvedSchema<'s>>, + /// The schema that will be used to resolve the value to conform to the new schema. + reader_schema: Option<&'s Schema>, + /// Already resolved schemata that will be used to resolve references in the reader's schema. + resolved_reader_schemata: Option<ResolvedSchema<'s>>, + ) -> AvroResult<Self> { + let resolved_writer_schemata = if let Some(resolved) = resolved_writer_schemata { + resolved + } else { + ResolvedSchema::try_from(writer_schema)? + }; + + let reader = if let Some(reader) = reader_schema { + if let Some(resolved) = resolved_reader_schemata { + Some((reader, resolved)) + } else { + Some((reader, ResolvedSchema::try_from(reader)?)) + } + } else { + None + }; + + Ok(Self { + writer: writer_schema, + resolved: resolved_writer_schemata, + reader, + }) + } +} + +impl<'s, S: generic_datum_reader_builder::State> GenericDatumReaderBuilder<'s, S> { + /// Set the schemata that will be used to resolve any references in the writer's schema. + /// + /// This is equivalent to `.resolved_writer_schemata(ResolvedSchema::new_with_schemata(schemata)?)`. + /// If you already have a [`ResolvedSchema`], use that function instead. 
+ pub fn writer_schemata( + self, + schemata: Vec<&'s Schema>, + ) -> AvroResult< + GenericDatumReaderBuilder<'s, generic_datum_reader_builder::SetResolvedWriterSchemata<S>>, + > + where + S::ResolvedWriterSchemata: generic_datum_reader_builder::IsUnset, + { + let resolved = ResolvedSchema::new_with_schemata(schemata)?; + Ok(self.resolved_writer_schemata(resolved)) + } + + /// Set the schemata that will be used to resolve any references in the reader's schema. + /// + /// This is equivalent to `.resolved_reader_schemata(ResolvedSchema::new_with_schemata(schemata)?)`. + /// If you already have a [`ResolvedSchema`], use that function instead. + /// + /// This function can only be called after the reader schema is set. + pub fn reader_schemata( + self, + schemata: Vec<&'s Schema>, + ) -> AvroResult< + GenericDatumReaderBuilder<'s, generic_datum_reader_builder::SetResolvedReaderSchemata<S>>, + > + where + S::ResolvedReaderSchemata: generic_datum_reader_builder::IsUnset, + S::ReaderSchema: generic_datum_reader_builder::IsSet, + { + let resolved = ResolvedSchema::new_with_schemata(schemata)?; + Ok(self.resolved_reader_schemata(resolved)) + } +} + +impl<'s> GenericDatumReader<'s> { + /// Read an Avro datum from the reader. + pub fn read_value<R: Read>(&self, reader: &mut R) -> AvroResult<Value> { + let value = decode_internal(self.writer, self.resolved.get_names(), None, reader)?; + if let Some((reader, resolved)) = &self.reader { + value.resolve_internal(reader, resolved.get_names(), None, &None) + } else { + Ok(value) + } + } +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```no_run +/// GenericDatumReader::builder(writer_schema) +/// .maybe_reader_schema(reader_schema) +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` encoded in Avro format given its `Schema` and anything implementing `io::Read` +/// to read from. +/// +/// In case a reader `Schema` is provided, schema resolution will also be performed. 
+/// +/// **NOTE** This function has a quite small niche of usage and does NOT take care of reading the +/// header and consecutive data blocks; use [`Reader`](struct.Reader.html) if you don't know what +/// you are doing, instead. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum<R: Read>( + writer_schema: &Schema, + reader: &mut R, + reader_schema: Option<&Schema>, +) -> AvroResult<Value> { + GenericDatumReader::builder(writer_schema) + .maybe_reader_schema(reader_schema) + .build()? + .read_value(reader) +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```no_run +/// GenericDatumReader::builder(writer_schema) +/// .writer_schemata(writer_schemata)? +/// .maybe_reader_schema(reader_schema) +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` from raw Avro data. +/// +/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided +/// schemata to resolve any dependencies. +/// +/// When a reader `Schema` is provided, schema resolution will also be performed. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum_schemata<R: Read>( + writer_schema: &Schema, + writer_schemata: Vec<&Schema>, + reader: &mut R, + reader_schema: Option<&Schema>, +) -> AvroResult<Value> { + GenericDatumReader::builder(writer_schema) + .writer_schemata(writer_schemata)? + .maybe_reader_schema(reader_schema) + .build()? + .read_value(reader) +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```no_run +/// GenericDatumReader::builder(writer_schema) +/// .writer_schemata(writer_schemata)? +/// .maybe_reader_schema(reader_schema) +/// .reader_schemata(reader_schemata)? +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` from raw Avro data. +/// +/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided +/// schemata to resolve any dependencies. 
+/// +/// When a reader `Schema` is provided, schema resolution will also be performed. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum_reader_schemata<R: Read>( + writer_schema: &Schema, + writer_schemata: Vec<&Schema>, + reader: &mut R, + reader_schema: Option<&Schema>, + reader_schemata: Vec<&Schema>, +) -> AvroResult<Value> { + GenericDatumReader::builder(writer_schema) + .writer_schemata(writer_schemata)? + .maybe_reader_schema(reader_schema) + .reader_schemata(reader_schemata)? + .build()? + .read_value(reader) +} + +#[cfg(test)] +mod tests { + use apache_avro_test_helper::TestResult; + use serde::Deserialize; + + use crate::{ + Schema, from_value, + reader::datum::GenericDatumReader, + types::{Record, Value}, + }; + + #[test] + fn test_from_avro_datum() -> TestResult { + let schema = Schema::parse_str( + r#"{ + "type": "record", + "name": "test", + "fields": [ + { + "name": "a", + "type": "long", + "default": 42 + }, + { + "name": "b", + "type": "string" + } + ] + }"#, + )?; + let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; + + let mut record = Record::new(&schema).unwrap(); + record.put("a", 27i64); + record.put("b", "foo"); + let expected = record.into(); + + let avro_datum = GenericDatumReader::builder(&schema) + .build()? 
+ .read_value(&mut encoded)?; + + assert_eq!(avro_datum, expected); + + Ok(()) + } + + #[test] + fn test_from_avro_datum_with_union_to_struct() -> TestResult { + const TEST_RECORD_SCHEMA_3240: &str = r#" + { + "type": "record", + "name": "test", + "fields": [ + { + "name": "a", + "type": "long", + "default": 42 + }, + { + "name": "b", + "type": "string" + }, + { + "name": "a_nullable_array", + "type": ["null", {"type": "array", "items": {"type": "string"}}], + "default": null + }, + { + "name": "a_nullable_boolean", + "type": ["null", {"type": "boolean"}], + "default": null + }, + { + "name": "a_nullable_string", + "type": ["null", {"type": "string"}], + "default": null + } + ] + } + "#; + #[derive(Default, Debug, Deserialize, PartialEq, Eq)] + struct TestRecord3240 { + a: i64, + b: String, + a_nullable_array: Option<Vec<String>>, + // we are missing the 'a_nullable_boolean' field to simulate missing keys + // a_nullable_boolean: Option<bool>, + a_nullable_string: Option<String>, + } + + let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240)?; + let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; + + let expected_record: TestRecord3240 = TestRecord3240 { + a: 27i64, + b: String::from("foo"), + a_nullable_array: None, + a_nullable_string: None, + }; + + let avro_datum = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut encoded)?; + let parsed_record: TestRecord3240 = match &avro_datum { + Value::Record(_) => from_value::<TestRecord3240>(&avro_datum)?, + unexpected => { + panic!("could not map avro data to struct, found unexpected: {unexpected:?}") + } + }; + + assert_eq!(parsed_record, expected_record); + + Ok(()) + } + + #[test] + fn test_null_union() -> TestResult { + let schema = Schema::parse_str(r#"["null", "long"]"#)?; + let mut encoded: &'static [u8] = &[2, 0]; + + let avro_datum = GenericDatumReader::builder(&schema) + .build()? 
+ .read_value(&mut encoded)?; + assert_eq!(avro_datum, Value::Union(1, Box::new(Value::Long(0)))); + + Ok(()) + } +} diff --git a/avro/src/reader/mod.rs b/avro/src/reader/mod.rs index 0113e50..94af909 100644 --- a/avro/src/reader/mod.rs +++ b/avro/src/reader/mod.rs @@ -18,14 +18,10 @@ //! Logic handling reading from Avro format at user level. mod block; +pub mod datum; pub mod single_object; -use crate::{ - AvroResult, - decode::{decode, decode_internal}, - schema::{ResolvedSchema, Schema}, - types::Value, -}; +use crate::{AvroResult, schema::Schema, types::Value}; use block::Block; use bon::bon; use std::{collections::HashMap, io::Read}; @@ -137,74 +133,6 @@ impl<R: Read> Iterator for Reader<'_, R> { } } -/// Decode a `Value` encoded in Avro format given its `Schema` and anything implementing `io::Read` -/// to read from. -/// -/// In case a reader `Schema` is provided, schema resolution will also be performed. -/// -/// **NOTE** This function has a quite small niche of usage and does NOT take care of reading the -/// header and consecutive data blocks; use [`Reader`](struct.Reader.html) if you don't know what -/// you are doing, instead. -pub fn from_avro_datum<R: Read>( - writer_schema: &Schema, - reader: &mut R, - reader_schema: Option<&Schema>, -) -> AvroResult<Value> { - let value = decode(writer_schema, reader)?; - match reader_schema { - Some(schema) => value.resolve(schema), - None => Ok(value), - } -} - -/// Decode a `Value` from raw Avro data. -/// -/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided -/// schemata to resolve any dependencies. -/// -/// When a reader `Schema` is provided, schema resolution will also be performed. 
-pub fn from_avro_datum_schemata<R: Read>( - writer_schema: &Schema, - writer_schemata: Vec<&Schema>, - reader: &mut R, - reader_schema: Option<&Schema>, -) -> AvroResult<Value> { - from_avro_datum_reader_schemata( - writer_schema, - writer_schemata, - reader, - reader_schema, - Vec::with_capacity(0), - ) -} - -/// Decode a `Value` from raw Avro data. -/// -/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided -/// schemata to resolve any dependencies. -/// -/// When a reader `Schema` is provided, schema resolution will also be performed. -pub fn from_avro_datum_reader_schemata<R: Read>( - writer_schema: &Schema, - writer_schemata: Vec<&Schema>, - reader: &mut R, - reader_schema: Option<&Schema>, - reader_schemata: Vec<&Schema>, -) -> AvroResult<Value> { - let rs = ResolvedSchema::try_from(writer_schemata)?; - let value = decode_internal(writer_schema, rs.get_names(), None, reader)?; - match reader_schema { - Some(schema) => { - if reader_schemata.is_empty() { - value.resolve(schema) - } else { - value.resolve_schemata(schema, reader_schemata) - } - } - None => Ok(value), - } -} - /// Reads the marker bytes from Avro bytes generated earlier by a `Writer` pub fn read_marker(bytes: &[u8]) -> [u8; 16] { assert!( @@ -219,11 +147,9 @@ pub fn read_marker(bytes: &[u8]) -> [u8; 16] { #[cfg(test)] mod tests { use super::*; - use crate::from_value; use crate::types::Record; use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; - use serde::Deserialize; use std::io::Cursor; const SCHEMA: &str = r#" @@ -243,7 +169,6 @@ mod tests { ] } "#; - const UNION_SCHEMA: &str = r#"["null", "long"]"#; const ENCODED: &[u8] = &[ 79u8, 98u8, 106u8, 1u8, 4u8, 22u8, 97u8, 118u8, 114u8, 111u8, 46u8, 115u8, 99u8, 104u8, 101u8, 109u8, 97u8, 222u8, 1u8, 123u8, 34u8, 116u8, 121u8, 112u8, 101u8, 34u8, 58u8, 34u8, @@ -261,101 +186,6 @@ mod tests { 207u8, 108u8, 180u8, 158u8, 57u8, 114u8, 40u8, 173u8, 199u8, 228u8, 239u8, ]; - #[test] - 
fn test_from_avro_datum() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - let expected = record.into(); - - assert_eq!(from_avro_datum(&schema, &mut encoded, None)?, expected); - - Ok(()) - } - - #[test] - fn test_from_avro_datum_with_union_to_struct() -> TestResult { - const TEST_RECORD_SCHEMA_3240: &str = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "a", - "type": "long", - "default": 42 - }, - { - "name": "b", - "type": "string" - }, - { - "name": "a_nullable_array", - "type": ["null", {"type": "array", "items": {"type": "string"}}], - "default": null - }, - { - "name": "a_nullable_boolean", - "type": ["null", {"type": "boolean"}], - "default": null - }, - { - "name": "a_nullable_string", - "type": ["null", {"type": "string"}], - "default": null - } - ] - } - "#; - #[derive(Default, Debug, Deserialize, PartialEq, Eq)] - struct TestRecord3240 { - a: i64, - b: String, - a_nullable_array: Option<Vec<String>>, - // we are missing the 'a_nullable_boolean' field to simulate missing keys - // a_nullable_boolean: Option<bool>, - a_nullable_string: Option<String>, - } - - let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240)?; - let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; - - let expected_record: TestRecord3240 = TestRecord3240 { - a: 27i64, - b: String::from("foo"), - a_nullable_array: None, - a_nullable_string: None, - }; - - let avro_datum = from_avro_datum(&schema, &mut encoded, None)?; - let parsed_record: TestRecord3240 = match &avro_datum { - Value::Record(_) => from_value::<TestRecord3240>(&avro_datum)?, - unexpected => { - panic!("could not map avro data to struct, found unexpected: {unexpected:?}") - } - }; - - assert_eq!(parsed_record, expected_record); - - Ok(()) - } - - #[test] - fn test_null_union() -> TestResult { - let schema = 
Schema::parse_str(UNION_SCHEMA)?; - let mut encoded: &'static [u8] = &[2, 0]; - - assert_eq!( - from_avro_datum(&schema, &mut encoded, None)?, - Value::Union(1, Box::new(Value::Long(0))) - ); - - Ok(()) - } - #[test] fn test_reader_iterator() -> TestResult { let schema = Schema::parse_str(SCHEMA)?; diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 3fd776c..98600fa 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -1168,7 +1168,7 @@ fn field_ordering_position(field: &str) -> Option<usize> { #[cfg(test)] mod tests { use super::*; - use crate::{error::Details, rabin::Rabin}; + use crate::{error::Details, rabin::Rabin, reader::datum::GenericDatumReader}; use apache_avro_test_helper::{ TestResult, logger::{assert_logged, assert_not_logged}, @@ -2995,7 +2995,10 @@ mod tests { let datum = crate::to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 2); @@ -3580,7 +3583,10 @@ mod tests { let mut x = &datum[..]; // Deserialization should succeed and we should be able to resolve the schema. - let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; assert!(deser_value.validate(&reader_schema)); // Verify that we can read a field from the record. 
diff --git a/avro/src/schema/resolve.rs b/avro/src/schema/resolve.rs index 449436b..5087c8e 100644 --- a/avro/src/schema/resolve.rs +++ b/avro/src/schema/resolve.rs @@ -23,7 +23,7 @@ use crate::schema::{ use crate::{AvroResult, Error, Schema}; use std::collections::HashMap; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ResolvedSchema<'s> { pub(super) names_ref: NamesRef<'s>, schemata: Vec<&'s Schema>, diff --git a/avro/src/serde/de.rs b/avro/src/serde/de.rs index 18c42a2..3144fac 100644 --- a/avro/src/serde/de.rs +++ b/avro/src/serde/de.rs @@ -953,7 +953,7 @@ mod tests { use apache_avro_test_helper::TestResult; use super::*; - use crate::Decimal; + use crate::{Decimal, reader::datum::GenericDatumReader}; #[derive(PartialEq, Eq, Serialize, Deserialize, Debug)] pub struct StringEnum { @@ -992,7 +992,9 @@ mod tests { let mut buf = std::io::Cursor::new(crate::to_avro_datum(&schema, value)?); // decode from avro - let value = crate::from_avro_datum(&schema, &mut buf, None)?; + let value = GenericDatumReader::builder(&schema) + .build()? + .read_value(&mut buf)?; let decoded_data: StringEnum = crate::from_value(&value)?; diff --git a/avro/src/writer.rs b/avro/src/writer.rs index fe264a1..02b883f 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -898,6 +898,7 @@ mod tests { duration::{Days, Duration, Millis, Months}, headers::GlueSchemaUuidHeader, rabin::Rabin, + reader::datum::GenericDatumReader, schema::{DecimalSchema, FixedSchema, Name}, types::Record, util::zig_i64, @@ -1045,7 +1046,9 @@ mod tests { // Should deserialize from the schema into the logical type. let mut r = ser.as_slice(); - let de = crate::from_avro_datum(&schema, &mut r, None)?; + let de = GenericDatumReader::builder(&schema) + .build()? 
+ .read_value(&mut r)?; assert_eq!(de, value); Ok(()) } diff --git a/avro/tests/avro-3786.rs b/avro/tests/avro-3786.rs index c509a3f..42c5464 100644 --- a/avro/tests/avro-3786.rs +++ b/avro/tests/avro-3786.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{Schema, from_avro_datum, to_avro_datum, to_value, types}; +use apache_avro::{Schema, reader::datum::GenericDatumReader, to_avro_datum, to_value, types}; use apache_avro_test_helper::TestResult; #[test] @@ -135,7 +135,10 @@ fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult { let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 2); @@ -259,7 +262,10 @@ fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? 
+ .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -372,7 +378,10 @@ fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_ let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -485,7 +494,10 @@ fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_ let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -598,7 +610,10 @@ fn deserialize_union_with_different_enum_order_defined_in_record() -> TestResult let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? 
+ .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); @@ -872,7 +887,10 @@ fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_i let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 3); diff --git a/avro/tests/avro-3787.rs b/avro/tests/avro-3787.rs index 192c573..4fc2f3d 100644 --- a/avro/tests/avro-3787.rs +++ b/avro/tests/avro-3787.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{Schema, from_avro_datum, to_avro_datum, to_value, types}; +use apache_avro::{Schema, reader::datum::GenericDatumReader, to_avro_datum, to_value, types}; use apache_avro_test_helper::TestResult; #[test] @@ -136,7 +136,10 @@ fn avro_3787_deserialize_union_with_unknown_symbol() -> TestResult { let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? 
+ .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 2); @@ -267,7 +270,10 @@ fn avro_3787_deserialize_union_with_unknown_symbol_no_ref() -> TestResult { let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { types::Value::Record(fields) => { assert_eq!(fields.len(), 1); diff --git a/avro/tests/io.rs b/avro/tests/io.rs index dba7eef..0d95850 100644 --- a/avro/tests/io.rs +++ b/avro/tests/io.rs @@ -16,7 +16,9 @@ // under the License. //! Port of <https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py> -use apache_avro::{Error, Schema, error::Details, from_avro_datum, to_avro_datum, types::Value}; +use apache_avro::{ + Error, Schema, error::Details, reader::datum::GenericDatumReader, to_avro_datum, types::Value, +}; use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::{io::Cursor, sync::OnceLock}; @@ -232,7 +234,9 @@ fn test_round_trip() -> TestResult { for (raw_schema, value) in schemas_to_validate().iter() { let schema = Schema::parse_str(raw_schema)?; let encoded = to_avro_datum(&schema, value.clone()).unwrap(); - let decoded = from_avro_datum(&schema, &mut Cursor::new(encoded), None).unwrap(); + let decoded = GenericDatumReader::builder(&schema) + .build()? 
+ .read_value(&mut Cursor::new(encoded))?; assert_eq!(value, &decoded); } @@ -276,14 +280,10 @@ fn test_schema_promotion() -> TestResult { for (j, reader_raw_schema) in promotable_schemas.iter().enumerate().skip(i + 1) { let reader_schema = Schema::parse_str(reader_raw_schema)?; let encoded = to_avro_datum(&writer_schema, original_value.clone())?; - let decoded = from_avro_datum( - &writer_schema, - &mut Cursor::new(encoded), - Some(&reader_schema), - ) - .unwrap_or_else(|_| { - panic!("failed to decode {original_value:?} with schema: {reader_raw_schema:?}",) - }); + let decoded = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; assert_eq!(decoded, promotable_values[j]); } } @@ -299,11 +299,10 @@ fn test_unknown_symbol() -> TestResult { Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}"#)?; let original_value = Value::Enum(0, "FOO".to_string()); let encoded = to_avro_datum(&writer_schema, original_value)?; - let decoded = from_avro_datum( - &writer_schema, - &mut Cursor::new(encoded), - Some(&reader_schema), - ); + let decoded = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded)); assert!(decoded.is_err()); Ok(()) @@ -323,11 +322,10 @@ fn test_default_value() -> TestResult { ))?; let datum_to_read = Value::Record(vec![("H".to_string(), default_datum.clone())]); let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; + let datum_read = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; match default_datum { // For float/double, NaN != NaN, so we check specially here. 
@@ -380,11 +378,10 @@ fn test_no_default_value() -> TestResult { }"#, )?; let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let result = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - ); + let result = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded)); assert!(result.is_err()); Ok(()) @@ -409,11 +406,10 @@ fn test_projection() -> TestResult { ("F".to_string(), Value::Int(6)), ]); let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; + let datum_read = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut Cursor::new(encoded))?; assert_eq!(datum_to_read, datum_read); Ok(()) @@ -438,11 +434,10 @@ fn test_field_order() -> TestResult { ("E".to_string(), Value::Int(5)), ]); let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; + let datum_read = GenericDatumReader::builder(long_record_schema()) + .reader_schema(&reader_schema) + .build()? 
+ .read_value(&mut Cursor::new(encoded))?; assert_eq!(datum_to_read, datum_read); Ok(()) diff --git a/avro/tests/schema.rs b/avro/tests/schema.rs index 3347e83..9ec8684 100644 --- a/avro/tests/schema.rs +++ b/avro/tests/schema.rs @@ -23,7 +23,8 @@ use std::{ use apache_avro::{ Codec, Error, Reader, Schema, Writer, error::Details, - from_avro_datum, from_value, + from_value, + reader::datum::GenericDatumReader, schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema}, to_avro_datum, to_value, types::{Record, Value}, @@ -33,6 +34,7 @@ use apache_avro_test_helper::{ data::{DOC_EXAMPLES, OTHER_ATTRIBUTES_EXAMPLES, examples, valid_examples}, init, }; +use serde::{Deserialize, Serialize}; #[test] fn test_correct_recursive_extraction() -> TestResult { @@ -859,8 +861,10 @@ fn avro_old_issue_47() -> TestResult { let ser_value = to_value(record.clone())?; let serialized_bytes = to_avro_datum(&schema, ser_value)?; - let de_value = &from_avro_datum(&schema, &mut &*serialized_bytes, None)?; - let deserialized_record = from_value::<MyRecord>(de_value)?; + let de_value = GenericDatumReader::builder(&schema) + .build()? 
+ .read_value(&mut &*serialized_bytes)?; + let deserialized_record = from_value::<MyRecord>(&de_value)?; assert_eq!(record, deserialized_record); Ok(()) @@ -869,9 +873,6 @@ fn avro_old_issue_47() -> TestResult { #[test] fn test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_type() -> TestResult { - use apache_avro::{from_avro_datum, to_avro_datum, types::Value}; - use serde::{Deserialize, Serialize}; - #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] pub struct BarUseParent { #[serde(rename = "barUse")] @@ -986,7 +987,10 @@ fn test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_ let datum = to_avro_datum(&writer_schema, avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + let deser_value = GenericDatumReader::builder(&writer_schema) + .reader_schema(&reader_schema) + .build()? + .read_value(&mut x)?; match deser_value { Value::Record(fields) => { assert_eq!(fields.len(), 2); diff --git a/avro/tests/to_from_avro_datum_schemata.rs b/avro/tests/to_from_avro_datum_schemata.rs index 41c0812..14feacd 100644 --- a/avro/tests/to_from_avro_datum_schemata.rs +++ b/avro/tests/to_from_avro_datum_schemata.rs @@ -16,8 +16,8 @@ // under the License. 
use apache_avro::{ - Codec, Reader, Schema, Writer, from_avro_datum_reader_schemata, from_avro_datum_schemata, - to_avro_datum_schemata, types::Value, + Codec, Reader, Schema, Writer, reader::datum::GenericDatumReader, to_avro_datum_schemata, + types::Value, }; use apache_avro_test_helper::{TestResult, init}; @@ -55,7 +55,10 @@ fn test_avro_3683_multiple_schemata_to_from_avro_datum() -> TestResult { let actual = to_avro_datum_schemata(schema_b, schemata.clone(), record.clone())?; assert_eq!(actual, expected); - let value = from_avro_datum_schemata(schema_b, schemata, &mut actual.as_slice(), None)?; + let value = GenericDatumReader::builder(schema_b) + .writer_schemata(schemata)? + .build()? + .read_value(&mut actual.as_slice())?; assert_eq!(value, record); Ok(()) @@ -79,13 +82,12 @@ fn avro_rs_106_test_multiple_schemata_to_from_avro_datum_with_resolution() -> Te let actual = to_avro_datum_schemata(schema_b, schemata.clone(), record.clone())?; assert_eq!(actual, expected); - let value = from_avro_datum_reader_schemata( - schema_b, - schemata.clone(), - &mut actual.as_slice(), - Some(schema_b), - schemata, - )?; + let value = GenericDatumReader::builder(schema_b) + .writer_schemata(schemata.clone())? + .reader_schema(schema_b) + .reader_schemata(schemata)? + .build()? + .read_value(&mut actual.as_slice())?; assert_eq!(value, record); Ok(())
