This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/full_enum_support in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 226d4c2dfb3a7dd3ea5c2ab2bf9709ec28baf8b9 Author: Kriskras99 <[email protected]> AuthorDate: Fri Mar 13 11:25:58 2026 +0100 doc: Document the mapping between the Serde and Avro data models --- avro/src/documentation/avro_data_model_to_serde.rs | 54 ++++++++++ avro/src/documentation/mod.rs | 2 + avro/src/documentation/serde_data_model_to_avro.rs | 116 +++++++++++++++++++++ avro/src/serde/mod.rs | 3 +- 4 files changed, 174 insertions(+), 1 deletion(-) diff --git a/avro/src/documentation/avro_data_model_to_serde.rs b/avro/src/documentation/avro_data_model_to_serde.rs new file mode 100644 index 0000000..fdde42f --- /dev/null +++ b/avro/src/documentation/avro_data_model_to_serde.rs @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # Mapping the Avro data model to the Serde data model +//! +//! When manually mapping an Avro schema to Rust types it is important to understand +//! how the different data models are mapped. When mapping from Rust types to an Avro schema, +//! see [the documentation for the reverse](super::serde_data_model_to_avro). +//! +//! Only the mapping as defined here is supported. Any other behavior might change in a minor version. +//! +//! ## Primitive Types +//! 
- `null`: `()` +//! - `boolean`: [`bool`] +//! - `int`: [`i32`] (or [`i16`], [`i8`], [`u16`], [`u8`]) +//! - `long`: [`i64`] (or [`u32`]) +//! - `float`: [`f32`] +//! - `double`: [`f64`] +//! - `bytes`: [`Vec<u8>`](std::vec::Vec) (or any type that uses [`Serialize::serialize_bytes`](serde::Serialize), [`Deserialize::deserialize_bytes`](serde::Deserialize), [`Deserialize::deserialize_byte_buf`](serde::Deserialize)) +//! - It is required to use [`apache_avro::serde::bytes`] as otherwise Serde will (de)serialize a `Vec` as an array of integers instead. +//! - `string`: [`String`] (or any type that uses [`Serialize::serialize_str`](serde::Serialize), [`Deserialize::deserialize_str`](serde::Deserialize), [`Deserialize::deserialize_string`](serde::Deserialize)) +//! +//! ## Complex Types +//! - `records`: A struct with the same name and fields or a tuple with the same fields. +//! - Extra fields can be added to the struct if they are marked with `#[serde(skip)]` +//! - `enums`: An enum with the same name and unit variants for every symbol +//! - The index of the symbol must match the index of the variant +//! - `arrays`: [`Vec`] (or any type that uses [`Serialize::serialize_seq`](serde::Serialize), [`Deserialize::deserialize_seq`](serde::Deserialize)) +//! - `[T; N]` is (de)serialized as a tuple by Serde, to (de)serialize them as an `array` use [`apache_avro::serde::array`] +//! - `maps`: [`HashMap<String, _>`](std::collections::HashMap) (or any type that uses [`Serialize::serialize_map`](serde::Serialize), [`Deserialize::deserialize_map`](serde::Deserialize)) +//! - `unions`: An enum with a variant for each union variant +//! - The index of the union variant must match the index of the enum variant +//! - A `null` can be a unit variant or a newtype variant with a unit type +//! - All other variants must be newtype variants, struct variants, or tuple variants. +//! 
- `fixed`: [`Vec<u8>`](std::vec::Vec) (or any type that uses [`Serialize::serialize_bytes`](serde::Serialize), [`Deserialize::deserialize_bytes`](serde::Deserialize), [`Deserialize::deserialize_byte_buf`](serde::Deserialize)) +//! - It is required to use [`apache_avro::serde::bytes`] as otherwise Serde will (de)serialize a `Vec` as an array of integers instead. +//! +//! [`apache_avro::serde::array`]: crate::serde::array +//! [`apache_avro::serde::bytes`]: crate::serde::bytes +//! [`apache_avro::serde::fixed`]: crate::serde::fixed diff --git a/avro/src/documentation/mod.rs b/avro/src/documentation/mod.rs index 7771012..9fc2524 100644 --- a/avro/src/documentation/mod.rs +++ b/avro/src/documentation/mod.rs @@ -20,5 +20,7 @@ //! This module does not contain any code, and is only available during `rustdoc` builds. //! +pub mod avro_data_model_to_serde; pub mod dynamic; pub mod primer; +pub mod serde_data_model_to_avro; diff --git a/avro/src/documentation/serde_data_model_to_avro.rs b/avro/src/documentation/serde_data_model_to_avro.rs new file mode 100644 index 0000000..b7dfa1e --- /dev/null +++ b/avro/src/documentation/serde_data_model_to_avro.rs @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
# Mapping the Serde data model to the Avro data model +//! +//! When manually mapping Rust types to an Avro schema, or the reverse, it is important to understand +//! how the different data models are mapped. When mapping from an Avro schema to Rust types, +//! see [the documentation for the reverse](super::avro_data_model_to_serde). +//! +//! Only the schemas generated by the [`AvroSchema`] derive and the mapping as defined here are +//! supported. Any other behavior might change in a minor version. +//! +//! The following list is based on [the data model defined by Serde](https://serde.rs/data-model.html): +//! - **14 primitive types** +//! - `bool` => [`Schema::Boolean`] +//! - `i8`, `i16`, `i32`, `u8`, `u16` => [`Schema::Int`] +//! - `i64`, `u32` => [`Schema::Long`] +//! - `u64` => [`Schema::Fixed`]`(name: "u64", size: 8)` +//! - This is not a `Schema::Long` as that is a signed number of maximum 64 bits. +//! - `i128` => [`Schema::Fixed`]`(name: "i128", size: 16)` +//! - `u128` => [`Schema::Fixed`]`(name: "u128", size: 16)` +//! - `f32` => [`Schema::Float`] +//! - `f64` => [`Schema::Double`] +//! - `char` => [`Schema::String`] +//! - Only one character allowed, deserializer will return an error for strings with more than one character. +//! - **string** => [`Schema::String`] +//! - **byte array** => [`Schema::Bytes`] or [`Schema::Fixed`] +//! - **option** => [`Schema::Union([Schema::Null, _])`](crate::schema::Schema::Union) +//! - **unit** => [`Schema::Null`] +//! - **unit struct** => [`Schema::Record`] with the unqualified name equal to the name of the struct and zero fields +//! - **unit variant** => See [Enums](#enums) +//! - **newtype struct** => [`Schema::Record`] with the unqualified name equal to the name of the struct and one field +//! - **newtype variant** => See [Enums](#enums) +//! - **seq** => [`Schema::Array`] +//! - **tuple** +//! - For tuples with one element, the schema will be the schema of the only element +//! 
- For tuples with more than one element, the schema will be a [`Schema::Record`] with as many fields as there are elements. +//! The schema must have the attribute `org.apache.avro.rust.tuple` with the value set to `true`. +//! - **Note:** Serde (de)serializes arrays (`[T; N]`) as tuples. To (de)serialize an array as a +//! [`Schema::Array`] use [`apache_avro::serde::array`]. +//! - **tuple_struct** => [`Schema::Record`] with the unqualified name equal to the name of the struct and as many fields as there are elements +//! - **Note:** Tuple structs with 0 or 1 elements will also be (de)serialized as a [`Schema::Record`]. This +//! is different from normal tuples. +//! - **tuple_variant** => See [Enums](#enums) +//! - **map** => [`Schema::Map`] +//! - **Note:** the key type of the map will be (de)serialized as a [`Schema::String`] +//! - **struct** => [`Schema::Record`] +//! - **struct_variant** => See [Enums](#enums) +//! +//! ## Enums +//! +//! ### Externally tagged +//! This is the default enum representation for Serde. It can be mapped in three ways to the Avro data model. +//! For all three options it is important that the enum index matches the Avro index. +//! - As a [`Schema::Enum`], this is only possible for enums with only unit variants. +//! - As a [`Schema::Union`] with a [`Schema::Record`] for every variant: +//! - **unit_variant** => [`Schema::Record`] named as the variant and with no fields. +//! - **newtype_variant** => [`Schema::Record`] named as the variant and with one field. +//! The schema must have the attribute `org.apache.avro.rust.union_of_records` with the value set to `true`. +//! - **tuple_variant** => [`Schema::Record`] named as the variant and with as many fields as there are elements. +//! - **struct_variant** => [`Schema::Record`] named as the variant and with a field for every field of the struct variant. +//! - As a [`Schema::Union`] without the wrapper [`Schema::Record`], all schemas must be unique: +//! 
- **unit_variant** => [`Schema::Null`]. +//! - **newtype_variant** => The schema of the inner type. +//! - **tuple_variant** => [`Schema::Record`] named as the variant and with as many fields as there are elements. +//! - **struct_variant** => [`Schema::Record`] named as the variant and with a field for every field of the struct variant. +//! +//! ### Internally tagged +//! This enum representation is used by Serde if the attribute `#[serde(tag = "...")]` is used. +//! It maps to a [`Schema::Record`]. There must be at least one field that is named as the value of the +//! `tag` attribute. If a field is not used by all variants it must have a `default` set. +//! +//! ### Adjacently tagged +//! This enum representation is used by Serde if the attributes `#[serde(tag = "...", content = "...")]` are used. +//! It maps to a [`Schema::Record`] with two fields. One field must be named as the value of the `tag` +//! attribute and use the [`Schema::Enum`] schema. The other field must be named as the value of the +//! `content` attribute and use the [`Schema::Union`] schema. +//! +//! ### Untagged +//! This enum representation is used by Serde if the attribute `#[serde(untagged)]` is used. It maps +//! to a [`Schema::Union`] with the following schemas: +//! - **unit_variant** => [`Schema::Null`]. +//! - **newtype_variant** => The schema of the inner type. +//! - **tuple_variant** => [`Schema::Record`] named as the variant and with as many fields as there are elements. +//! - **struct_variant** => [`Schema::Record`] named as the variant and with a field for every field of the struct variant. +//! +//! [`AvroSchema`]: crate::AvroSchema +//! [`Schema::Array`]: crate::schema::Schema::Array +//! [`Schema::Boolean`]: crate::schema::Schema::Boolean +//! [`Schema::Bytes`]: crate::schema::Schema::Bytes +//! [`Schema::Double`]: crate::schema::Schema::Double +//! [`Schema::Enum`]: crate::schema::Schema::Enum +//! [`Schema::Fixed`]: crate::schema::Schema::Fixed +//! 
[`Schema::Float`]: crate::schema::Schema::Float +//! [`Schema::Int`]: crate::schema::Schema::Int +//! [`Schema::Long`]: crate::schema::Schema::Long +//! [`Schema::Map`]: crate::schema::Schema::Map +//! [`Schema::Null`]: crate::schema::Schema::Null +//! [`Schema::Record`]: crate::schema::Schema::Record +//! [`Schema::String`]: crate::schema::Schema::String +//! [`Schema::Union`]: crate::schema::Schema::Union +//! [`apache_avro::serde::array`]: crate::serde::array diff --git a/avro/src/serde/mod.rs b/avro/src/serde/mod.rs index b3bfd2a..e182ffe 100644 --- a/avro/src/serde/mod.rs +++ b/avro/src/serde/mod.rs @@ -27,7 +27,8 @@ //! details on how to change the generated schema. //! //! Alternatively, you can write your own schema. If you go down this path, it is recommended you start with -//! the schema derived by [`AvroSchema`] and then modify it to fit your needs. +//! the schema derived by [`AvroSchema`] and then modify it to fit your needs. For more information on mapping +//! between Serde and Avro see [Avro to Serde](crate::documentation::avro_data_model_to_serde) and [Serde to Avro](crate::documentation::serde_data_model_to_avro). //! //! #### Performance pitfall //! One performance pitfall with Serde is (de)serializing bytes. The implementation of [`Serialize`][`serde::Serialize`]
