This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 2b354a3e8 Support Rust structures --> `RecordBatch` by adding `Serde` support to `RawDecoder` (#3949) (#3979)
2b354a3e8 is described below

commit 2b354a3e8d7b57f2ad5eb12aeb283cc15bc9e170
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Wed Apr 5 18:35:15 2023 +0100

    Support Rust structures --> `RecordBatch` by adding `Serde` support to `RawDecoder` (#3949) (#3979)
    
    * Add serde support to RawDecoder (#3949)
    
    * Clippy
    
    * More examples
    
    * Use BTreeMap for deterministic test output
    
    * Use new Field constructors
    
    * Review feedback
---
 arrow-json/Cargo.toml            |   2 +
 arrow-json/src/raw/mod.rs        | 181 +++++++++++++++++
 arrow-json/src/raw/serializer.rs | 422 +++++++++++++++++++++++++++++++++++++++
 arrow-json/src/raw/tape.rs       |  23 +++
 arrow/Cargo.toml                 |   1 +
 arrow/src/lib.rs                 |  46 +++++
 6 files changed, 675 insertions(+)

diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
index 34bd447da..453e4aa35 100644
--- a/arrow-json/Cargo.toml
+++ b/arrow-json/Cargo.toml
@@ -42,6 +42,7 @@ arrow-schema = { workspace = true  }
 half = { version = "2.1", default-features = false }
 indexmap = { version = "1.9", default-features = false, features = ["std"] }
 num = { version = "0.4", default-features = false, features = ["std"] }
+serde = { version = "1.0", default-features = false }
 serde_json = { version = "1.0", default-features = false, features = ["std"] }
 chrono = { version = "0.4.23", default-features = false, features = ["clock"] }
 lexical-core = { version = "0.8", default-features = false }
@@ -49,3 +50,4 @@ lexical-core = { version = "0.8", default-features = false }
 [dev-dependencies]
 tempfile = "3.3"
 flate2 = { version = "1", default-features = false, features = ["rust_backend"] }
+serde = { version = "1.0", default-features = false, features = ["derive"] }
diff --git a/arrow-json/src/raw/mod.rs b/arrow-json/src/raw/mod.rs
index 1bae8ac52..f1f1ffb77 100644
--- a/arrow-json/src/raw/mod.rs
+++ b/arrow-json/src/raw/mod.rs
@@ -29,11 +29,13 @@ use crate::raw::struct_array::StructArrayDecoder;
 use crate::raw::tape::{Tape, TapeDecoder, TapeElement};
 use crate::raw::timestamp_array::TimestampArrayDecoder;
 use arrow_array::timezone::Tz;
+use arrow_array::types::Float32Type;
 use arrow_array::types::*;
 use arrow_array::{downcast_integer, make_array, RecordBatch, RecordBatchReader};
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType, SchemaRef, TimeUnit};
 use chrono::Utc;
+use serde::Serialize;
 use std::io::BufRead;
 
 mod boolean_array;
@@ -41,6 +43,7 @@ mod decimal_array;
 mod list_array;
 mod map_array;
 mod primitive_array;
+mod serializer;
 mod string_array;
 mod struct_array;
 mod tape;
@@ -233,6 +236,184 @@ impl RawDecoder {
         self.tape_decoder.decode(buf)
     }
 
+    /// Serialize `rows` to this [`RawDecoder`]
+    ///
+    /// This provides a simple way to convert [serde]-compatible datastructures into arrow
+    /// [`RecordBatch`].
+    ///
+    /// Custom conversion logic as described in [arrow_array::builder] will likely outperform this,
+    /// especially where the schema is known at compile-time, however, this provides a mechanism
+    /// to get something up and running quickly
+    ///
+    /// It can be used with [`serde_json::Value`]
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use serde_json::{Value, json};
+    /// # use arrow_array::cast::AsArray;
+    /// # use arrow_array::types::Float32Type;
+    /// # use arrow_json::RawReaderBuilder;
+    /// # use arrow_schema::{DataType, Field, Schema};
+    /// let json = vec![json!({"float": 2.3}), json!({"float": 5.7})];
+    ///
+    /// let schema = Schema::new(vec![Field::new("float", DataType::Float32, true)]);
+    /// let mut decoder = RawReaderBuilder::new(Arc::new(schema)).build_decoder().unwrap();
+    ///
+    /// decoder.serialize(&json).unwrap();
+    /// let batch = decoder.flush().unwrap().unwrap();
+    /// assert_eq!(batch.num_rows(), 2);
+    /// assert_eq!(batch.num_columns(), 1);
+    /// let values = batch.column(0).as_primitive::<Float32Type>().values();
+    /// assert_eq!(values, &[2.3, 5.7])
+    /// ```
+    ///
+    /// Or with arbitrary [`Serialize`] types
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use arrow_json::RawReaderBuilder;
+    /// # use arrow_schema::{DataType, Field, Schema};
+    /// # use serde::Serialize;
+    /// # use arrow_array::cast::AsArray;
+    /// # use arrow_array::types::{Float32Type, Int32Type};
+    /// #
+    /// #[derive(Serialize)]
+    /// struct MyStruct {
+    ///     int32: i32,
+    ///     float: f32,
+    /// }
+    ///
+    /// let schema = Schema::new(vec![
+    ///     Field::new("int32", DataType::Int32, false),
+    ///     Field::new("float", DataType::Float32, false),
+    /// ]);
+    ///
+    /// let rows = vec![
+    ///     MyStruct{ int32: 0, float: 3. },
+    ///     MyStruct{ int32: 4, float: 67.53 },
+    /// ];
+    ///
+    /// let mut decoder = RawReaderBuilder::new(Arc::new(schema)).build_decoder().unwrap();
+    /// decoder.serialize(&rows).unwrap();
+    ///
+    /// let batch = decoder.flush().unwrap().unwrap();
+    ///
+    /// // Expect batch containing two columns
+    /// let int32 = batch.column(0).as_primitive::<Int32Type>();
+    /// assert_eq!(int32.values(), &[0, 4]);
+    ///
+    /// let float = batch.column(1).as_primitive::<Float32Type>();
+    /// assert_eq!(float.values(), &[3., 67.53]);
+    /// ```
+    ///
+    /// Or even complex nested types
+    ///
+    /// ```
+    /// # use std::collections::BTreeMap;
+    /// # use std::sync::Arc;
+    /// # use arrow_array::StructArray;
+    /// # use arrow_cast::display::{ArrayFormatter, FormatOptions};
+    /// # use arrow_json::RawReaderBuilder;
+    /// # use arrow_schema::{DataType, Field, Fields, Schema};
+    /// # use serde::Serialize;
+    /// #
+    /// #[derive(Serialize)]
+    /// struct MyStruct {
+    ///     int32: i32,
+    ///     list: Vec<f64>,
+    ///     nested: Vec<Option<Nested>>,
+    /// }
+    ///
+    /// impl MyStruct {
+    ///     /// Returns the [`Fields`] for [`MyStruct`]
+    ///     fn fields() -> Fields {
+    ///         let nested = DataType::Struct(Nested::fields());
+    ///         Fields::from([
+    ///             Arc::new(Field::new("int32", DataType::Int32, false)),
+    ///             Arc::new(Field::new_list(
+    ///                 "list",
+    ///                 Field::new("element", DataType::Float64, false),
+    ///                 false,
+    ///             )),
+    ///             Arc::new(Field::new_list(
+    ///                 "nested",
+    ///                 Field::new("element", nested, true),
+    ///                 true,
+    ///             )),
+    ///         ])
+    ///     }
+    /// }
+    ///
+    /// #[derive(Serialize)]
+    /// struct Nested {
+    ///     map: BTreeMap<String, Vec<String>>
+    /// }
+    ///
+    /// impl Nested {
+    ///     /// Returns the [`Fields`] for [`Nested`]
+    ///     fn fields() -> Fields {
+    ///         let element = Field::new("element", DataType::Utf8, false);
+    ///         Fields::from([
+    ///             Arc::new(Field::new_map(
+    ///                 "map",
+    ///                 "entries",
+    ///                 Field::new("key", DataType::Utf8, false),
+    ///                 Field::new_list("value", element, false),
+    ///                 false, // sorted
+    ///                 false, // nullable
+    ///             ))
+    ///         ])
+    ///     }
+    /// }
+    ///
+    /// let data = vec![
+    ///     MyStruct {
+    ///         int32: 34,
+    ///         list: vec![1., 2., 34.],
+    ///         nested: vec![
+    ///             None,
+    ///             Some(Nested {
+    ///                 map: vec![
+    ///                     ("key1".to_string(), vec!["foo".to_string(), "bar".to_string()]),
+    ///                     ("key2".to_string(), vec!["baz".to_string()])
+    ///                 ].into_iter().collect()
+    ///             })
+    ///         ]
+    ///     },
+    ///     MyStruct {
+    ///         int32: 56,
+    ///         list: vec![],
+    ///         nested: vec![]
+    ///     },
+    ///     MyStruct {
+    ///         int32: 24,
+    ///         list: vec![-1., 245.],
+    ///         nested: vec![None]
+    ///     }
+    /// ];
+    ///
+    /// let schema = Schema::new(MyStruct::fields());
+    /// let mut decoder = RawReaderBuilder::new(Arc::new(schema)).build_decoder().unwrap();
+    /// decoder.serialize(&data).unwrap();
+    /// let batch = decoder.flush().unwrap().unwrap();
+    /// assert_eq!(batch.num_rows(), 3);
+    /// assert_eq!(batch.num_columns(), 3);
+    ///
+    /// // Convert to StructArray to format
+    /// let s = StructArray::from(batch);
+    /// let options = FormatOptions::default().with_null("null");
+    /// let formatter = ArrayFormatter::try_new(&s, &options).unwrap();
+    ///
+    /// assert_eq!(&formatter.value(0).to_string(), "{int32: 34, list: [1.0, 2.0, 34.0], nested: [null, {map: {key1: [foo, bar], key2: [baz]}}]}");
+    /// assert_eq!(&formatter.value(1).to_string(), "{int32: 56, list: [], nested: []}");
+    /// assert_eq!(&formatter.value(2).to_string(), "{int32: 24, list: [-1.0, 245.0], nested: [null]}");
+    /// ```
+    ///
+    /// Note: this ignores any batch size setting, and always decodes all rows
+    ///
+    /// # Errors
+    ///
+    /// Returns any error produced by the underlying tape decoder's `serialize`
+    pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
+        self.tape_decoder.serialize(rows)
+    }
+
     /// Flushes the currently buffered data to a [`RecordBatch`]
     ///
     /// Returns `Ok(None)` if no buffered data
diff --git a/arrow-json/src/raw/serializer.rs b/arrow-json/src/raw/serializer.rs
new file mode 100644
index 000000000..d743b6dba
--- /dev/null
+++ b/arrow-json/src/raw/serializer.rs
@@ -0,0 +1,422 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::raw::tape::TapeElement;
+use lexical_core::FormattedSize;
+use serde::ser::{
+    Impossible, SerializeMap, SerializeSeq, SerializeStruct, SerializeTuple,
+    SerializeTupleStruct,
+};
+use serde::{Serialize, Serializer};
+
+#[derive(Debug)]
+pub struct SerializerError(String);
+
+impl std::error::Error for SerializerError {}
+
+impl std::fmt::Display for SerializerError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl serde::ser::Error for SerializerError {
+    fn custom<T>(msg: T) -> Self
+    where
+        T: std::fmt::Display,
+    {
+        Self(msg.to_string())
+    }
+}
+
+/// [`Serializer`] that appends serialized values to a tape of [`TapeElement`]
+///
+/// Heavily based on <https://serde.rs/impl-serializer.html>
+pub struct TapeSerializer<'a> {
+    elements: &'a mut Vec<TapeElement>, // tape the serialized elements are pushed onto
+
+    /// Buffer the bytes of every serialized string and number are appended to
+    bytes: &'a mut Vec<u8>,
+
+    /// Offsets into `bytes`; one is pushed after each value written to `bytes`
+    offsets: &'a mut Vec<usize>,
+}
+
+impl<'a> TapeSerializer<'a> {
+    /// Creates a serializer that appends to the provided tape buffers
+    ///
+    /// Nothing is written until a value is serialized
+    pub fn new(
+        elements: &'a mut Vec<TapeElement>,
+        bytes: &'a mut Vec<u8>,
+        offsets: &'a mut Vec<usize>,
+    ) -> Self {
+        TapeSerializer { elements, bytes, offsets }
+    }
+}
+
+/// Formats a numeric value as text and pushes it onto the tape as a string
+///
+/// Every tape value is stored as a string, so numbers have to be rendered to
+/// text here even though they will be parsed back again later. That round trip
+/// is wasteful, and a different tape representation might be able to avoid it
+///
+/// This is a macro rather than a generic function because sizing the scratch
+/// array from `FORMATTED_SIZE` needs const generic expressions, which are unstable
+/// <https://github.com/rust-lang/rust/issues/76560>
+macro_rules! serialize_numeric {
+    ($s:ident, $t:ty, $v:ident) => {{
+        let mut scratch = [0_u8; <$t>::FORMATTED_SIZE];
+        let formatted = lexical_core::write($v, &mut scratch);
+        $s.serialize_bytes(formatted)
+    }};
+}
+
+impl<'a, 'b> Serializer for &'a mut TapeSerializer<'b> {
+    type Ok = ();
+
+    type Error = SerializerError;
+
+    // Sequences, tuples and tuple structs are all written as tape lists
+    type SerializeSeq = ListSerializer<'a, 'b>;
+    type SerializeTuple = ListSerializer<'a, 'b>;
+    type SerializeTupleStruct = ListSerializer<'a, 'b>;
+    // Tuple variants are rejected by `serialize_tuple_variant`
+    type SerializeTupleVariant = Impossible<(), SerializerError>;
+    // Maps and structs are both written as tape objects
+    type SerializeMap = ObjectSerializer<'a, 'b>;
+    type SerializeStruct = ObjectSerializer<'a, 'b>;
+    // Struct variants are rejected by `serialize_struct_variant`
+    type SerializeStructVariant = Impossible<(), SerializerError>;
+
+    /// Pushes `True` or `False` onto the tape
+    fn serialize_bool(self, v: bool) -> Result<(), SerializerError> {
+        let element = if v {
+            TapeElement::True
+        } else {
+            TapeElement::False
+        };
+        self.elements.push(element);
+        Ok(())
+    }
+
+    // All numeric types are formatted to text and stored as tape strings
+
+    fn serialize_i8(self, v: i8) -> Result<(), SerializerError> {
+        serialize_numeric!(self, i8, v)
+    }
+
+    fn serialize_i16(self, v: i16) -> Result<(), SerializerError> {
+        serialize_numeric!(self, i16, v)
+    }
+
+    fn serialize_i32(self, v: i32) -> Result<(), SerializerError> {
+        serialize_numeric!(self, i32, v)
+    }
+
+    fn serialize_i64(self, v: i64) -> Result<(), SerializerError> {
+        serialize_numeric!(self, i64, v)
+    }
+
+    fn serialize_u8(self, v: u8) -> Result<(), SerializerError> {
+        serialize_numeric!(self, u8, v)
+    }
+
+    fn serialize_u16(self, v: u16) -> Result<(), SerializerError> {
+        serialize_numeric!(self, u16, v)
+    }
+
+    fn serialize_u32(self, v: u32) -> Result<(), SerializerError> {
+        serialize_numeric!(self, u32, v)
+    }
+
+    fn serialize_u64(self, v: u64) -> Result<(), SerializerError> {
+        serialize_numeric!(self, u64, v)
+    }
+
+    fn serialize_f32(self, v: f32) -> Result<(), SerializerError> {
+        serialize_numeric!(self, f32, v)
+    }
+
+    fn serialize_f64(self, v: f64) -> Result<(), SerializerError> {
+        serialize_numeric!(self, f64, v)
+    }
+
+    /// Writes a `char` as a one-character string
+    fn serialize_char(self, v: char) -> Result<(), SerializerError> {
+        // A char is at most 4 bytes of UTF-8, so encode on the stack
+        // instead of allocating a temporary `String`
+        self.serialize_str(v.encode_utf8(&mut [0_u8; 4]))
+    }
+
+    /// Writes the UTF-8 bytes of `v` as a tape string
+    fn serialize_str(self, v: &str) -> Result<(), SerializerError> {
+        self.serialize_bytes(v.as_bytes())
+    }
+
+    /// Appends `v` to the byte buffer and pushes a `String` element referencing it
+    ///
+    /// The element's index is `offsets.len() - 1`, taken before the new end
+    /// offset is pushed, so `offsets` must already hold at least one entry
+    fn serialize_bytes(self, v: &[u8]) -> Result<(), SerializerError> {
+        self.bytes.extend_from_slice(v);
+        let idx = self.offsets.len() - 1;
+        self.elements.push(TapeElement::String(idx as _));
+        self.offsets.push(self.bytes.len());
+        Ok(())
+    }
+
+    /// Writes `None` as a tape `Null`
+    fn serialize_none(self) -> Result<(), SerializerError> {
+        self.serialize_unit()
+    }
+
+    /// Writes `Some(value)` exactly as `value` itself
+    fn serialize_some<T>(self, value: &T) -> Result<(), SerializerError>
+    where
+        T: ?Sized + Serialize,
+    {
+        value.serialize(self)
+    }
+
+    /// Pushes a `Null` onto the tape
+    fn serialize_unit(self) -> Result<(), SerializerError> {
+        self.elements.push(TapeElement::Null);
+        Ok(())
+    }
+
+    /// Writes a unit struct as a tape `Null`, ignoring its name
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<(), SerializerError> {
+        self.serialize_unit()
+    }
+
+    /// Writes a unit enum variant as a string holding the variant name
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        variant: &'static str,
+    ) -> Result<(), SerializerError> {
+        self.serialize_str(variant)
+    }
+
+    /// Writes a newtype struct as its inner value, ignoring the wrapper
+    fn serialize_newtype_struct<T>(
+        self,
+        _name: &'static str,
+        value: &T,
+    ) -> Result<(), SerializerError>
+    where
+        T: ?Sized + Serialize,
+    {
+        value.serialize(self)
+    }
+
+    /// Writes a newtype enum variant as a single-entry object `{ variant: value }`
+    fn serialize_newtype_variant<T>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        variant: &'static str,
+        value: &T,
+    ) -> Result<(), SerializerError>
+    where
+        T: ?Sized + Serialize,
+    {
+        let mut serializer = self.serialize_map(Some(1))?;
+        serializer.serialize_key(variant)?;
+        serializer.serialize_value(value)?;
+        serializer.finish();
+        Ok(())
+    }
+
+    /// Starts a tape list; the length hint is not needed and is ignored
+    fn serialize_seq(
+        self,
+        _len: Option<usize>,
+    ) -> Result<Self::SerializeSeq, SerializerError> {
+        Ok(ListSerializer::new(self))
+    }
+
+    /// Tuples are written the same way as sequences
+    fn serialize_tuple(
+        self,
+        len: usize,
+    ) -> Result<Self::SerializeTuple, SerializerError> {
+        self.serialize_seq(Some(len))
+    }
+
+    /// Tuple structs are written the same way as sequences, ignoring their name
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        len: usize,
+    ) -> Result<Self::SerializeTupleStruct, SerializerError> {
+        self.serialize_seq(Some(len))
+    }
+
+    /// Always fails: tuple variants are not supported
+    fn serialize_tuple_variant(
+        self,
+        name: &'static str,
+        _variant_index: u32,
+        variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleVariant, SerializerError> {
+        Err(SerializerError(format!(
+            "serializing tuple variants is not currently supported: {name}::{variant}"
+        )))
+    }
+
+    // Maps are represented in JSON as `{ K: V, K: V, ... }`.
+    /// Starts a tape object; the length hint is not needed and is ignored
+    fn serialize_map(
+        self,
+        _len: Option<usize>,
+    ) -> Result<Self::SerializeMap, SerializerError> {
+        Ok(ObjectSerializer::new(self))
+    }
+
+    /// Structs are written the same way as maps, ignoring their name
+    fn serialize_struct(
+        self,
+        _name: &'static str,
+        len: usize,
+    ) -> Result<Self::SerializeStruct, SerializerError> {
+        self.serialize_map(Some(len))
+    }
+
+    /// Always fails: struct variants are not supported
+    fn serialize_struct_variant(
+        self,
+        name: &'static str,
+        _variant_index: u32,
+        variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeStructVariant, SerializerError> {
+        Err(SerializerError(format!(
+            "serializing struct variants is not currently supported: {name}::{variant}"
+        )))
+    }
+}
+
+/// Writes a map or struct to the tape as an object
+///
+/// The opening element is pushed on construction and later patched with the
+/// position of the closing element once the object is finished
+pub struct ObjectSerializer<'a, 'b> {
+    serializer: &'a mut TapeSerializer<'b>,
+    start: usize,
+}
+
+impl<'a, 'b> ObjectSerializer<'a, 'b> {
+    /// Pushes a placeholder `StartObject` and remembers its tape index
+    fn new(serializer: &'a mut TapeSerializer<'b>) -> Self {
+        let tape = &mut *serializer.elements;
+        let start = tape.len();
+        tape.push(TapeElement::StartObject(0));
+        Self { serializer, start }
+    }
+
+    /// Points the opening element at the closing one, then pushes `EndObject`
+    fn finish(self) {
+        let tape = &mut *self.serializer.elements;
+        let close_idx = tape.len();
+        tape[self.start] = TapeElement::StartObject(close_idx as _);
+        tape.push(TapeElement::EndObject(self.start as _));
+    }
+}
+
+impl<'a, 'b> SerializeMap for ObjectSerializer<'a, 'b> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Appends the key to the tape through the wrapped serializer
+    fn serialize_key<T>(&mut self, key: &T) -> Result<(), Self::Error>
+    where
+        T: ?Sized + Serialize,
+    {
+        let tape = &mut *self.serializer;
+        key.serialize(tape)
+    }
+
+    /// Appends the value to the tape through the wrapped serializer
+    fn serialize_value<T>(&mut self, value: &T) -> Result<(), Self::Error>
+    where
+        T: ?Sized + Serialize,
+    {
+        let tape = &mut *self.serializer;
+        value.serialize(tape)
+    }
+
+    /// Closes the object on the tape
+    fn end(self) -> Result<(), Self::Error> {
+        self.finish();
+        Ok(())
+    }
+}
+
+impl<'a, 'b> SerializeStruct for ObjectSerializer<'a, 'b> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Appends the field name followed by its value, exactly like a map entry
+    fn serialize_field<T>(
+        &mut self,
+        key: &'static str,
+        value: &T,
+    ) -> Result<(), Self::Error>
+    where
+        T: ?Sized + Serialize,
+    {
+        SerializeMap::serialize_key(self, key)?;
+        SerializeMap::serialize_value(self, value)
+    }
+
+    /// Closes the object on the tape
+    fn end(self) -> Result<(), Self::Error> {
+        self.finish();
+        Ok(())
+    }
+}
+
+/// Writes a sequence, tuple or tuple struct to the tape as a list
+///
+/// The opening element is pushed on construction and later patched with the
+/// position of the closing element once the list is finished
+pub struct ListSerializer<'a, 'b> {
+    serializer: &'a mut TapeSerializer<'b>,
+    start: usize,
+}
+
+impl<'a, 'b> ListSerializer<'a, 'b> {
+    /// Pushes a placeholder `StartList` and remembers its tape index
+    fn new(serializer: &'a mut TapeSerializer<'b>) -> Self {
+        let tape = &mut *serializer.elements;
+        let start = tape.len();
+        tape.push(TapeElement::StartList(0));
+        Self { serializer, start }
+    }
+
+    /// Points the opening element at the closing one, then pushes `EndList`
+    fn finish(self) {
+        let tape = &mut *self.serializer.elements;
+        let close_idx = tape.len();
+        tape[self.start] = TapeElement::StartList(close_idx as _);
+        tape.push(TapeElement::EndList(self.start as _));
+    }
+}
+
+impl<'a, 'b> SerializeSeq for ListSerializer<'a, 'b> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Appends one element to the tape through the wrapped serializer
+    fn serialize_element<T>(&mut self, value: &T) -> Result<(), Self::Error>
+    where
+        T: ?Sized + Serialize,
+    {
+        let tape = &mut *self.serializer;
+        value.serialize(tape)
+    }
+
+    /// Closes the list on the tape
+    fn end(self) -> Result<(), Self::Error> {
+        self.finish();
+        Ok(())
+    }
+}
+
+impl<'a, 'b> SerializeTuple for ListSerializer<'a, 'b> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Appends one tuple element to the tape through the wrapped serializer
+    fn serialize_element<T>(&mut self, value: &T) -> Result<(), Self::Error>
+    where
+        T: ?Sized + Serialize,
+    {
+        let tape = &mut *self.serializer;
+        value.serialize(tape)
+    }
+
+    /// Closes the list on the tape
+    fn end(self) -> Result<(), Self::Error> {
+        self.finish();
+        Ok(())
+    }
+}
+
+impl<'a, 'b> SerializeTupleStruct for ListSerializer<'a, 'b> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Appends one tuple-struct field to the tape through the wrapped serializer
+    fn serialize_field<T>(&mut self, value: &T) -> Result<(), Self::Error>
+    where
+        T: ?Sized + Serialize,
+    {
+        let tape = &mut *self.serializer;
+        value.serialize(tape)
+    }
+
+    /// Closes the list on the tape
+    fn end(self) -> Result<(), Self::Error> {
+        self.finish();
+        Ok(())
+    }
+}
diff --git a/arrow-json/src/raw/tape.rs b/arrow-json/src/raw/tape.rs
index 3f4a317c8..2720c2502 100644
--- a/arrow-json/src/raw/tape.rs
+++ b/arrow-json/src/raw/tape.rs
@@ -15,7 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use crate::raw::serializer::TapeSerializer;
 use arrow_schema::ArrowError;
+use serde::Serialize;
 use std::fmt::{Display, Formatter};
 
 /// We decode JSON to a flattened tape representation,
@@ -452,6 +454,27 @@ impl TapeDecoder {
         Ok(buf.len() - iter.len())
     }
 
+    /// Writes any type that implements [`Serialize`] into this [`TapeDecoder`]
+    ///
+    /// Each element of `rows` is serialized in order and counted as one row
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ArrowError::JsonError`] if the tape holds partial decode state, or
+    /// if serializing a row fails. In the latter case everything written by this
+    /// call is discarded, leaving the tape as it was before the call
+    pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
+        if let Some(b) = self.stack.last() {
+            return Err(ArrowError::JsonError(format!(
+                "Cannot serialize to tape containing partial decode state {}",
+                b.as_str()
+            )));
+        }
+
+        // Remember the tape extent so that a failed row cannot leave behind
+        // half-written values that are never counted in `num_rows`
+        let elements_len = self.elements.len();
+        let bytes_len = self.bytes.len();
+        let offsets_len = self.offsets.len();
+
+        let mut serializer =
+            TapeSerializer::new(&mut self.elements, &mut self.bytes, &mut self.offsets);
+
+        let result = rows
+            .iter()
+            .try_for_each(|row| row.serialize(&mut serializer));
+
+        if let Err(e) = result {
+            // Roll back whatever this call managed to write
+            self.elements.truncate(elements_len);
+            self.bytes.truncate(bytes_len);
+            self.offsets.truncate(offsets_len);
+            return Err(ArrowError::JsonError(e.to_string()));
+        }
+
+        self.num_rows += rows.len();
+
+        Ok(())
+    }
+
     /// Finishes the current [`Tape`]
     pub fn finish(&self) -> Result<Tape<'_>, ArrowError> {
         if let Some(b) = self.stack.last() {
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 2c9bf64ec..58fe54fd1 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -99,6 +99,7 @@ criterion = { version = "0.4", default-features = false }
 half = { version = "2.1", default-features = false }
 rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
 tempfile = { version = "3", default-features = false }
+serde = { version = "1.0", default-features = false, features = ["derive"] }
 
 [build-dependencies]
 
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 40b09a976..41b846b04 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -271,6 +271,52 @@
 //!
 //! Parquet is published as a [separate crate](https://crates.io/crates/parquet)
 //!
+//! # Serde Compatibility
+//!
+//! [`arrow_json::RawDecoder`] provides a mechanism to convert arbitrary, serde-compatible
+//! structures into [`RecordBatch`].
+//!
+//! Whilst likely less performant than implementing a custom builder, as described in
+//! [arrow_array::builder], this provides a simple mechanism to get up and running quickly
+//!
+//! ```
+//! # use std::sync::Arc;
+//! # use arrow_json::RawReaderBuilder;
+//! # use arrow_schema::{DataType, Field, Schema};
+//! # use serde::Serialize;
+//! # use arrow_array::cast::AsArray;
+//! # use arrow_array::types::{Float32Type, Int32Type};
+//! #
+//! #[derive(Serialize)]
+//! struct MyStruct {
+//!     int32: i32,
+//!     string: String,
+//! }
+//!
+//! let schema = Schema::new(vec![
+//!     Field::new("int32", DataType::Int32, false),
+//!     Field::new("string", DataType::Utf8, false),
+//! ]);
+//!
+//! let rows = vec![
+//!     MyStruct{ int32: 5, string: "bar".to_string() },
+//!     MyStruct{ int32: 8, string: "foo".to_string() },
+//! ];
+//!
+//! let mut decoder = RawReaderBuilder::new(Arc::new(schema)).build_decoder().unwrap();
+//! decoder.serialize(&rows).unwrap();
+//!
+//! let batch = decoder.flush().unwrap().unwrap();
+//!
+//! // Expect batch containing two columns
+//! let int32 = batch.column(0).as_primitive::<Int32Type>();
+//! assert_eq!(int32.values(), &[5, 8]);
+//!
+//! let string = batch.column(1).as_string::<i32>();
+//! assert_eq!(string.value(0), "bar");
+//! assert_eq!(string.value(1), "foo");
+//! ```
+//!
 //! # Memory and Buffers
 //!
 //! Advanced users may wish to interact with the underlying buffers of an [`Array`], for example,

Reply via email to