martin-g commented on code in PR #238:
URL: https://github.com/apache/avro-rs/pull/238#discussion_r2235357318
##########
avro/src/state_machines/reading/mod.rs:
##########
@@ -0,0 +1,338 @@
+use std::{ops::RangeInclusive, sync::Arc};
+
+use oval::Buffer;
+use serde::Deserialize;
+
+use crate::{
+ Error, Schema,
+ error::Details,
+ state_machines::reading::{
+ block::{ArrayStateMachine, MapStateMachine},
+ bytes::BytesStateMachine,
+ object::ObjectStateMachine,
+ },
+ types::Value,
+};
+pub mod async_impl;
+pub mod block;
+pub mod bytes;
+pub mod object;
+mod object_container_file;
+pub mod sync;
+
+pub trait StateMachine: Sized {
+ type Output: Sized;
+
+ /// Start/continue the state machine.
+ ///
+ /// Implementers are not allowed to return until they can't make progress anymore.
+ fn parse(self, buffer: &mut Buffer) -> StateMachineResult<Self,
Self::Output>;
+}
+
+/// Indicates whether the state machine has completed or needs to be polled again.
+#[must_use]
+pub enum StateMachineControlFlow<StateMachine, Output> {
+ /// The state machine needs more data before it can continue.
+ NeedMore(StateMachine),
+ /// The state machine is done and the result is returned.
+ Done(Output),
+}
+
+pub type StateMachineResult<StateMachine, Output> =
+ Result<StateMachineControlFlow<StateMachine, Output>, Error>;
+
+/// The sub state machine that is currently being driven.
+///
+/// The `Int`, `Long`, `Float`, `Double`, and `Enum` state machines don't have state, as
+/// they don't consume the buffer if there are not enough bytes. This means that the only
+/// thing these state machines are keeping track of is which type we're actually decoding.
+#[derive(Default)]
+pub enum SubStateMachine {
+ // TODO: Remove None, replace with Option<Box<SubStateMachine>>
+ #[default]
+ None,
+ Int,
+ Long,
+ Float,
+ Double,
+ Enum,
+ Bytes(BytesStateMachine),
+ String(BytesStateMachine),
+ Fixed(BytesStateMachine),
+ Array(ArrayStateMachine),
+ Map(MapStateMachine),
+ Object(ObjectStateMachine),
+ Union(Box<[CommandTape]>),
+}
+
+/// An item that was read from the document.
+#[must_use]
+pub enum ItemRead {
+ // TODO: This is probably not useful to have
+ Null,
+ Boolean(bool),
+ Int(i32),
+ Long(i64),
+ Float(f32),
+ Double(f64),
+ Bytes(Box<[u8]>),
+ String(Box<str>),
+ // TODO: Maybe this can just be a bytes
+ Fixed(Box<[u8]>),
+ /// The variant of the Enum that was read.
+ Enum(u32),
+ /// The variant of the Union that was read.
+ ///
+ /// The variant data is next.
+ Union(u32),
+ /// The start of a block of a Map or Array.
+ Block(usize),
+}
+
+/// The next item type that should be read.
+#[must_use]
+pub enum ToRead {
+ Null,
+ Boolean,
+ Int,
+ Long,
+ Float,
+ Double,
+ Bytes,
+ String,
+ Enum,
+ Fixed(usize),
+ Array(CommandTape),
+ Map(CommandTape),
+ Union(Box<[CommandTape]>),
+}
+
+/// A section of a tape of commands.
+///
+/// This has a reference to the entire tape, so that references to types (for Union, Map, Array) can be resolved.
+#[derive(Debug, Clone)]
+#[must_use]
+pub struct CommandTape {
+ inner: Arc<[u8]>,
+ read_range: RangeInclusive<usize>,
+}
+
+impl CommandTape {
+ pub const NULL: u8 = 0;
+ pub const BOOLEAN: u8 = 1;
+ pub const INT: u8 = 2;
+ pub const LONG: u8 = 3;
+ pub const FLOAT: u8 = 4;
+ pub const DOUBLE: u8 = 5;
+ pub const BYTES: u8 = 6;
+ pub const STRING: u8 = 7;
+ pub const ENUM: u8 = 8;
+ pub const FIXED: u8 = 9;
+ // TODO: Maybe combine Array and Map into Block
+ // TODO: Add a type reference type, so that if a Block has a single type no reference is needed
+ pub const ARRAY: u8 = 10;
+ pub const MAP: u8 = 11;
+ pub const UNION: u8 = 12;
+
+ /// Create a new tape that will be read from start to end.
+ pub fn new(command_tape: Arc<[u8]>) -> Self {
+ let length = command_tape.len();
+ Self {
+ inner: command_tape,
+ read_range: 0..=length,
+ }
+ }
+
+ /// Check if the section of the tape we're reading is finished.
+ pub fn is_finished(&self) -> bool {
+ self.read_range.is_empty()
+ }
+
+ /// Extract a part from the tape to give to a sub state machine.
+ ///
+ /// The tape will run from start to end (inclusive).
+ pub fn extract(&self, start: usize, end: usize) -> Self {
Review Comment:
Should this assert that `start < end`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]