scovich commented on code in PR #7442:
URL: https://github.com/apache/arrow-rs/pull/7442#discussion_r2628988462
##########
arrow-json/src/reader/mod.rs:
##########
@@ -369,6 +381,95 @@ impl<R: BufRead> RecordBatchReader for Reader<R> {
}
}
+/// A trait to create custom decoders for specific data types.
+///
+/// This allows overriding the default decoders for specific data types,
+/// or adding new decoders for custom data types.
+///
+/// # Examples
+///
+/// ```
+/// use arrow_json::{ArrayDecoder, DecoderFactory, TapeElement, Tape, ReaderBuilder, StructMode};
+/// use arrow_schema::ArrowError;
+/// use arrow_schema::{DataType, Field, Fields, Schema};
+/// use arrow_array::cast::AsArray;
+/// use arrow_array::Array;
+/// use arrow_array::builder::StringBuilder;
+/// use arrow_data::ArrayData;
+/// use std::sync::Arc;
+///
+/// struct IncorrectStringAsNullDecoder {}
+///
+/// impl ArrayDecoder for IncorrectStringAsNullDecoder {
+/// fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
+/// let mut builder = StringBuilder::new();
+/// for p in pos {
+/// match tape.get(*p) {
+/// TapeElement::String(idx) => {
+/// builder.append_value(tape.get_string(idx));
+/// }
+/// _ => builder.append_null(),
+/// }
+/// }
+/// Ok(builder.finish().into_data())
+/// }
+/// }
+///
+/// #[derive(Debug)]
+/// struct IncorrectStringAsNullDecoderFactory;
+///
+/// impl DecoderFactory for IncorrectStringAsNullDecoderFactory {
+/// fn make_default_decoder<'a>(
+/// &self,
+/// data_type: DataType,
+/// _coerce_primitive: bool,
+/// _strict_mode: bool,
+/// _is_nullable: bool,
+/// _struct_mode: StructMode,
+/// ) -> Result<Option<Box<dyn ArrayDecoder>>, ArrowError> {
+/// match data_type {
+/// DataType::Utf8 => Ok(Some(Box::new(IncorrectStringAsNullDecoder {}))),
+/// _ => Ok(None),
+/// }
+/// }
+/// }
+///
+/// let json = r#"
+/// {"a": "a"}
+/// {"a": 12}
+/// "#;
+/// let batch = ReaderBuilder::new(Arc::new(Schema::new(Fields::from(vec![Field::new(
+/// "a",
+/// DataType::Utf8,
+/// true,
+/// )]))))
+/// .with_decoder_factory(Arc::new(IncorrectStringAsNullDecoderFactory))
+/// .build(json.as_bytes())
+/// .unwrap()
+/// .next()
+/// .unwrap()
+/// .unwrap();
+///
+/// let values = batch.column(0).as_string::<i32>();
+/// assert_eq!(values.len(), 2);
+/// assert_eq!(values.value(0), "a");
+/// assert!(values.is_null(1));
+/// ```
+pub trait DecoderFactory: std::fmt::Debug + Send + Sync {
+ /// Make a decoder that overrides the default decoder for a specific data type.
+ /// This can be used to override how, e.g., errors in decoding are handled.
+ fn make_default_decoder(
+ &self,
+ _data_type: DataType,
+ _coerce_primitive: bool,
+ _strict_mode: bool,
+ _is_nullable: bool,
+ _struct_mode: StructMode,
+ ) -> Result<Option<Box<dyn ArrayDecoder>>, ArrowError> {
+ Ok(None)
+ }
Review Comment:
It seems odd for a single-method trait to give its only method a provided
(default) no-op definition. What use case does that enable?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]