jecsand838 commented on code in PR #8274:
URL: https://github.com/apache/arrow-rs/pull/8274#discussion_r2325742520
##########
arrow-avro/src/writer/encoder.rs:
##########
@@ -267,11 +513,275 @@ impl F32Encoder<'_> {
struct F64Encoder<'a>(&'a arrow_array::Float64Array);
impl F64Encoder<'_> {
- #[inline]
fn encode<W: Write + ?Sized>(&mut self, idx: usize, out: &mut W) -> Result<(), ArrowError> {
// Avro double: 8 bytes, IEEE-754 little-endian
let bits = self.0.value(idx).to_bits();
out.write_all(&bits.to_le_bytes())
.map_err(|e| ArrowError::IoError(format!("write f64: {e}"), e))
}
}
+
+struct Utf8GenericEncoder<'a, O: OffsetSizeTrait>(&'a GenericStringArray<O>);
+
+impl<'a, O: OffsetSizeTrait> Utf8GenericEncoder<'a, O> {
+ fn encode<W: Write + ?Sized>(&mut self, idx: usize, out: &mut W) -> Result<(), ArrowError> {
+ write_len_prefixed(out, self.0.value(idx).as_bytes())
+ }
+}
+
+type Utf8Encoder<'a> = Utf8GenericEncoder<'a, i32>;
+type Utf8LargeEncoder<'a> = Utf8GenericEncoder<'a, i64>;
+
+struct StructEncoder<'a> {
+ encoders: Vec<FieldEncoder<'a>>,
+}
+
+impl<'a> StructEncoder<'a> {
+ fn try_new(
+ array: &'a StructArray,
+ field_bindings: &[FieldBinding],
+ ) -> Result<Self, ArrowError> {
+ let fields = match array.data_type() {
+ DataType::Struct(struct_fields) => struct_fields,
+ _ => return Err(ArrowError::SchemaError("Expected Struct".into())),
+ };
+ let mut encoders = Vec::with_capacity(field_bindings.len());
+ for field_binding in field_bindings {
+ let idx = field_binding.arrow_index;
+ let column = array.columns().get(idx).ok_or_else(|| {
+ ArrowError::SchemaError(format!("Struct child index {idx} out of range"))
+ })?;
+ let field = fields
+ .get(idx)
+ .ok_or_else(|| {
+ ArrowError::SchemaError(format!("Struct child index {idx} out of range"))
+ })?
+ .as_ref();
+ let encoder = prepare_value_site_encoder(
+ column.as_ref(),
+ field,
+ field_binding.nullability,
+ &field_binding.plan,
+ )?;
+ encoders.push(encoder);
+ }
+ Ok(Self { encoders })
+ }
+
+ fn encode<W: Write + ?Sized>(&mut self, idx: usize, out: &mut W) -> Result<(), ArrowError> {
+ for encoder in self.encoders.iter_mut() {
+ encoder.encode(idx, out)?;
+ }
+ Ok(())
+ }
+}
+
+fn encode_blocked_range<W: Write + ?Sized, F>(
+ out: &mut W,
+ start: usize,
+ end: usize,
+ mut write_item: F,
+) -> Result<(), ArrowError>
+where
+ F: FnMut(usize, &mut W) -> Result<(), ArrowError>,
+{
+ let len = end.saturating_sub(start);
+ if len == 0 {
+ // Zero-length terminator per Avro spec
+ write_long(out, 0)?;
+ return Ok(());
+ }
+ // Emit a single positive block for performance, then the end marker.
+ write_long(out, len as i64)?;
+ for j in start..end {
+ write_item(j, out)?;
Review Comment:
That's very fair, I'll clean that up.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]