nathaniel-d-ef commented on code in PR #8298:
URL: https://github.com/apache/arrow-rs/pull/8298#discussion_r2344286454
##########
arrow-avro/src/writer/encoder.rs:
##########
@@ -653,6 +893,182 @@ fn prepare_value_site_encoder<'a>(
FieldEncoder::make_encoder(values_array, value_field, plan, nullability)
}
+/// Avro `fixed` encoder for Arrow `FixedSizeBinaryArray`.
+/// Spec: a fixed is encoded as exactly `size` bytes, with no length prefix.
+struct FixedEncoder<'a>(&'a FixedSizeBinaryArray);
+impl FixedEncoder<'_> {
+ fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) ->
Result<(), ArrowError> {
+ let v = self.0.value(idx); // &[u8] of fixed width
+ out.write_all(v)
+ .map_err(|e| ArrowError::IoError(format!("write fixed bytes: {e}"), e))
+ }
+}
+
+/// Avro UUID logical type encoder: Arrow FixedSizeBinary(16) → Avro string (UUID).
+/// Spec: uuid is a logical type over string (RFC‑4122). We output hyphenated form.
+struct UuidEncoder<'a>(&'a FixedSizeBinaryArray);
+impl UuidEncoder<'_> {
+ fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) ->
Result<(), ArrowError> {
+ let v = self.0.value(idx);
+ if v.len() != 16 {
+ return Err(ArrowError::InvalidArgumentError(
+ "logicalType=uuid requires FixedSizeBinary(16)".into(),
+ ));
+ }
+ let u = Uuid::from_slice(v)
+ .map_err(|e| ArrowError::InvalidArgumentError(format!("Invalid UUID bytes: {e}")))?;
+ let mut tmp = [0u8; uuid::fmt::Hyphenated::LENGTH];
+ let s = u.hyphenated().encode_lower(&mut tmp);
+ write_len_prefixed(out, s.as_bytes())
+ }
+}
+
+/// Avro `duration` encoder for Arrow `Interval(IntervalUnit::MonthDayNano)`.
+/// Spec: `duration` annotates Avro fixed(12) with three **little‑endian u32**:
+/// months, days, milliseconds (no negatives).
+struct IntervalMonthDayNanoEncoder<'a>(&'a
PrimitiveArray<IntervalMonthDayNanoType>);
+impl IntervalMonthDayNanoEncoder<'_> {
+ fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) ->
Result<(), ArrowError> {
+ let native = self.0.value(idx);
+ let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(native);
+ if months < 0 || days < 0 || nanos < 0 {
+ return Err(ArrowError::InvalidArgumentError(
+ "Avro 'duration' cannot encode negative months/days/nanoseconds".into(),
+ ));
+ }
+ if nanos % 1_000_000 != 0 {
+ return Err(ArrowError::InvalidArgumentError(
+ "Avro 'duration' requires whole milliseconds; nanoseconds must be divisible by 1_000_000"
+ .into(),
+ ));
+ }
+ let millis = nanos / 1_000_000;
+ if millis > u32::MAX as i64 {
+ return Err(ArrowError::InvalidArgumentError(
+ "Avro 'duration' milliseconds exceed u32::MAX".into(),
+ ));
+ }
+ let mut buf = [0u8; 12];
+ buf[0..4].copy_from_slice(&(months as u32).to_le_bytes());
+ buf[4..8].copy_from_slice(&(days as u32).to_le_bytes());
+ buf[8..12].copy_from_slice(&(millis as u32).to_le_bytes());
+ out.write_all(&buf)
+ .map_err(|e| ArrowError::IoError(format!("write duration: {e}"),
e))
+ }
+}
+
+/// Avro `duration` encoder for Arrow `Interval(IntervalUnit::YearMonth)`.
+struct IntervalYearMonthEncoder<'a>(&'a PrimitiveArray<IntervalYearMonthType>);
+impl IntervalYearMonthEncoder<'_> {
+ fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) ->
Result<(), ArrowError> {
+ let months_i32 = self.0.value(idx);
+
+ if months_i32 < 0 {
+ return Err(ArrowError::InvalidArgumentError(
+ "Avro 'duration' cannot encode negative months".into(),
+ ));
+ }
+
+ let mut buf = [0u8; 12];
+ buf[0..4].copy_from_slice(&(months_i32 as u32).to_le_bytes());
+ // Days and Milliseconds are zero, so their bytes are already 0.
+ // buf[4..8] is [0, 0, 0, 0]
+ // buf[8..12] is [0, 0, 0, 0]
+
+ out.write_all(&buf)
+ .map_err(|e| ArrowError::IoError(format!("write duration: {e}"),
e))
+ }
+}
+
+/// Avro `duration` encoder for Arrow `Interval(IntervalUnit::DayTime)`.
+struct IntervalDayTimeEncoder<'a>(&'a PrimitiveArray<IntervalDayTimeType>);
+impl IntervalDayTimeEncoder<'_> {
+ fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) ->
Result<(), ArrowError> {
+ // A DayTime interval is a packed (days: i32, milliseconds: i32).
+ let native = self.0.value(idx);
+ let (days, millis) = IntervalDayTimeType::to_parts(native);
+
+ if days < 0 || millis < 0 {
+ return Err(ArrowError::InvalidArgumentError(
+ "Avro 'duration' cannot encode negative days or milliseconds".into(),
+ ));
+ }
+
+ // (months=0, days, millis)
+ let mut buf = [0u8; 12];
Review Comment:
This is a great recommendation, thank you!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]