jecsand838 commented on code in PR #8371:
URL: https://github.com/apache/arrow-rs/pull/8371#discussion_r2366376785
##########
arrow-avro/src/writer/mod.rs:
##########
@@ -230,6 +255,82 @@ mod tests {
.expect("failed to build test RecordBatch")
}
+ #[test]
+ fn test_stream_writer_writes_prefix_per_row() -> Result<(), ArrowError> {
+ let schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
+ let avro_schema = AvroSchema::try_from(&schema)?;
+
+ let fingerprint =
avro_schema.fingerprint(FingerprintAlgorithm::Rabin)?;
+ let mut expected_prefix =
Vec::from(crate::schema::SINGLE_OBJECT_MAGIC);
+ match fingerprint {
+ crate::schema::Fingerprint::Rabin(val) =>
expected_prefix.extend(val.to_le_bytes()),
+ _ => panic!("Expected Rabin fingerprint for default stream
writer"),
+ }
+
+ let batch = RecordBatch::try_new(
+ Arc::new(schema.clone()),
+ vec![Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef],
+ )?;
+
+ let buffer: Vec<u8> = Vec::new();
+ let mut writer = AvroStreamWriter::new(buffer, schema)?;
+ writer.write(&batch)?;
+ let actual_bytes = writer.into_inner();
+
+ let mut expected_bytes = Vec::new();
+ // Row 1: prefix + zig-zag encoded(10)
+ expected_bytes.extend(&expected_prefix);
+ expected_bytes.push(0x14);
+ // Row 2: prefix + zig-zag encoded(20)
+ expected_bytes.extend(&expected_prefix);
+ expected_bytes.push(0x28);
+
+ assert_eq!(
+ actual_bytes, expected_bytes,
+ "Stream writer output did not match expected prefix-per-row format"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_stream_writer_with_id_fingerprint() -> Result<(), ArrowError> {
Review Comment:
@nathaniel-d-ef One last thing: I think it would be a good idea to make
these round-trip tests. You could do this by writing into a buffer and then
reading the output back using an `arrow-avro` `Decoder`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]