jecsand838 commented on code in PR #8371:
URL: https://github.com/apache/arrow-rs/pull/8371#discussion_r2366385254
##########
arrow-avro/src/writer/mod.rs:
##########
@@ -230,6 +255,82 @@ mod tests {
.expect("failed to build test RecordBatch")
}
+ #[test]
+ fn test_stream_writer_writes_prefix_per_row() -> Result<(), ArrowError> {
+ let schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
+ let avro_schema = AvroSchema::try_from(&schema)?;
+
+ let fingerprint =
avro_schema.fingerprint(FingerprintAlgorithm::Rabin)?;
+ let mut expected_prefix =
Vec::from(crate::schema::SINGLE_OBJECT_MAGIC);
+ match fingerprint {
+ crate::schema::Fingerprint::Rabin(val) =>
expected_prefix.extend(val.to_le_bytes()),
+ _ => panic!("Expected Rabin fingerprint for default stream
writer"),
+ }
+
+ let batch = RecordBatch::try_new(
+ Arc::new(schema.clone()),
+ vec![Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef],
+ )?;
+
+ let buffer: Vec<u8> = Vec::new();
+ let mut writer = AvroStreamWriter::new(buffer, schema)?;
+ writer.write(&batch)?;
+ let actual_bytes = writer.into_inner();
+
+ let mut expected_bytes = Vec::new();
+ // Row 1: prefix + zig-zag encoded(10)
+ expected_bytes.extend(&expected_prefix);
+ expected_bytes.push(0x14);
+ // Row 2: prefix + zig-zag encoded(20)
+ expected_bytes.extend(&expected_prefix);
+ expected_bytes.push(0x28);
+
+ assert_eq!(
+ actual_bytes, expected_bytes,
+ "Stream writer output did not match expected prefix-per-row format"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_stream_writer_with_id_fingerprint() -> Result<(), ArrowError> {
Review Comment:
You could probably do something like this for those round-trip tests:
```rust
/// Round-trip test: encodes a two-row `Int32` batch with `AvroStreamWriter`,
/// decodes the bytes with a decoder backed by a `SchemaStore` holding the
/// same schema, and checks that the original values come back.
#[test]
fn test_stream_writer_writes_prefix_per_row() -> Result<(), ArrowError> {
    let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
    let input = RecordBatch::try_new(
        Arc::new(schema.clone()),
        vec![Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef],
    )?;

    // Encode the batch into an in-memory byte buffer.
    let sink: Vec<u8> = Vec::new();
    let mut stream_writer = AvroStreamWriter::new(sink, schema.clone())?;
    stream_writer.write(&input)?;
    let bytes = stream_writer.into_inner();

    // Register the writer schema so the decoder can look it up.
    let mut schema_store = SchemaStore::new(); // Rabin by default
    let writer_schema = AvroSchema::try_from(&schema)?;
    let _registered = schema_store.register(writer_schema)?;

    let mut decoder = ReaderBuilder::new()
        .with_writer_schema_store(schema_store)
        .build_decoder()?;
    let _bytes_read = decoder.decode(&bytes)?;
    let flushed = decoder.flush()?;
    let output = flushed.expect("expected at least one batch from decoder");

    // The decoded batch must have the same shape and values as the input.
    assert_eq!(output.num_columns(), 1);
    assert_eq!(output.num_rows(), 2);
    let first_column = output.column(0).as_any();
    let ints = first_column
        .downcast_ref::<Int32Array>()
        .expect("int column");
    assert_eq!(ints, &Int32Array::from(vec![10, 20]));
    Ok(())
}
/// Round-trip test for the `FingerprintStrategy::Id` path: encodes a
/// three-row `Int32` batch with a writer configured for schema id 42, then
/// decodes it with a `SchemaStore` that maps `Fingerprint::Id(42)` to the
/// same schema and checks that the original values come back.
#[test]
fn test_stream_writer_with_id_fingerprint() -> Result<(), ArrowError> {
    let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
    let input = RecordBatch::try_new(
        Arc::new(schema.clone()),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
    )?;

    // Encode using an explicit numeric schema id instead of the default strategy.
    let schema_id: u32 = 42;
    let builder = WriterBuilder::new(schema.clone())
        .with_fingerprint_strategy(FingerprintStrategy::Id(schema_id));
    let mut id_writer = builder.build::<_, AvroBinaryFormat>(Vec::new())?;
    id_writer.write(&input)?;
    let bytes = id_writer.into_inner();

    // The store is created with `FingerprintAlgorithm::None` and the schema
    // is inserted under the same id the writer was configured with.
    let mut schema_store = SchemaStore::new_with_type(FingerprintAlgorithm::None);
    let writer_schema = AvroSchema::try_from(&schema)?;
    let _ = schema_store.set(Fingerprint::Id(schema_id), writer_schema)?;

    let mut decoder = ReaderBuilder::new()
        .with_writer_schema_store(schema_store)
        .build_decoder()?;
    let _ = decoder.decode(&bytes)?;
    let flushed = decoder.flush()?;
    let output = flushed.expect("expected at least one batch from decoder");

    // The decoded batch must have the same shape and values as the input.
    assert_eq!(output.num_columns(), 1);
    assert_eq!(output.num_rows(), 3);
    let first_column = output.column(0).as_any();
    let ints = first_column
        .downcast_ref::<Int32Array>()
        .expect("int column");
    assert_eq!(ints, &Int32Array::from(vec![1, 2, 3]));
    Ok(())
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]