alamb commented on code in PR #9171:
URL: https://github.com/apache/arrow-rs/pull/9171#discussion_r2724131415
##########
arrow-avro/src/writer/mod.rs:
##########
@@ -238,76 +247,40 @@ impl EncodedRows {
/// encoder.encode(&batch)?;
/// let rows = encoder.flush();
///
- /// // Access the first row (index 0)
- /// let row0 = rows.row(0)?;
- /// assert!(!row0.is_empty());
+ /// assert_eq!(rows.iter().count(), 2);
/// # Ok(())
/// # }
/// ```
- pub fn row(&self, i: usize) -> Result<Bytes, ArrowError> {
- if i >= self.len() {
+ pub fn row(&self, n: usize) -> Result<Bytes, ArrowError> {
+ if n >= self.len() {
return Err(ArrowError::AvroError(format!(
- "Row index {i} out of bounds for len {}",
+ "Row index {n} out of bounds for len {}",
self.len()
)));
}
// SAFETY:
// self.len() is defined as self.offsets.len().saturating_sub(1).
- // The check `i >= self.len()` above ensures that `i <
self.offsets.len() - 1`.
- // Therefore, both `i` and `i + 1` are strictly within the bounds of
`self.offsets`.
- let (start_u64, end_u64) = unsafe {
+ // The check `n >= self.len()` above ensures that `n <
self.offsets.len() - 1`.
+ // Therefore, both `n` and `n + 1` are strictly within the bounds of
`self.offsets`.
+ let (start, end) = unsafe {
Review Comment:
Using `usize` rather than `u64` seems like a nice cleanup.
##########
arrow-avro/src/writer/mod.rs:
##########
@@ -322,25 +295,23 @@ impl EncodedRows {
/// let schema = Schema::new(vec![Field::new("x", DataType::Int32,
false)]);
/// let batch = RecordBatch::try_new(
/// Arc::new(schema.clone()),
- /// vec![Arc::new(Int32Array::from(vec![100])) as ArrayRef],
+ /// vec![Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef],
/// )?;
///
/// let mut encoder =
WriterBuilder::new(schema).build_encoder::<AvroSoeFormat>()?;
/// encoder.encode(&batch)?;
/// let rows = encoder.flush();
///
- /// let vecs = rows.to_vecs()?;
- /// assert_eq!(vecs.len(), 1);
- /// assert!(!vecs[0].is_empty());
+ /// assert_eq!(rows.iter().count(), 2);
/// # Ok(())
/// # }
/// ```
- pub fn to_vecs(&self) -> Result<Vec<Vec<u8>>, ArrowError> {
- let mut out = Vec::with_capacity(self.len());
- for i in 0..self.len() {
- out.push(self.row(i)?.to_vec());
- }
- Ok(out)
+ #[inline]
+ pub fn iter(&self) -> impl ExactSizeIterator<Item = Bytes> + '_ {
+ self.offsets.windows(2).map(|w| {
+ debug_assert!(w[0] <= w[1] && w[1] <= self.data.len());
+ self.data.slice(w[0]..w[1])
Review Comment:
Given that you are using `slice` here, I suspect the extra `debug_assert!` is
not necessary, as `slice` already performs the same bounds check.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]