alamb commented on code in PR #8237:
URL: https://github.com/apache/arrow-rs/pull/8237#discussion_r2337613587
##########
parquet/tests/arrow_reader/io/mod.rs:
##########
@@ -286,8 +286,7 @@ impl TestRowGroups {
.enumerate()
.map(|(col_idx, col_meta)| {
let column_name =
col_meta.column_descr().name().to_string();
- let page_locations = offset_index[rg_index][col_idx]
- .page_locations().to_vec();
+ let page_locations =
offset_index[rg_index][col_idx].page_locations();
Review Comment:
👍
##########
parquet/src/parquet_macros.rs:
##########
@@ -228,14 +306,86 @@ macro_rules! thrift_struct {
})
}
}
+
+ impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? {
+ const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+ #[allow(unused_assignments)]
+ fn write_thrift<W: Write>(&self, writer: &mut
ThriftCompactOutputProtocol<W>) -> Result<()> {
+ let mut last_field_id = 0i16;
+
$($crate::__thrift_write_required_or_optional_field!($required_or_optional
$field_name, $field_id, $field_type, self, writer, last_field_id);)*
+ writer.write_struct_end()
+ }
+ }
+
+ impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
+ fn write_thrift_field<W: Write>(&self, writer: &mut
ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) ->
Result<i16> {
+ writer.write_field_begin(FieldType::Struct, field_id,
last_field_id)?;
+ self.write_thrift(writer)?;
+ Ok(field_id)
+ }
+ }
}
}
-/// macro to use when decoding struct fields
+#[doc(hidden)]
Review Comment:
it would be great to get some more doc strings on these macros
##########
parquet/src/basic.rs:
##########
@@ -646,6 +774,39 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for
Compression {
}
}
+// FIXME
Review Comment:
🤔
##########
parquet/src/parquet_thrift.rs:
##########
@@ -539,3 +545,394 @@ where
Ok(res)
}
}
+
+/////////////////////////
+// thrift compact output
+
+pub(crate) struct ThriftCompactOutputProtocol<W: Write> {
+ writer: W,
+}
+
+impl<W: Write> ThriftCompactOutputProtocol<W> {
+ pub(crate) fn new(writer: W) -> Self {
+ Self { writer }
+ }
+
+ pub(crate) fn inner(&self) -> &W {
+ &self.writer
+ }
+
+ fn write_byte(&mut self, b: u8) -> Result<()> {
+ self.writer.write_all(&[b])?;
+ Ok(())
+ }
+
+ fn write_vlq(&mut self, val: u64) -> Result<()> {
+ let mut v = val;
+ while v > 0x7f {
+ self.write_byte(v as u8 | 0x80)?;
+ v >>= 7;
+ }
+ self.write_byte(v as u8)
+ }
+
+ fn write_zig_zag(&mut self, val: i64) -> Result<()> {
+ let s = (val < 0) as i64;
+ self.write_vlq((((val ^ -s) << 1) + s) as u64)
+ }
+
+ pub(crate) fn write_field_begin(
+ &mut self,
+ field_type: FieldType,
+ field_id: i16,
+ last_field_id: i16,
+ ) -> Result<()> {
+ let delta = field_id.wrapping_sub(last_field_id);
+ if delta > 0 && delta <= 0xf {
+ self.write_byte((delta as u8) << 4 | field_type as u8)
+ } else {
+ self.write_byte(field_type as u8)?;
+ self.write_i16(field_id)
+ }
+ }
+
+ pub(crate) fn write_list_begin(&mut self, element_type: ElementType, len:
usize) -> Result<()> {
+ if len < 15 {
+ self.write_byte((len as u8) << 4 | element_type as u8)
+ } else {
+ self.write_byte(0xf0u8 | element_type as u8)?;
+ self.write_vlq(len as _)
+ }
+ }
+
+ pub(crate) fn write_struct_end(&mut self) -> Result<()> {
+ self.write_byte(0)
+ }
+
+ pub(crate) fn write_bytes(&mut self, val: &[u8]) -> Result<()> {
+ self.write_vlq(val.len() as u64)?;
+ self.writer.write_all(val)?;
+ Ok(())
+ }
+
+ pub(crate) fn write_empty_struct(&mut self, field_id: i16, last_field_id:
i16) -> Result<i16> {
+ self.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+ self.write_struct_end()?;
+ Ok(last_field_id)
+ }
+
+ pub(crate) fn write_bool(&mut self, val: bool) -> Result<()> {
+ match val {
+ true => self.write_byte(1),
+ false => self.write_byte(2),
+ }
+ }
+
+ pub(crate) fn write_i8(&mut self, val: i8) -> Result<()> {
+ self.write_byte(val as u8)
+ }
+
+ pub(crate) fn write_i16(&mut self, val: i16) -> Result<()> {
+ self.write_zig_zag(val as _)
+ }
+
+ pub(crate) fn write_i32(&mut self, val: i32) -> Result<()> {
+ self.write_zig_zag(val as _)
+ }
+
+ pub(crate) fn write_i64(&mut self, val: i64) -> Result<()> {
+ self.write_zig_zag(val as _)
+ }
+
+ pub(crate) fn write_double(&mut self, val: f64) -> Result<()> {
+ self.writer.write_all(&val.to_le_bytes())?;
+ Ok(())
+ }
+}
+
+pub(crate) trait WriteThrift {
Review Comment:
some more docstrings here explaining what this was for and what it did would
be super helpful I think
##########
parquet/src/parquet_thrift.rs:
##########
@@ -539,3 +545,394 @@ where
Ok(res)
}
}
+
+/////////////////////////
Review Comment:
it is pretty nice that this is so straightforward
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]