alamb commented on code in PR #8237:
URL: https://github.com/apache/arrow-rs/pull/8237#discussion_r2337613587


##########
parquet/tests/arrow_reader/io/mod.rs:
##########
@@ -286,8 +286,7 @@ impl TestRowGroups {
                     .enumerate()
                     .map(|(col_idx, col_meta)| {
                         let column_name = 
col_meta.column_descr().name().to_string();
-                        let page_locations = offset_index[rg_index][col_idx]
-                            .page_locations().to_vec();
+                        let page_locations = 
offset_index[rg_index][col_idx].page_locations();

Review Comment:
   👍 



##########
parquet/src/parquet_macros.rs:
##########
@@ -228,14 +306,86 @@ macro_rules! thrift_struct {
                 })
             }
         }
+
+        impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? {
+            const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+            #[allow(unused_assignments)]
+            fn write_thrift<W: Write>(&self, writer: &mut 
ThriftCompactOutputProtocol<W>) -> Result<()> {
+                let mut last_field_id = 0i16;
+                
$($crate::__thrift_write_required_or_optional_field!($required_or_optional 
$field_name, $field_id, $field_type, self, writer, last_field_id);)*
+                writer.write_struct_end()
+            }
+        }
+
+        impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
+            fn write_thrift_field<W: Write>(&self, writer: &mut 
ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> 
Result<i16> {
+                writer.write_field_begin(FieldType::Struct, field_id, 
last_field_id)?;
+                self.write_thrift(writer)?;
+                Ok(field_id)
+            }
+        }
     }
 }
 
-/// macro to use when decoding struct fields
+#[doc(hidden)]

Review Comment:
   it would be great to get some more doc strings on these macros



##########
parquet/src/basic.rs:
##########
@@ -646,6 +774,39 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for 
Compression {
     }
 }
 
+// FIXME

Review Comment:
   🤔 



##########
parquet/src/parquet_thrift.rs:
##########
@@ -539,3 +545,394 @@ where
         Ok(res)
     }
 }
+
+/////////////////////////
+// thrift compact output
+
+pub(crate) struct ThriftCompactOutputProtocol<W: Write> {
+    writer: W,
+}
+
+impl<W: Write> ThriftCompactOutputProtocol<W> {
+    pub(crate) fn new(writer: W) -> Self {
+        Self { writer }
+    }
+
+    pub(crate) fn inner(&self) -> &W {
+        &self.writer
+    }
+
+    fn write_byte(&mut self, b: u8) -> Result<()> {
+        self.writer.write_all(&[b])?;
+        Ok(())
+    }
+
+    fn write_vlq(&mut self, val: u64) -> Result<()> {
+        let mut v = val;
+        while v > 0x7f {
+            self.write_byte(v as u8 | 0x80)?;
+            v >>= 7;
+        }
+        self.write_byte(v as u8)
+    }
+
+    fn write_zig_zag(&mut self, val: i64) -> Result<()> {
+        let s = (val < 0) as i64;
+        self.write_vlq((((val ^ -s) << 1) + s) as u64)
+    }
+
+    pub(crate) fn write_field_begin(
+        &mut self,
+        field_type: FieldType,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<()> {
+        let delta = field_id.wrapping_sub(last_field_id);
+        if delta > 0 && delta <= 0xf {
+            self.write_byte((delta as u8) << 4 | field_type as u8)
+        } else {
+            self.write_byte(field_type as u8)?;
+            self.write_i16(field_id)
+        }
+    }
+
+    pub(crate) fn write_list_begin(&mut self, element_type: ElementType, len: 
usize) -> Result<()> {
+        if len < 15 {
+            self.write_byte((len as u8) << 4 | element_type as u8)
+        } else {
+            self.write_byte(0xf0u8 | element_type as u8)?;
+            self.write_vlq(len as _)
+        }
+    }
+
+    pub(crate) fn write_struct_end(&mut self) -> Result<()> {
+        self.write_byte(0)
+    }
+
+    pub(crate) fn write_bytes(&mut self, val: &[u8]) -> Result<()> {
+        self.write_vlq(val.len() as u64)?;
+        self.writer.write_all(val)?;
+        Ok(())
+    }
+
+    pub(crate) fn write_empty_struct(&mut self, field_id: i16, last_field_id: 
i16) -> Result<i16> {
+        self.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+        self.write_struct_end()?;
+        Ok(last_field_id)
+    }
+
+    pub(crate) fn write_bool(&mut self, val: bool) -> Result<()> {
+        match val {
+            true => self.write_byte(1),
+            false => self.write_byte(2),
+        }
+    }
+
+    pub(crate) fn write_i8(&mut self, val: i8) -> Result<()> {
+        self.write_byte(val as u8)
+    }
+
+    pub(crate) fn write_i16(&mut self, val: i16) -> Result<()> {
+        self.write_zig_zag(val as _)
+    }
+
+    pub(crate) fn write_i32(&mut self, val: i32) -> Result<()> {
+        self.write_zig_zag(val as _)
+    }
+
+    pub(crate) fn write_i64(&mut self, val: i64) -> Result<()> {
+        self.write_zig_zag(val as _)
+    }
+
+    pub(crate) fn write_double(&mut self, val: f64) -> Result<()> {
+        self.writer.write_all(&val.to_le_bytes())?;
+        Ok(())
+    }
+}
+
+pub(crate) trait WriteThrift {

Review Comment:
   some more docstrings here explaining what this was for and what it did would 
be super helpful I think



##########
parquet/src/parquet_thrift.rs:
##########
@@ -539,3 +545,394 @@ where
         Ok(res)
     }
 }
+
+/////////////////////////

Review Comment:
   it is pretty nice that this is so straightforward



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to