This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 30185d6116 perf: Remove `bool_val` from Parquet Thrift 
`FieldIdentifier` (#9945)
30185d6116 is described below

commit 30185d6116c7ab6c50aa19d9c32defcf2937f720
Author: Ed Seidl <[email protected]>
AuthorDate: Fri May 15 07:24:13 2026 -0700

    perf: Remove `bool_val` from Parquet Thrift `FieldIdentifier` (#9945)
    
    # Which issue does this PR close?
    
    - Closes #9946.
    
    # Rationale for this change
    Remove some unnecessary branching from a hot path in the Thrift parser,
    improving performance and making for easier to read code.
    
    # What changes are included in this PR?
    Removes the `bool_val` field from `FieldIdentifier`.
    
    # Are these changes tested?
    Covered by existing tests
    
    # Are there any user-facing changes?
    No, this is an internal-only API
---
 parquet/src/file/metadata/thrift/mod.rs | 10 +----
 parquet/src/parquet_macros.rs           |  5 +--
 parquet/src/parquet_thrift.rs           | 73 +++++++++++++++------------------
 3 files changed, 36 insertions(+), 52 deletions(-)

diff --git a/parquet/src/file/metadata/thrift/mod.rs 
b/parquet/src/file/metadata/thrift/mod.rs
index 75fcad871e..9be697c0fe 100644
--- a/parquet/src/file/metadata/thrift/mod.rs
+++ b/parquet/src/file/metadata/thrift/mod.rs
@@ -1135,13 +1135,7 @@ impl DataPageHeaderV2 {
                     repetition_levels_byte_length = Some(val);
                 }
                 7 => {
-                    if field_ident.bool_val.is_none() {
-                        return Err(general_err!(
-                            "Expected bool field but got thrift type {:?}",
-                            field_ident.field_type
-                        ));
-                    }
-                    is_compressed = field_ident.bool_val;
+                    is_compressed = Some(field_ident.bool_val()?);
                 }
                 _ => {
                     prot.skip(field_ident.field_type)?;
@@ -1837,7 +1831,7 @@ pub(crate) mod tests {
     fn assert_malformed_bool_error(err: crate::errors::ParquetError) {
         let msg = err.to_string();
         assert!(
-            msg.contains("Expected bool field"),
+            msg.contains("Unexpected struct field type"),
             "unexpected error message: {msg}"
         );
     }
diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs
index 13c4763431..8bb2ad23b0 100644
--- a/parquet/src/parquet_macros.rs
+++ b/parquet/src/parquet_macros.rs
@@ -464,10 +464,7 @@ macro_rules! __thrift_read_field {
         $crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)?
     };
     ($prot:tt, $field_ident:tt, bool) => {
-        $field_ident.bool_val.ok_or_else(|| general_err!(
-            "Expected bool field but got thrift type {:?}",
-            $field_ident.field_type
-        ))?
+        $field_ident.bool_val()?
     };
     ($prot:tt, $field_ident:tt, $field_type:ident) => {
         $field_type::read_thrift(&mut *$prot)?
diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs
index b9d69b9d52..7567ace08b 100644
--- a/parquet/src/parquet_thrift.rs
+++ b/parquet/src/parquet_thrift.rs
@@ -59,9 +59,10 @@ impl From<ThriftProtocolError> for ParquetError {
         match e {
             ThriftProtocolError::Eof => eof_err!("Unexpected EOF"),
             ThriftProtocolError::IO(e) => e.into(),
-            ThriftProtocolError::InvalidFieldType(value) => {
-                general_err!("Unexpected struct field type {}", value)
-            }
+            ThriftProtocolError::InvalidFieldType(value) => match 
FieldType::try_from(value) {
+                Ok(fld_type) => general_err!("Unexpected struct field type 
{:?}", fld_type),
+                Err(_) => general_err!("Unexpected struct field type {}", 
value),
+            },
             ThriftProtocolError::InvalidElementType(value) => {
                 general_err!("Unexpected list/set element type {}", value)
             }
@@ -247,11 +248,16 @@ pub(crate) struct FieldIdentifier {
     pub(crate) field_type: FieldType,
     /// The field's `id`. May be computed from delta or directly decoded.
     pub(crate) id: i16,
-    /// Stores the value for booleans.
-    ///
-    /// Boolean fields store no data, instead the field type is either boolean 
true, or
-    /// boolean false.
-    pub(crate) bool_val: Option<bool>,
+}
+
+impl FieldIdentifier {
+    pub(crate) fn bool_val(&self) -> ThriftProtocolResult<bool> {
+        match self.field_type {
+            FieldType::BooleanTrue => Ok(true),
+            FieldType::BooleanFalse => Ok(false),
+            _ => Err(ThriftProtocolError::InvalidFieldType(self.field_type as 
u8)),
+        }
+    }
 }
 
 /// Struct used to describe a [thrift list].
@@ -358,41 +364,28 @@ pub(crate) trait ThriftCompactInputProtocol<'a> {
         // - the type
         // - the field delta and the type
         let field_type = self.read_byte()?;
-        let field_delta = (field_type & 0xf0) >> 4;
-        let field_type = FieldType::try_from(field_type & 0xf)?;
-        let mut bool_val: Option<bool> = None;
-
-        match field_type {
-            FieldType::Stop => Ok(FieldIdentifier {
+        if field_type & 0xf == 0 {
+            return Ok(FieldIdentifier {
                 field_type: FieldType::Stop,
                 id: 0,
-                bool_val,
-            }),
-            _ => {
-                // special handling for bools
-                if field_type == FieldType::BooleanFalse {
-                    bool_val = Some(false);
-                } else if field_type == FieldType::BooleanTrue {
-                    bool_val = Some(true);
-                }
-                let field_id = if field_delta != 0 {
-                    last_field_id.checked_add(field_delta as i16).ok_or(
-                        ThriftProtocolError::FieldDeltaOverflow {
-                            field_delta,
-                            last_field_id,
-                        },
-                    )?
-                } else {
-                    self.read_full_field_id()?
-                };
-
-                Ok(FieldIdentifier {
-                    field_type,
-                    id: field_id,
-                    bool_val,
-                })
-            }
+            });
         }
+
+        let field_delta = (field_type & 0xf0) >> 4;
+        let field_type = FieldType::try_from(field_type & 0xf)?;
+
+        let id = if field_delta != 0 {
+            last_field_id.checked_add(field_delta as i16).ok_or(
+                ThriftProtocolError::FieldDeltaOverflow {
+                    field_delta,
+                    last_field_id,
+                },
+            )?
+        } else {
+            self.read_full_field_id()?
+        };
+
+        Ok(FieldIdentifier { field_type, id })
     }
 
     /// This is a specialized version of [`Self::read_field_begin`], solely 
for use in parsing

Reply via email to