mkarbo commented on code in PR #7535: URL: https://github.com/apache/arrow-rs/pull/7535#discussion_r2104383018
########## parquet-variant/src/variant.rs: ########## @@ -1,14 +1,105 @@ -use std::ops::Index; - use crate::decoder::{ self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType, }; +use crate::utils::{array_from_slice, invalid_utf8_err, non_empty_slice, slice_from_slice}; use arrow_schema::ArrowError; +use std::{ + num::TryFromIntError, + ops::{Index, Range}, + str, +}; + +#[derive(Clone, Debug, Copy, PartialEq)] +enum OffsetSizeBytes { + One = 1, + Two = 2, + Three = 3, + Four = 4, +} + +impl OffsetSizeBytes { + fn try_new(offset_size_minus_one: u8) -> Result<Self, ArrowError> { + use OffsetSizeBytes::*; + let result = match offset_size_minus_one { + 0 => One, + 1 => Two, + 2 => Three, + 3 => Four, + _ => { + return Err(ArrowError::InvalidArgumentError( + "offset_size_minus_one must be 0–3".to_string(), + )) + } + }; + Ok(result) + } + + fn unpack_usize( + &self, + bytes: &[u8], + byte_offset: usize, // how many bytes to skip + offset_index: usize, // which offset in an array of offsets + ) -> Result<usize, ArrowError> { + use OffsetSizeBytes::*; + let offset = byte_offset + (*self as usize) * offset_index; + let result = match self { + One => u8::from_le_bytes(array_from_slice(bytes, offset)?).into(), + Two => u16::from_le_bytes(array_from_slice(bytes, offset)?).into(), + // TODO: Do this one + Three => todo!(), + Four => u32::from_le_bytes(array_from_slice(bytes, offset)?) + .try_into() + .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string()))?, + }; + Ok(result) + } +} + +#[derive(Clone, Debug, Copy, PartialEq)] +pub(crate) struct VariantMetadataHeader { + version: u8, + is_sorted: bool, + /// Note: This is `offset_size_minus_one` + 1 + offset_size: OffsetSizeBytes, +} + +impl<'m> VariantMetadataHeader { Review Comment: You're right, removed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org