mkarbo commented on code in PR #7535:
URL: https://github.com/apache/arrow-rs/pull/7535#discussion_r2102988142


##########
parquet-variant/src/decoder.rs:
##########
@@ -0,0 +1,199 @@
+// NOTE: Largely based on the implementation of @PinkCrow007 in https://github.com/apache/arrow-rs/pull/7452
+// And the feedback there.
+use crate::variant::VariantType;
+use arrow_schema::ArrowError;
+use std::{array::TryFromSliceError, str};
+
+#[derive(Debug, Clone, Copy)]
+pub enum VariantBasicType {
+    Primitive = 0,
+    ShortString = 1,
+    Object = 2,
+    Array = 3,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum VariantPrimitiveType {
+    Null = 0,
+    BooleanTrue = 1,
+    BooleanFalse = 2,
+    Int8 = 3,
+    // TODO: Add 'legs' for the rest of primitives, once API is agreed upon
+    String = 16,
+}
+
+/// Extracts the basic type from a header byte
+pub(crate) fn get_basic_type(header: u8) -> Result<VariantBasicType, ArrowError> {
+    // See https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-encoding
+    let basic_type = header & 0x03; // Basic type is encoded in the first 2 bits
+    let basic_type = match basic_type {
+        0 => VariantBasicType::Primitive,
+        1 => VariantBasicType::ShortString,
+        2 => VariantBasicType::Object,
+        3 => VariantBasicType::Array,
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "unknown basic type: {}",
+                basic_type
+            )))
+        }
+    };
+    Ok(basic_type)
+}
+
+/// Extracts the primitive type from a header byte
+pub(crate) fn get_primitive_type(header: u8) -> Result<VariantPrimitiveType, ArrowError> {
+    // See https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-encoding
+    //// Primitive type is encoded in the last 6 bits of the header byte
+    let primitive_type = (header >> 2) & 0x3F;
+    let primitive_type = match primitive_type {
+        0 => VariantPrimitiveType::Null,
+        1 => VariantPrimitiveType::BooleanTrue,
+        2 => VariantPrimitiveType::BooleanFalse,
+        3 => VariantPrimitiveType::Int8,
+        // TODO: Add 'legs' for the rest, once API is agreed upon
+        16 => VariantPrimitiveType::String,
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "unknown primitive type: {}",
+                primitive_type
+            )))
+        }
+    };
+    Ok(primitive_type)
+}
+
+/// Extracts the variant type from the value section of a variant. The variant
+/// type is defined as the set of all basic types and all primitive types.
+pub fn get_variant_type(value: &[u8]) -> Result<VariantType, ArrowError> {
+    if value.is_empty() {
+        return Err(ArrowError::InvalidArgumentError(
+            "Tried to get variant type from empty buffer array".to_string(),
+        ));
+    }
+    let header = value[0];
+    let variant_type = match get_basic_type(header)? {
+        VariantBasicType::Primitive => match get_primitive_type(header)? {
+            VariantPrimitiveType::Null => VariantType::Null,

Review Comment:
   Removed, thanks @alamb 



##########
parquet-variant/src/decoder.rs:
##########
@@ -0,0 +1,199 @@
+// NOTE: Largely based on the implementation of @PinkCrow007 in https://github.com/apache/arrow-rs/pull/7452
+// And the feedback there.
+use crate::variant::VariantType;
+use arrow_schema::ArrowError;
+use std::{array::TryFromSliceError, str};
+
+#[derive(Debug, Clone, Copy)]
+pub enum VariantBasicType {
+    Primitive = 0,
+    ShortString = 1,
+    Object = 2,
+    Array = 3,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum VariantPrimitiveType {
+    Null = 0,
+    BooleanTrue = 1,
+    BooleanFalse = 2,
+    Int8 = 3,
+    // TODO: Add 'legs' for the rest of primitives, once API is agreed upon
+    String = 16,
+}
+
+/// Extracts the basic type from a header byte
+pub(crate) fn get_basic_type(header: u8) -> Result<VariantBasicType, ArrowError> {
+    // See https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-encoding
+    let basic_type = header & 0x03; // Basic type is encoded in the first 2 bits
+    let basic_type = match basic_type {
+        0 => VariantBasicType::Primitive,
+        1 => VariantBasicType::ShortString,
+        2 => VariantBasicType::Object,
+        3 => VariantBasicType::Array,
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "unknown basic type: {}",
+                basic_type
+            )))
+        }
+    };
+    Ok(basic_type)
+}
+
+/// Extracts the primitive type from a header byte
+pub(crate) fn get_primitive_type(header: u8) -> Result<VariantPrimitiveType, ArrowError> {
+    // See https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-encoding
+    //// Primitive type is encoded in the last 6 bits of the header byte
+    let primitive_type = (header >> 2) & 0x3F;
+    let primitive_type = match primitive_type {
+        0 => VariantPrimitiveType::Null,
+        1 => VariantPrimitiveType::BooleanTrue,
+        2 => VariantPrimitiveType::BooleanFalse,
+        3 => VariantPrimitiveType::Int8,
+        // TODO: Add 'legs' for the rest, once API is agreed upon
+        16 => VariantPrimitiveType::String,
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "unknown primitive type: {}",
+                primitive_type
+            )))
+        }
+    };
+    Ok(primitive_type)
+}
+
+/// Extracts the variant type from the value section of a variant. The variant
+/// type is defined as the set of all basic types and all primitive types.
+pub fn get_variant_type(value: &[u8]) -> Result<VariantType, ArrowError> {
+    if value.is_empty() {
+        return Err(ArrowError::InvalidArgumentError(
+            "Tried to get variant type from empty buffer array".to_string(),
+        ));
+    }
+    let header = value[0];
+    let variant_type = match get_basic_type(header)? {
+        VariantBasicType::Primitive => match get_primitive_type(header)? {
+            VariantPrimitiveType::Null => VariantType::Null,

Review Comment:
   You're right, and it's been removed, thanks to @alamb 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to