alamb commented on code in PR #7919:
URL: https://github.com/apache/arrow-rs/pull/7919#discussion_r2205295497


##########
parquet-variant/src/path.rs:
##########
@@ -0,0 +1,48 @@
+use std::ops::Deref;
+
+/// A qualified path that addresses a potential subfield or index inside a
+/// variant value, stored as an ordered list of [`VariantPathElement`]s.
+#[derive(Debug, Clone)]
+pub struct VariantPath(Vec<VariantPathElement>);
+
+impl VariantPath {
+    /// Wraps the given elements in a `VariantPath`.
+    pub fn new(path: Vec<VariantPathElement>) -> Self {
+        VariantPath(path)
+    }
+
+    /// Borrows the elements that make up this path.
+    pub fn path(&self) -> &Vec<VariantPathElement> {
+        let Self(elements) = self;
+        elements
+    }
+}
+
+/// Lets a vector of path elements be converted directly into a [`VariantPath`].
+impl From<Vec<VariantPathElement>> for VariantPath {
+    fn from(value: Vec<VariantPathElement>) -> Self {
+        Self(value)
+    }
+}
+
+/// Dereferences a [`VariantPath`] to its underlying vector of elements, so the
+/// vector's shared-reference methods can be called on the path itself.
+impl Deref for VariantPath {
+    type Target = Vec<VariantPathElement>;
+
+    fn deref(&self) -> &Self::Target {
+        self.path()
+    }
+}
+
+/// Element of a path
+#[derive(Debug, Clone)]
+pub enum VariantPathElement {
+    /// Access field with name `name`
+    Field { name: String },

Review Comment:
   🤔 maybe we can make this a non-owned string (maybe a `&str`). But then again, 
this is probably good.



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -0,0 +1,158 @@
+use std::sync::Arc;
+
+use arrow::{
+    array::{Array, ArrayRef},
+    compute::CastOptions,
+    error::Result,
+};
+use arrow_schema::{ArrowError, Field};
+use parquet_variant::path::VariantPath;
+
+use crate::{VariantArray, VariantArrayBuilder};
+
+/// Extracts the value at `options.path` from every element of a variant array.
+///
+/// `input` must be a [`VariantArray`]; any other array type yields an
+/// `ArrowError::InvalidArgumentError`.
+///
+/// The type of the returned array is selected by `options.as_type`:
+/// 1. `None`: a [`VariantArray`] is returned whose values are the variants found
+///    at the requested path. Rows where the path does not resolve are null.
+/// 2. `Some(field)`: intended to return an array of that type, but this is not
+///    implemented yet and currently yields `ArrowError::NotYetImplemented`.
+pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
+    let variant_array = match input.as_any().downcast_ref::<VariantArray>() {
+        Some(array) => array,
+        None => {
+            return Err(ArrowError::InvalidArgumentError(
+                "expected a VariantArray as the input for variant_get".to_owned(),
+            ))
+        }
+    };
+
+    // Typed extraction is not supported yet, so reject it before building output.
+    if let Some(requested_type) = options.as_type {
+        return Err(ArrowError::NotYetImplemented(format!(
+            "getting a {} from a VariantArray is not implemented yet",
+            requested_type
+        )));
+    }
+
+    let row_count = variant_array.len();
+    let mut builder = VariantArrayBuilder::new(row_count);
+    for row in 0..row_count {
+        // TODO: perf?
+        match variant_array.value(row).get_path(&options.path) {
+            Some(extracted) => {
+                // TODO: we're decoding the value and doing a copy into a variant
+                // value again. This copy can be much smarter.
+                builder.append_variant(extracted);
+            }
+            None => {
+                builder.append_null();
+            }
+        }
+    }
+
+    Ok(Arc::new(builder.build()))
+}
+
+/// Controls the action of the variant_get kernel.
+#[derive(Debug, Clone)]
+pub struct GetOptions<'a> {
+    /// What path to extract
+    pub path: VariantPath,
+    /// if `as_type` is None, the returned array will itself be a VariantArray.
+    ///
+    /// if `as_type` is `Some(type)` the field is returned as the specified 
type if possible. To specify returning
+    /// a Variant, pass a Field with variant type in the metadata.
+    pub as_type: Option<Field>,
+    /// Controls the casting behavior (e.g. error vs substituting null on cast 
error).
+    pub cast_options: CastOptions<'a>,
+}
+
+impl<'a> GetOptions<'a> {
+    /// Construct options to get the specified path as a variant.
+    pub fn new_with_path(path: VariantPath) -> Self {
+        Self {
+            path,
+            as_type: None,
+            cast_options: Default::default(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::sync::Arc;
+
+    use arrow::array::{Array, ArrayRef};
+    use parquet_variant::{path::VariantPathElement, VariantBuilder};
+
+    use crate::{VariantArray, VariantArrayBuilder};
+
+    use super::{variant_get, GetOptions};
+
+    /// Extracting `some_field` from a single-row array holding the object
+    /// `{"some_field": 1234}` yields a non-null variant with the int64 value 1234.
+    #[test]
+    fn get_primitive_variant() {
+        // Encode the object as one variant (metadata + value buffers).
+        let mut variant_builder = VariantBuilder::new();
+        variant_builder.add_field_name("some_field");
+        let mut object_builder = variant_builder.new_object();
+        object_builder.insert("some_field", 1234i64);
+        object_builder.finish().unwrap();
+        let (metadata, value) = variant_builder.finish();
+
+        // Wrap the encoded variant in a one-element VariantArray.
+        let mut array_builder = VariantArrayBuilder::new(1);
+        array_builder.append_variant_buffers(&metadata, &value);
+        let input: ArrayRef = Arc::new(array_builder.build());
+
+        let path = vec![VariantPathElement::field("some_field".to_owned())];
+        let output = variant_get(&input, GetOptions::new_with_path(path.into())).unwrap();
+
+        let output: &VariantArray = output.as_any().downcast_ref().unwrap();
+        assert!(output.nulls().is_none());
+        assert_eq!(output.value(0).as_int64().unwrap(), 1234);
+    }
+
+    #[test]
+    #[should_panic(
+        expected = "Nested values are handled specially by ObjectBuilder and 
ListBuilder"

Review Comment:
   👍  yeah, I think once we get this PR in, things will be much easier
   - https://github.com/apache/arrow-rs/pull/7914



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to