alamb commented on code in PR #8280:
URL: https://github.com/apache/arrow-rs/pull/8280#discussion_r2330837752
##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -246,12 +443,16 @@ pub enum ShreddingState {
metadata: BinaryViewArray,
typed_value: ArrayRef,
},
- /// Partially shredded:
- /// * value is an object
- /// * typed_value is a shredded object.
+ /// Imperfectly shredded: Shredded values reside in `typed_value` while those that failed to
Review Comment:
thank you -- this made it much clearer to me
##########
parquet-variant/src/path.rs:
##########
@@ -95,10 +95,10 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
}
}
-/// Create from &str
+/// Create from &str with support for dot notation
Review Comment:
This will probably need support for escaping, etc. eventually, but it is
fine for now.
##########
parquet-variant-compute/src/variant_get/output/row_builder.rs:
##########
@@ -0,0 +1,342 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::ArrayRef;
+use arrow::compute::CastOptions;
+use arrow::datatypes;
+use arrow::datatypes::ArrowPrimitiveType;
+use arrow::error::{ArrowError, Result};
+use parquet_variant::{Variant, VariantPath};
+
+use crate::VariantArrayBuilder;
+
+use std::sync::Arc;
+
+pub(crate) fn make_shredding_row_builder<'a>(
+ //metadata: &BinaryViewArray,
+ path: VariantPath<'a>,
+ data_type: Option<&'a datatypes::DataType>,
+ cast_options: &'a CastOptions,
+) -> Result<Box<dyn VariantShreddingRowBuilder + 'a>> {
+ use arrow::array::PrimitiveBuilder;
+ use datatypes::{
+ Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
+ };
+
+ // support non-empty paths (field access) and some empty path cases
+ if path.is_empty() {
+ return match data_type {
+ Some(datatypes::DataType::Int8) => {
+ let builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int8Type>::new(),
+ cast_options,
+ };
+ Ok(Box::new(builder))
+ }
+ Some(datatypes::DataType::Int16) => {
+ let builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int16Type>::new(),
+ cast_options,
+ };
+ Ok(Box::new(builder))
+ }
+ Some(datatypes::DataType::Int32) => {
+ let builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int32Type>::new(),
+ cast_options,
+ };
+ Ok(Box::new(builder))
+ }
+ Some(datatypes::DataType::Int64) => {
+ let builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int64Type>::new(),
+ cast_options,
+ };
+ Ok(Box::new(builder))
+ }
+ Some(datatypes::DataType::Float16) => {
+ let builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Float16Type>::new(),
+ cast_options,
+ };
+ Ok(Box::new(builder))
+ }
+ Some(datatypes::DataType::Float32) => {
+ let builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Float32Type>::new(),
+ cast_options,
+ };
+ Ok(Box::new(builder))
+ }
+ Some(datatypes::DataType::Float64) => {
+ let builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Float64Type>::new(),
+ cast_options,
+ };
+ Ok(Box::new(builder))
+ }
+ None => {
+ // Return VariantArrayBuilder for VariantArray output
+ let builder = VariantArrayShreddingRowBuilder::new(16);
+ Ok(Box::new(builder))
+ }
+ _ => Err(ArrowError::NotYetImplemented(format!(
+ "variant_get with empty path and data_type={:?} not yet implemented",
+ data_type
+ ))),
+ };
+ }
+
+ // Non-empty paths: field access functionality
+ // Helper macro to reduce duplication when wrapping builders with path functionality
+ macro_rules! wrap_with_path {
+ ($inner_builder:expr) => {
+ Ok(Box::new(VariantPathRowBuilder {
+ builder: $inner_builder,
+ path,
+ }) as Box<dyn VariantShreddingRowBuilder + 'a>)
+ };
+ }
+
+ match data_type {
+ Some(datatypes::DataType::Int8) => {
+ let inner_builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int8Type>::new(),
+ cast_options,
+ };
+ wrap_with_path!(inner_builder)
+ }
+ Some(datatypes::DataType::Int16) => {
+ let inner_builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int16Type>::new(),
+ cast_options,
+ };
+ wrap_with_path!(inner_builder)
+ }
+ Some(datatypes::DataType::Int32) => {
+ let inner_builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int32Type>::new(),
+ cast_options,
+ };
+ wrap_with_path!(inner_builder)
+ }
+ Some(datatypes::DataType::Int64) => {
+ let inner_builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Int64Type>::new(),
+ cast_options,
+ };
+ wrap_with_path!(inner_builder)
+ }
+ Some(datatypes::DataType::Float16) => {
+ let inner_builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Float16Type>::new(),
+ cast_options,
+ };
+ wrap_with_path!(inner_builder)
+ }
+ Some(datatypes::DataType::Float32) => {
+ let inner_builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Float32Type>::new(),
+ cast_options,
+ };
+ wrap_with_path!(inner_builder)
+ }
+ Some(datatypes::DataType::Float64) => {
+ let inner_builder = PrimitiveVariantShreddingRowBuilder {
+ builder: PrimitiveBuilder::<Float64Type>::new(),
+ cast_options,
+ };
+ wrap_with_path!(inner_builder)
+ }
+ None => {
+ // Create a variant array builder and wrap it with path functionality
+ let inner_builder = VariantArrayShreddingRowBuilder::new(16);
+ wrap_with_path!(inner_builder)
+ }
+ _ => Err(ArrowError::NotYetImplemented(format!(
+ "variant_get with path={:?} and data_type={:?} not yet implemented",
+ path, data_type
+ ))),
+ }
+}
+
+/// Builder for shredding variant values into strongly typed Arrow arrays.
+///
+/// Useful for variant_get kernels that need to extract specific paths from variant values, possibly
+/// with casting of leaf values to specific types.
+pub(crate) trait VariantShreddingRowBuilder {
+ fn append_null(&mut self) -> Result<()>;
+
+ fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool>;
+
+ fn finish(&mut self) -> Result<ArrayRef>;
+}
+
+/// A thin wrapper whose only job is to extract a specific path from a variant value and pass the
+/// result to a nested builder.
+struct VariantPathRowBuilder<'a, T: VariantShreddingRowBuilder> {
+ builder: T,
+ path: VariantPath<'a>,
+}
+
+impl<T: VariantShreddingRowBuilder> VariantShreddingRowBuilder for VariantPathRowBuilder<'_, T> {
+ fn append_null(&mut self) -> Result<()> {
+ self.builder.append_null()
+ }
+
+ fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+ if let Some(v) = value.get_path(&self.path) {
+ self.builder.append_value(&v)
+ } else {
+ self.builder.append_null()?;
+ Ok(false)
+ }
+ }
+ fn finish(&mut self) -> Result<ArrayRef> {
+ self.builder.finish()
+ }
+}
+
+/// Helper trait for converting `Variant` values to arrow primitive values.
Review Comment:
As a follow-on, this trait might be more discoverable if we put it somewhere
in `parquet-variant-compute/src/type_conversion.rs`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]