scovich commented on code in PR #7965:
URL: https://github.com/apache/arrow-rs/pull/7965#discussion_r2216895846


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -177,4 +192,209 @@ mod test {
             r#"{"inner_field": 1234}"#,
         );
     }
+
+    /// Shredding: extract a value as a VariantArray
+    #[test]
+    fn get_variant_shredded_int32_as_variant() {
+        let array = shredded_int32_variant_array();
+        let options = GetOptions::new();
+        let result = variant_get(&array, options).unwrap();
+
+        // expect the result is a VariantArray
+        let result: &VariantArray = result.as_any().downcast_ref().unwrap();
+        assert_eq!(result.len(), 4);
+
+        // Expect the values are the same as the original values
+        assert_eq!(result.value(0), Variant::Int32(34));
+        assert!(!result.is_valid(1));
+        assert_eq!(result.value(2), Variant::from("N/A"));
+        assert_eq!(result.value(3), Variant::Int32(100));
+    }
+
+    /// Shredding: extract a value as an Int32Array
+    #[test]
+    fn get_variant_shredded_int32_as_int32() {
+        // Extract the typed value as Int32Array
+        let array = shredded_int32_variant_array();
+        let options = GetOptions::new()
+            // specify we want the typed value as Int32
+            .with_as_type(Some(Field::new("typed_value", DataType::Int32, 
true)));
+        let result = variant_get(&array, options).unwrap();
+        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(34), 
None, None, Some(100)]));
+        assert_eq!(&result, &expected)
+    }
+
+    /// Perfect Shredding: extract the typed value as a VariantArray
+    #[test]
+    fn get_variant_perfectly_shredded_int32_as_variant() {
+        let array = perfectly_shredded_int32_variant_array();
+        let options = GetOptions::new();
+        let result = variant_get(&array, options).unwrap();
+
+        // expect the result is a VariantArray
+        let result: &VariantArray = result.as_any().downcast_ref().unwrap();
+        assert_eq!(result.len(), 3);
+
+        // Expect the values are the same as the original values
+        assert_eq!(result.value(0), Variant::Int32(1));
+        assert_eq!(result.value(1), Variant::Int32(2));
+        assert_eq!(result.value(2), Variant::Int32(3));
+    }
+
+    /// Shredding: Extract the typed value as Int32Array
+    #[test]
+    fn get_variant_perfectly_shredded_int32_as_int32() {
+        // Extract the typed value as Int32Array
+        let array = perfectly_shredded_int32_variant_array();
+        let options = GetOptions::new()
+            // specify we want the typed value as Int32
+            .with_as_type(Some(Field::new("typed_value", DataType::Int32, 
true)));
+        let result = variant_get(&array, options).unwrap();
+        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), 
Some(2), Some(3)]));
+        assert_eq!(&result, &expected)
+    }
+
+    /// Return a VariantArray that represents a perfectly "shredded" variant
+    /// for the following example (3 Variant::Int32 values):
+    ///
+    /// ```text
+    /// 1
+    /// 2
+    /// 3
+    /// ```
+    ///
+    /// The schema of the corresponding `StructArray` would look like this:
+    ///
+    /// ```text
+    /// StructArray {
+    ///   metadata: BinaryViewArray,
+    ///   typed_value: Int32Array,
+    /// }
+    /// ```
+    fn perfectly_shredded_int32_variant_array() -> ArrayRef {
+        // At the time of writing, the `VariantArrayBuilder` does not support 
shredding.
+        // so we must construct the array manually.  see 
https://github.com/apache/arrow-rs/issues/7895
+        let (metadata, _value) = { 
parquet_variant::VariantBuilder::new().finish() };
+
+        let metadata = 
BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
+        let typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3)]);
+
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata))
+            .with_field("typed_value", Arc::new(typed_value))
+            .build();
+
+        Arc::new(
+            VariantArray::try_new(Arc::new(struct_array)).expect("should 
create variant array"),
+        )

Review Comment:
   The [shredding spec](https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding) definitely says both columns are optional:
   
   > Both `value` and `typed_value` are optional fields used together to encode 
a single value.
   
   Additionally:
   
   > Values in the two fields must be interpreted according to the following 
table:
   >
   > | `value`  | `typed_value` | Meaning                                                     |
   > |----------|---------------|-------------------------------------------------------------|
   > | null     | null          | The value is missing; only valid for shredded object fields |
   > | non-null | null          | The value is present and may be any type, including null    |
   > | null     | non-null      | The value is present and is the shredded type               |
   > | non-null | non-null      | The value is present and is a partially shredded object     |
   
   As always in parquet, a physically missing column is interpreted as all-null.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to