sdf-jkl commented on code in PR #8354:
URL: https://github.com/apache/arrow-rs/pull/8354#discussion_r2353515197


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -1010,7 +1010,101 @@ mod test {
         let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), 
Some(42)]));
         assert_eq!(&result, &expected);
     }
+    /// This test manually constructs a shredded variant array representing 
lists
+    /// like ["comedy", "drama"], ["horror", null] and ["comedy", "drama", 
"romance"]
+    /// as VariantArray using variant_get.
+    #[test]
+    fn test_shredded_list_field_access() {
+        let array = shredded_list_variant_array();
+
+        // Test: Extract the 0 index field as VariantArray first
+        let options = GetOptions::new_with_path(VariantPath::from(0));
+        let result = variant_get(&array, options).unwrap();
 
+        let result_variant: &VariantArray = 
result.as_any().downcast_ref().unwrap();
+        assert_eq!(result_variant.len(), 3);
+    
+        // Row 0: expect 0 index = "comedy"
+        assert_eq!(result_variant.value(0), Variant::String("comedy"));
+        // Row 1: expect 0 index = "horror"
+        assert_eq!(result_variant.value(1), Variant::String("horror"));
+        // Row 2: expect 0 index = "comedy"
+        assert_eq!(result_variant.value(2), Variant::String("comedy"));
+    }
+    /// Test extracting shredded list field with type conversion
+    #[test]
+    fn test_shredded_list_as_string() {
+        let array = shredded_list_variant_array();
+
+        // Test: Extract the 0 index values as StringArray (type conversion)
+        let field = Field::new("typed_value", DataType::Utf8, false);
+        let options = GetOptions::new_with_path(VariantPath::from(0))
+            .with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+
+        // Should get StringArray
+        let expected: ArrayRef = 
Arc::new(StringArray::from(vec![Some("comedy"), Some("drama")]));
+        assert_eq!(&result, &expected);
+    }
+    /// Helper function to create a shredded variant array representing lists
+    ///
+    /// This creates an array that represents:
+    /// Row 0: ["comedy", "drama"] ([0] is shredded, [1] is shredded - 
perfectly shredded)
+    /// Row 1: ["horror", null] ([0] is shredded, [1] is binary null - 
partially shredded)
+    /// Row 2: ["comedy", "drama", "romance"] (perfectly shredded)
+    ///
+    /// The physical layout follows the shredding spec where:
+    /// - metadata: contains list metadata
+    /// - typed_value: StructArray with 0 index value
+    /// - value: contains fallback for
+    fn shredded_list_variant_array() -> ArrayRef {
+        // Create the base metadata for lists
+
+        // Could add this as an api for VariantList, like VariantList::from()
+        fn build_list_metadata(vector: Vec<Variant>) -> (Vec<u8>, Vec<u8>) {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let mut list = builder.new_list();
+            for value in vector {
+                list.append_value(value);
+            }
+            list.finish();
+            builder.finish()
+        }
+        let (metadata1, _) =
+            build_list_metadata(vec![Variant::String("comedy"), 
Variant::String("drama")]);
+
+        let (metadata2, _) = 
build_list_metadata(vec![Variant::String("horror"), Variant::Null]);
+
+        let (metadata3, _) = build_list_metadata(vec![
+            Variant::String("comedy"),
+            Variant::String("drama"),
+            Variant::String("romance"),
+        ]);
+
+        // Create metadata array
+        let metadata_array =
+            BinaryViewArray::from_iter_values(vec![metadata1, metadata2, 
metadata3]);
+
+        // Create the untyped value array
+        let value_array = 
BinaryViewArray::from(vec![Variant::Null.as_u8_slice()]);
+        // Maybe I should try with an actual primitive array
+        let typed_value_array = StringArray::from(vec![
+            Some("comedy"),
+            Some("drama"),
+            Some("horror"),
+            Some("comedy"),
+            Some("drama"),
+            Some("romance"),
+        ]);
+        // Build the main VariantArray
+        let main_struct = crate::variant_array::StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_array))
+            .with_field("value", Arc::new(value_array))
+            .with_field("typed_value", Arc::new(typed_value_array))

Review Comment:
   Thanks for this too. I was under the mistaken impression that the metadata 
encoding stores the offsets for the actual values. After reading your #8359 and 
rereading the Variant Encoding spec, I see that the value offsets are stored 
within the value encoding itself.
   
   So the outermost `typed_value` should be a `GenericListArray` whose `element` 
entries are `VariantObjects`, each with `value` and `typed_value` fields?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to