liamzwbao commented on code in PR #8354:
URL: https://github.com/apache/arrow-rs/pull/8354#discussion_r2893430812


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -1848,6 +1979,282 @@ mod test {
         assert_eq!(&result, &expected);
     }
 
+    /// This test uses a pre-shredded list array and validates index-path 
access.
+    #[test]
+    fn test_shredded_list_index_access() {
+        let array = shredded_list_variant_array();
+        // Test: Extract the 0 index field as VariantArray first
+        let options = GetOptions::new_with_path(VariantPath::from(0));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+        assert_eq!(result_variant.len(), 2);
+
+        // Row 0: expect 0 index = "comedy"
+        assert_eq!(result_variant.value(0), Variant::from("comedy"));
+        // Row 1: expect 0 index = "horror"
+        assert_eq!(result_variant.value(1), Variant::from("horror"));
+    }
+
+    /// Test extracting shredded list field with type conversion.
+    #[test]
+    fn test_shredded_list_as_string() {
+        let array = shredded_list_variant_array();
+        // Test: Extract the 0 index values as StringArray (type conversion)
+        let field = Field::new("typed_value", DataType::Utf8, false);
+        let options = GetOptions::new_with_path(VariantPath::from(0))
+            .with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+        // Should get StringArray
+        let expected: ArrayRef = 
Arc::new(StringArray::from(vec![Some("comedy"), Some("horror")]));
+        assert_eq!(&result, &expected);
+    }
+
+    #[test]
+    fn test_shredded_list_index_access_from_value_field() {
+        let array = shredded_list_variant_array();
+        // Index 1 maps to "drama" for row 0, and to fallback value 123 for 
row 1.
+        let options = GetOptions::new_with_path(VariantPath::from(1));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+
+        assert_eq!(result_variant.value(0), Variant::from("drama"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+    }
+
+    #[test]
+    fn test_shredded_list_index_access_from_value_field_as_int64() {
+        let array = shredded_list_variant_array();
+        let field = Field::new("typed_value", DataType::Int64, true);
+        let options = GetOptions::new_with_path(VariantPath::from(1))
+            .with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+
+        // "drama" -> NULL, 123 -> 123.
+        let expected: ArrayRef = Arc::new(Int64Array::from(vec![None, 
Some(123)]));
+        assert_eq!(&result, &expected);
+    }
+
+    #[test]
+    fn test_shredded_list_index_out_of_bounds_unsafe_cast_errors() {
+        let options =
+            
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+                safe: false,
+                ..Default::default()
+            });
+
+        let err = variant_get(&shredded_list_variant_array(), 
options.clone()).unwrap_err();
+        assert!(err.to_string().contains("Cannot access index '10'"));
+    }
+
+    #[test]
+    fn test_shredded_large_list_index_access_from_value_field() {
+        let array = shredded_large_list_variant_array();
+        // Index 1 maps to "drama" for row 0, and to fallback value 123 for 
row 1.
+        let options = GetOptions::new_with_path(VariantPath::from(1));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+
+        assert_eq!(result_variant.value(0), Variant::from("drama"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+    }
+
+    #[test]
+    fn test_shredded_large_list_index_out_of_bounds_unsafe_cast_errors() {
+        let options =
+            
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+                safe: false,
+                ..Default::default()
+            });
+
+        let err = variant_get(&shredded_large_list_variant_array(), 
options).unwrap_err();
+        assert!(err.to_string().contains("Cannot access index '10'"));
+    }
+
+    #[test]
+    fn test_shredded_list_view_index_access_from_value_field() {
+        let array = shredded_list_view_variant_array();
+        let options = GetOptions::new_with_path(VariantPath::from(1));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+
+        assert_eq!(result_variant.value(0), Variant::from("drama"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+    }
+
+    #[test]
+    fn test_shredded_list_view_index_out_of_bounds_unsafe_cast_errors() {
+        let options =
+            
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+                safe: false,
+                ..Default::default()
+            });
+
+        let err = variant_get(&shredded_list_view_variant_array(), 
options).unwrap_err();
+        assert!(err.to_string().contains("Cannot access index '10'"));
+    }
+
+    #[test]
+    fn test_shredded_large_list_view_index_access_from_value_field() {
+        let array = shredded_large_list_view_variant_array();
+        let options = GetOptions::new_with_path(VariantPath::from(1));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+
+        assert_eq!(result_variant.value(0), Variant::from("drama"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+    }
+
+    #[test]
+    fn test_shredded_large_list_view_index_out_of_bounds_unsafe_cast_errors() {
+        let options =
+            
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+                safe: false,
+                ..Default::default()
+            });
+
+        let err = variant_get(&shredded_large_list_view_variant_array(), 
options).unwrap_err();
+        assert!(err.to_string().contains("Cannot access index '10'"));
+    }
+
+    #[test]
+    fn test_shredded_list_in_struct_index_access() {
+        let array = shredded_struct_with_list_variant_array();
+        let options = 
GetOptions::new_with_path(VariantPath::try_from("a[1]").unwrap());
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+
+        assert_eq!(result_variant.value(0), Variant::from("drama"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+    }
+
+    #[test]
+    fn test_shredded_struct_in_list_field_access() {
+        let array = shredded_list_of_struct_variant_array();
+        let field = Field::new("x", DataType::Int32, true);
+        let path = VariantPath::from(0).join("x");
+        let options = 
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+
+        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), 
Some(3)]));
+        assert_eq!(&result, &expected);
+    }
+
+    #[test]
+    fn test_shredded_list_of_lists_index_access() {
+        let array = shredded_list_of_lists_variant_array();
+        let path = VariantPath::from(0).join(1);
+
+        let result = variant_get(&array, 
GetOptions::new_with_path(path.clone())).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+        assert_eq!(result_variant.value(0), Variant::from("b"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+
+        let field = Field::new("typed_value", DataType::Int64, true);
+        let casted = variant_get(
+            &array,
+            
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))),
+        )
+        .unwrap();
+        let expected: ArrayRef = Arc::new(Int64Array::from(vec![None, 
Some(123)]));
+        assert_eq!(&casted, &expected);
+    }
+
+    /// Helper to create a shredded list variant array used by list index 
tests.
+    ///
+    /// Rows:
+    /// 1. `["comedy", "drama"]` (fully shred-able as `Utf8`)
+    /// 2. `["horror", 123]` (partially shredded, with fallback for the 
numeric element)
+    fn shredded_list_variant_array() -> ArrayRef {
+        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"["comedy", "drama"]"#),
+            Some(r#"["horror", 123]"#),
+        ]));
+        let input = json_to_variant(&json_rows).unwrap();
+
+        let list_schema = DataType::List(Arc::new(Field::new("item", 
DataType::Utf8, true)));
+        let shredded = shred_variant(&input, &list_schema).unwrap();
+        ArrayRef::from(shredded)
+    }
+
+    fn shredded_struct_with_list_variant_array() -> ArrayRef {
+        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"{"a": ["comedy", "drama"]}"#),
+            Some(r#"{"a": ["horror", 123]}"#),
+        ]));
+        let input = json_to_variant(&json_rows).unwrap();
+
+        let list_schema = DataType::List(Arc::new(Field::new("item", 
DataType::Utf8, true)));
+        let shredding_schema = ShreddedSchemaBuilder::default()
+            .with_path("a", &list_schema)
+            .unwrap()
+            .build();
+        let shredded = shred_variant(&input, &shredding_schema).unwrap();
+        ArrayRef::from(shredded)
+    }
+
+    fn shredded_list_of_struct_variant_array() -> ArrayRef {
+        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"[{"x": 1}, {"x": 2}]"#),
+            Some(r#"[{"x": 3}, {"y": 4}]"#),
+        ]));
+        let input = json_to_variant(&json_rows).unwrap();
+
+        let struct_type =
+            DataType::Struct(Fields::from(vec![Field::new("x", 
DataType::Int32, true)]));
+        let list_schema = DataType::List(Arc::new(Field::new("item", 
struct_type, true)));
+        let shredded = shred_variant(&input, &list_schema).unwrap();
+        ArrayRef::from(shredded)
+    }
+
+    fn shredded_list_of_lists_variant_array() -> ArrayRef {
+        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"[["a", "b"], ["c", "d"]]"#),
+            Some(r#"[["x", 123], ["y", "z"]]"#),
+        ]));
+        let input = json_to_variant(&json_rows).unwrap();
+
+        let inner_list = DataType::List(Arc::new(Field::new("item", 
DataType::Utf8, true)));
+        let outer_list = DataType::List(Arc::new(Field::new("item", 
inner_list, true)));
+        let shredded = shred_variant(&input, &outer_list).unwrap();
+        ArrayRef::from(shredded)
+    }
+
+    fn shredded_large_list_variant_array() -> ArrayRef {
+        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"["comedy", "drama"]"#),
+            Some(r#"["horror", 123]"#),
+        ]));
+        let input = json_to_variant(&json_rows).unwrap();
+
+        let list_schema = DataType::LargeList(Arc::new(Field::new("item", 
DataType::Utf8, true)));
+        let shredded = shred_variant(&input, &list_schema).unwrap();
+        ArrayRef::from(shredded)
+    }
+
+    fn shredded_list_view_variant_array() -> ArrayRef {
+        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"["comedy", "drama"]"#),
+            Some(r#"["horror", 123]"#),
+        ]));
+        let input = json_to_variant(&json_rows).unwrap();
+
+        let list_schema = DataType::ListView(Arc::new(Field::new("item", 
DataType::Utf8, true)));
+        let shredded = shred_variant(&input, &list_schema).unwrap();
+        ArrayRef::from(shredded)
+    }
+
+    fn shredded_large_list_view_variant_array() -> ArrayRef {
+        let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+            Some(r#"["comedy", "drama"]"#),
+            Some(r#"["horror", 123]"#),
+        ]));
+        let input = json_to_variant(&json_rows).unwrap();
+
+        let list_schema =
+            DataType::LargeListView(Arc::new(Field::new("item", 
DataType::Utf8, true)));
+        let shredded = shred_variant(&input, &list_schema).unwrap();
+        ArrayRef::from(shredded)
+    }

Review Comment:
   nit: These helpers look pretty similar other than the schema definition; we 
could consider generalizing them. This would also allow the callers of these 
helpers to be merged into a single test if they differ only in the list types.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to