This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 2f40f78e4f [Variant] Support `['fieldName']` in VariantPath parser 
(#9276)
2f40f78e4f is described below

commit 2f40f78e4feae3aee261d9608cede9535e1429e0
Author: Congxian Qiu <[email protected]>
AuthorDate: Tue Feb 17 20:18:55 2026 +0800

    [Variant] Support `['fieldName']` in VariantPath parser (#9276)
    
    # Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    -->
    
    - Closes #9050.
    - close #8954
    
    # Rationale for this change
    
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    # What changes are included in this PR?
    
    Add `[fieldName]` support in `VariantPath` parser, will throw an error
    if the parser fails.
    
    Also support escaping `\` inside brackets. If we force users to use
    brackets when the field contains special characters, maybe we can also
    close #8954
    
    Sample behaviors(read more on the code doc)
    
    - `[foo]` -> filed `foo`
    - `[2]` -> index 2
    - `[a.b]` -> field `a.b`
    - `[a\]b]` -> field `a]b`
    - `[a\xb]` -> field `axb`
    
    
    # Are these changes tested?
    
    <!--
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    -->
    
    Added tests
    
    # Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
    
    Yes, there are some user-facing changes, but `parquet-variant` is still
    experient for now, so maybe we don't need to wait for a major version.
---
 parquet-variant-compute/src/shred_variant.rs | 117 ++++++++++++++---------
 parquet-variant-compute/src/variant_get.rs   |  93 ++++++++++---------
 parquet-variant/src/path.rs                  | 100 +++++++++++++++++---
 parquet-variant/src/utils.rs                 | 133 +++++++++++++++++++++++----
 parquet-variant/src/variant.rs               |   6 +-
 5 files changed, 322 insertions(+), 127 deletions(-)

diff --git a/parquet-variant-compute/src/shred_variant.rs 
b/parquet-variant-compute/src/shred_variant.rs
index d82eb15af5..c60c602baa 100644
--- a/parquet-variant-compute/src/shred_variant.rs
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -493,24 +493,26 @@ impl IntoShreddingField for (DataType, bool) {
 /// use parquet_variant::{VariantPath, VariantPathElement};
 /// use parquet_variant_compute::ShreddedSchemaBuilder;
 ///
-/// // Define the shredding schema using the builder
-/// let shredding_type = ShreddedSchemaBuilder::default()
+/// fn main() -> Result<(), arrow::error::ArrowError> {
+///     // Define the shredding schema using the builder
+///     let shredding_type = ShreddedSchemaBuilder::default()
 ///     // store the "time" field as a separate UTC timestamp
-///     .with_path("time", (&DataType::Timestamp(TimeUnit::Nanosecond, 
Some("UTC".into())), true))
+///     .with_path("time", (&DataType::Timestamp(TimeUnit::Nanosecond, 
Some("UTC".into())), true))?
 ///     // store hostname as non-nullable Utf8
-///     .with_path("hostname", (&DataType::Utf8, false))
+///     .with_path("hostname", (&DataType::Utf8, false))?
 ///     // pass a FieldRef directly
 ///     .with_path(
 ///         "metadata.trace_id",
 ///         Arc::new(Field::new("trace_id", DataType::FixedSizeBinary(16), 
false)),
-///     )
+///     )?
 ///     // field name with a dot: use VariantPath to avoid splitting
 ///     .with_path(
 ///         VariantPath::from_iter([VariantPathElement::from("metrics.cpu")]),
 ///         &DataType::Float64,
-///     )
+///     )?
 ///     .build();
-///
+///    Ok(())
+/// }
 /// // The shredding_type can now be passed to shred_variant:
 /// // let shredded = shred_variant(&input, &shredding_type)?;
 /// ```
@@ -536,14 +538,17 @@ impl ShreddedSchemaBuilder {
     /// * `path` - Anything convertible to [`VariantPath`] (e.g., a `&str`)
     /// * `field` - Anything convertible via [`IntoShreddingField`] (e.g. 
`FieldRef`,
     ///   `&DataType`, or `(&DataType, bool)` to control nullability)
-    pub fn with_path<'a, P, F>(mut self, path: P, field: F) -> Self
+    pub fn with_path<'a, P, F>(mut self, path: P, field: F) -> Result<Self>
     where
-        P: Into<VariantPath<'a>>,
+        P: TryInto<VariantPath<'a>>,
+        P::Error: std::fmt::Debug,
         F: IntoShreddingField,
     {
-        let path: VariantPath<'a> = path.into();
+        let path: VariantPath<'a> = path
+            .try_into()
+            .map_err(|e| ArrowError::InvalidArgumentError(format!("{:?}", 
e)))?;
         self.root.insert_path(&path, field.into_shredding_field());
-        self
+        Ok(self)
     }
 
     /// Build the final [`DataType`].
@@ -1558,7 +1563,7 @@ mod tests {
     }
 
     #[test]
-    fn test_object_shredding_comprehensive() {
+    fn test_object_shredding_comprehensive() -> Result<()> {
         let input = build_variant_array(vec![
             // Row 0: Fully shredded object
             VariantRow::Object(vec![
@@ -1596,8 +1601,8 @@ mod tests {
         // Create target schema: struct<score: float64, age: int64>
         // Both types are supported for shredding
         let target_schema = ShreddedSchemaBuilder::default()
-            .with_path("score", &DataType::Float64)
-            .with_path("age", &DataType::Int64)
+            .with_path("score", &DataType::Float64)?
+            .with_path("age", &DataType::Int64)?
             .build();
 
         let result = shred_variant(&input, &target_schema).unwrap();
@@ -1903,6 +1908,7 @@ mod tests {
                 }),
             }),
         );
+        Ok(())
     }
 
     #[test]
@@ -1998,7 +2004,7 @@ mod tests {
     }
 
     #[test]
-    fn test_object_different_schemas() {
+    fn test_object_different_schemas() -> Result<()> {
         // Create object with multiple fields
         let input = build_variant_array(vec![VariantRow::Object(vec![
             ("id", VariantValue::from(123i32)),
@@ -2008,7 +2014,7 @@ mod tests {
 
         // Test with schema containing only id field
         let schema1 = ShreddedSchemaBuilder::default()
-            .with_path("id", &DataType::Int32)
+            .with_path("id", &DataType::Int32)?
             .build();
         let result1 = shred_variant(&input, &schema1).unwrap();
         let value_field1 = result1.value_field().unwrap();
@@ -2016,8 +2022,8 @@ mod tests {
 
         // Test with schema containing id and age fields
         let schema2 = ShreddedSchemaBuilder::default()
-            .with_path("id", &DataType::Int32)
-            .with_path("age", &DataType::Int64)
+            .with_path("id", &DataType::Int32)?
+            .with_path("age", &DataType::Int64)?
             .build();
         let result2 = shred_variant(&input, &schema2).unwrap();
         let value_field2 = result2.value_field().unwrap();
@@ -2025,17 +2031,19 @@ mod tests {
 
         // Test with schema containing all fields
         let schema3 = ShreddedSchemaBuilder::default()
-            .with_path("id", &DataType::Int32)
-            .with_path("age", &DataType::Int64)
-            .with_path("score", &DataType::Float64)
+            .with_path("id", &DataType::Int32)?
+            .with_path("age", &DataType::Int64)?
+            .with_path("score", &DataType::Float64)?
             .build();
         let result3 = shred_variant(&input, &schema3).unwrap();
         let value_field3 = result3.value_field().unwrap();
         assert!(value_field3.is_null(0)); // fully shredded, no remaining 
fields
+
+        Ok(())
     }
 
     #[test]
-    fn test_uuid_shredding_in_objects() {
+    fn test_uuid_shredding_in_objects() -> Result<()> {
         let mock_uuid_1 = Uuid::new_v4();
         let mock_uuid_2 = Uuid::new_v4();
         let mock_uuid_3 = Uuid::new_v4();
@@ -2069,8 +2077,8 @@ mod tests {
         ]);
 
         let target_schema = ShreddedSchemaBuilder::default()
-            .with_path("id", DataType::FixedSizeBinary(16))
-            .with_path("session_id", DataType::FixedSizeBinary(16))
+            .with_path("id", DataType::FixedSizeBinary(16))?
+            .with_path("session_id", DataType::FixedSizeBinary(16))?
             .build();
 
         let result = shred_variant(&input, &target_schema).unwrap();
@@ -2201,6 +2209,8 @@ mod tests {
 
         // Row 5: Null
         assert!(result.is_null(5));
+
+        Ok(())
     }
 
     #[test]
@@ -2251,10 +2261,10 @@ mod tests {
     }
 
     #[test]
-    fn test_variant_schema_builder_simple() {
+    fn test_variant_schema_builder_simple() -> Result<()> {
         let shredding_type = ShreddedSchemaBuilder::default()
-            .with_path("a", &DataType::Int64)
-            .with_path("b", &DataType::Float64)
+            .with_path("a", &DataType::Int64)?
+            .with_path("b", &DataType::Float64)?
             .build();
 
         assert_eq!(
@@ -2264,14 +2274,16 @@ mod tests {
                 Field::new("b", DataType::Float64, true),
             ]))
         );
+
+        Ok(())
     }
 
     #[test]
-    fn test_variant_schema_builder_nested() {
+    fn test_variant_schema_builder_nested() -> Result<()> {
         let shredding_type = ShreddedSchemaBuilder::default()
-            .with_path("a", &DataType::Int64)
-            .with_path("b.c", &DataType::Utf8)
-            .with_path("b.d", &DataType::Float64)
+            .with_path("a", &DataType::Int64)?
+            .with_path("b.c", &DataType::Utf8)?
+            .with_path("b.d", &DataType::Float64)?
             .build();
 
         assert_eq!(
@@ -2288,13 +2300,15 @@ mod tests {
                 ),
             ]))
         );
+
+        Ok(())
     }
 
     #[test]
-    fn test_variant_schema_builder_with_path_variant_path_arg() {
+    fn test_variant_schema_builder_with_path_variant_path_arg() -> Result<()> {
         let path = VariantPath::from_iter([VariantPathElement::from("a.b")]);
         let shredding_type = ShreddedSchemaBuilder::default()
-            .with_path(path, &DataType::Int64)
+            .with_path(path, &DataType::Int64)?
             .build();
 
         match shredding_type {
@@ -2305,16 +2319,18 @@ mod tests {
             }
             _ => panic!("expected struct data type"),
         }
+
+        Ok(())
     }
 
     #[test]
-    fn test_variant_schema_builder_custom_nullability() {
+    fn test_variant_schema_builder_custom_nullability() -> Result<()> {
         let shredding_type = ShreddedSchemaBuilder::default()
             .with_path(
                 "foo",
                 Arc::new(Field::new("should_be_renamed", DataType::Utf8, 
false)),
-            )
-            .with_path("bar", (&DataType::Int64, false))
+            )?
+            .with_path("bar", (&DataType::Int64, false))?
             .build();
 
         let DataType::Struct(fields) = shredding_type else {
@@ -2328,10 +2344,12 @@ mod tests {
         let bar = fields.iter().find(|f| f.name() == "bar").unwrap();
         assert_eq!(bar.data_type(), &DataType::Int64);
         assert!(!bar.is_nullable());
+
+        Ok(())
     }
 
     #[test]
-    fn test_variant_schema_builder_with_shred_variant() {
+    fn test_variant_schema_builder_with_shred_variant() -> Result<()> {
         let input = build_variant_array(vec![
             VariantRow::Object(vec![
                 ("time", VariantValue::from(1234567890i64)),
@@ -2346,8 +2364,8 @@ mod tests {
         ]);
 
         let shredding_type = ShreddedSchemaBuilder::default()
-            .with_path("time", &DataType::Int64)
-            .with_path("hostname", &DataType::Utf8)
+            .with_path("time", &DataType::Int64)?
+            .with_path("hostname", &DataType::Utf8)?
             .build();
 
         let result = shred_variant(&input, &shredding_type).unwrap();
@@ -2424,13 +2442,15 @@ mod tests {
 
         // Row 2
         assert!(result.is_null(2));
+
+        Ok(())
     }
 
     #[test]
-    fn test_variant_schema_builder_conflicting_path() {
+    fn test_variant_schema_builder_conflicting_path() -> Result<()> {
         let shredding_type = ShreddedSchemaBuilder::default()
-            .with_path("a", &DataType::Int64)
-            .with_path("a", &DataType::Float64)
+            .with_path("a", &DataType::Int64)?
+            .with_path("a", &DataType::Float64)?
             .build();
 
         assert_eq!(
@@ -2439,25 +2459,30 @@ mod tests {
                 vec![Field::new("a", DataType::Float64, true),]
             ))
         );
+
+        Ok(())
     }
 
     #[test]
-    fn test_variant_schema_builder_root_path() {
+    fn test_variant_schema_builder_root_path() -> Result<()> {
         let path = VariantPath::new(vec![]);
         let shredding_type = ShreddedSchemaBuilder::default()
-            .with_path(path, &DataType::Int64)
+            .with_path(path, &DataType::Int64)?
             .build();
 
         assert_eq!(shredding_type, DataType::Int64);
+
+        Ok(())
     }
 
     #[test]
-    fn test_variant_schema_builder_empty_path() {
+    fn test_variant_schema_builder_empty_path() -> Result<()> {
         let shredding_type = ShreddedSchemaBuilder::default()
-            .with_path("", &DataType::Int64)
+            .with_path("", &DataType::Int64)?
             .build();
 
         assert_eq!(shredding_type, DataType::Int64);
+        Ok(())
     }
 
     #[test]
diff --git a/parquet-variant-compute/src/variant_get.rs 
b/parquet-variant-compute/src/variant_get.rs
index 35cd5b4e40..f9985084cc 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -390,7 +390,7 @@ mod test {
     fn get_primitive_variant_field() {
         single_variant_get_test(
             r#"{"some_field": 1234}"#,
-            VariantPath::from("some_field"),
+            VariantPath::try_from("some_field").unwrap(),
             "1234",
         );
     }
@@ -404,7 +404,9 @@ mod test {
     fn get_primitive_variant_inside_object_of_object() {
         single_variant_get_test(
             r#"{"top_level_field": {"inner_field": 1234}}"#,
-            VariantPath::from("top_level_field").join("inner_field"),
+            VariantPath::try_from("top_level_field")
+                .unwrap()
+                .join("inner_field"),
             "1234",
         );
     }
@@ -422,7 +424,7 @@ mod test {
     fn get_primitive_variant_inside_object_of_list() {
         single_variant_get_test(
             r#"{"some_field": [1234]}"#,
-            VariantPath::from("some_field").join(0),
+            VariantPath::try_from("some_field").unwrap().join(0),
             "1234",
         );
     }
@@ -431,7 +433,7 @@ mod test {
     fn get_complex_variant() {
         single_variant_get_test(
             r#"{"top_level_field": {"inner_field": 1234}}"#,
-            VariantPath::from("top_level_field"),
+            VariantPath::try_from("top_level_field").unwrap(),
             r#"{"inner_field": 1234}"#,
         );
     }
@@ -1818,7 +1820,7 @@ mod test {
         let array = shredded_object_with_x_field_variant_array();
 
         // Test: Extract the "x" field as VariantArray first
-        let options = GetOptions::new_with_path(VariantPath::from("x"));
+        let options = 
GetOptions::new_with_path(VariantPath::try_from("x").unwrap());
         let result = variant_get(&array, options).unwrap();
 
         let result_variant = VariantArray::try_new(&result).unwrap();
@@ -1837,7 +1839,7 @@ mod test {
 
         // Test: Extract the "x" field as Int32Array (type conversion)
         let field = Field::new("x", DataType::Int32, false);
-        let options = GetOptions::new_with_path(VariantPath::from("x"))
+        let options = 
GetOptions::new_with_path(VariantPath::try_from("x").unwrap())
             .with_as_type(Some(FieldRef::from(field)));
         let result = variant_get(&array, options).unwrap();
 
@@ -1930,11 +1932,11 @@ mod test {
         // Check: How does VariantPath parse different strings?
         println!("Testing path parsing:");
 
-        let path_x = VariantPath::from("x");
+        let path_x = VariantPath::try_from("x").unwrap();
         let elements_x: Vec<_> = path_x.iter().collect();
         println!("  'x' -> {} elements: {:?}", elements_x.len(), elements_x);
 
-        let path_ax = VariantPath::from("a.x");
+        let path_ax = VariantPath::try_from("a.x").unwrap();
         let elements_ax: Vec<_> = path_ax.iter().collect();
         println!(
             "  'a.x' -> {} elements: {:?}",
@@ -1942,7 +1944,7 @@ mod test {
             elements_ax
         );
 
-        let path_ax_alt = VariantPath::from("$.a.x");
+        let path_ax_alt = VariantPath::try_from("$.a.x").unwrap();
         let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect();
         println!(
             "  '$.a.x' -> {} elements: {:?}",
@@ -1950,10 +1952,10 @@ mod test {
             elements_ax_alt
         );
 
-        let path_nested = VariantPath::from("a").join("x");
+        let path_nested = VariantPath::try_from("a").unwrap().join("x");
         let elements_nested: Vec<_> = path_nested.iter().collect();
         println!(
-            "  VariantPath::from('a').join('x') -> {} elements: {:?}",
+            "  VariantPath::try_from('a').unwrap().join('x') -> {} elements: 
{:?}",
             elements_nested.len(),
             elements_nested
         );
@@ -1962,7 +1964,7 @@ mod test {
         let array = shredded_object_with_x_field_variant_array();
 
         // Test if variant_get with REAL nested path throws not implemented 
error
-        let real_nested_path = VariantPath::from("a").join("x");
+        let real_nested_path = VariantPath::try_from("a").unwrap().join("x");
         let options = GetOptions::new_with_path(real_nested_path);
         let result = variant_get(&array, options);
 
@@ -1995,7 +1997,7 @@ mod test {
         let unshredded_array = create_depth_0_test_data();
 
         let field = Field::new("result", DataType::Int32, true);
-        let path = VariantPath::from("x");
+        let path = VariantPath::try_from("x").unwrap();
         let options = 
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
         let result = variant_get(&unshredded_array, options).unwrap();
 
@@ -2011,7 +2013,7 @@ mod test {
         let shredded_array = create_depth_0_shredded_test_data_simple();
 
         let field = Field::new("result", DataType::Int32, true);
-        let path = VariantPath::from("x");
+        let path = VariantPath::try_from("x").unwrap();
         let options = 
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
         let result = variant_get(&shredded_array, options).unwrap();
 
@@ -2033,7 +2035,7 @@ mod test {
         let unshredded_array = create_nested_path_test_data();
 
         let field = Field::new("result", DataType::Int32, true);
-        let path = VariantPath::from("a.x"); // Dot notation!
+        let path = VariantPath::try_from("a.x").unwrap(); // Dot notation!
         let options = 
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
         let result = variant_get(&unshredded_array, options).unwrap();
 
@@ -2048,7 +2050,7 @@ mod test {
         let shredded_array = create_depth_1_shredded_test_data_working();
 
         let field = Field::new("result", DataType::Int32, true);
-        let path = VariantPath::from("a.x"); // Dot notation!
+        let path = VariantPath::try_from("a.x").unwrap(); // Dot notation!
         let options = 
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
         let result = variant_get(&shredded_array, options).unwrap();
 
@@ -2070,7 +2072,7 @@ mod test {
         let unshredded_array = create_depth_2_test_data();
 
         let field = Field::new("result", DataType::Int32, true);
-        let path = VariantPath::from("a.b.x"); // Double nested dot notation!
+        let path = VariantPath::try_from("a.b.x").unwrap(); // Double nested 
dot notation!
         let options = 
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
         let result = variant_get(&unshredded_array, options).unwrap();
 
@@ -2086,7 +2088,7 @@ mod test {
         let shredded_array = create_depth_2_shredded_test_data_working();
 
         let field = Field::new("result", DataType::Int32, true);
-        let path = VariantPath::from("a.b.x"); // Double nested dot notation!
+        let path = VariantPath::try_from("a.b.x").unwrap(); // Double nested 
dot notation!
         let options = 
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
         let result = variant_get(&shredded_array, options).unwrap();
 
@@ -2108,7 +2110,7 @@ mod test {
         let array = shredded_object_with_x_field_variant_array();
 
         // Test: Extract the "x" field (single level) - this works
-        let single_path = VariantPath::from("x");
+        let single_path = VariantPath::try_from("x").unwrap();
         let field = Field::new("result", DataType::Int32, true);
         let options =
             
GetOptions::new_with_path(single_path).with_as_type(Some(FieldRef::from(field)));
@@ -2117,7 +2119,7 @@ mod test {
         println!("Single path 'x' works - result: {:?}", result);
 
         // Test: Try nested path "a.x" - this is what we need to implement
-        let nested_path = VariantPath::from("a").join("x");
+        let nested_path = VariantPath::try_from("a").unwrap().join("x");
         let field = Field::new("result", DataType::Int32, true);
         let options =
             
GetOptions::new_with_path(nested_path).with_as_type(Some(FieldRef::from(field)));
@@ -2584,7 +2586,7 @@ mod test {
 
         // Try to access a field with safe cast options (should return NULLs)
         let safe_options = GetOptions {
-            path: VariantPath::from("nonexistent_field"),
+            path: VariantPath::try_from("nonexistent_field").unwrap(),
             as_type: Some(Arc::new(Field::new("result", DataType::Int32, 
true))),
             cast_options: CastOptions::default(), // safe = true
         };
@@ -2601,7 +2603,7 @@ mod test {
 
         // Try to access a field with strict cast options (should error)
         let strict_options = GetOptions {
-            path: VariantPath::from("nonexistent_field"),
+            path: VariantPath::try_from("nonexistent_field").unwrap(),
             as_type: Some(Arc::new(Field::new("result", DataType::Int32, 
true))),
             cast_options: CastOptions {
                 safe: false,
@@ -2710,7 +2712,7 @@ mod test {
         // 2. The "a" field's typed_value nulls
         // 3. The "x" field's typed_value nulls
         let options = GetOptions {
-            path: VariantPath::from("a.x"),
+            path: VariantPath::try_from("a.x").unwrap(),
             as_type: Some(Arc::new(Field::new("result", DataType::Int32, 
true))),
             cast_options: CastOptions::default(),
         };
@@ -2844,7 +2846,7 @@ mod test {
         // Test 1: nullable field (should allow nulls from cast failures)
         let nullable_field = Arc::new(Field::new("result", DataType::Int32, 
true));
         let options_nullable = GetOptions {
-            path: VariantPath::from("x"),
+            path: VariantPath::try_from("x").unwrap(),
             as_type: Some(nullable_field.clone()),
             cast_options: CastOptions::default(),
         };
@@ -2897,7 +2899,7 @@ mod test {
         // Test 2: non-nullable field (behavior should be the same with safe 
casting)
         let non_nullable_field = Arc::new(Field::new("result", 
DataType::Int32, false));
         let options_non_nullable = GetOptions {
-            path: VariantPath::from("x"),
+            path: VariantPath::try_from("x").unwrap(),
             as_type: Some(non_nullable_field.clone()),
             cast_options: CastOptions::default(), // safe=true by default
         };
@@ -3072,7 +3074,7 @@ mod test {
         let variant_array = create_comprehensive_nested_shredded_variant();
 
         // Extract "outer" field using path-based variant_get
-        let path = VariantPath::from("outer");
+        let path = VariantPath::try_from("outer").unwrap();
         let inner_field = Field::new("inner", DataType::Int32, true);
         let result_type = DataType::Struct(Fields::from(vec![inner_field]));
 
@@ -3117,7 +3119,7 @@ mod test {
         let variant_array = create_comprehensive_nested_shredded_variant();
 
         // Extract "outer.inner" field using path-based variant_get
-        let path = VariantPath::from("outer").join("inner");
+        let path = VariantPath::try_from("outer").unwrap().join("inner");
 
         let options = GetOptions {
             path,
@@ -4113,7 +4115,7 @@ mod test {
         let all_nulls_field_ref = FieldRef::from(Field::new("result", 
DataType::Int32, true));
         let all_nulls_result = variant_get(
             &variant_array,
-            GetOptions::new_with_path(VariantPath::from("all_nulls"))
+            
GetOptions::new_with_path(VariantPath::try_from("all_nulls").unwrap())
                 .with_as_type(Some(all_nulls_field_ref)),
         )
         .unwrap();
@@ -4123,7 +4125,7 @@ mod test {
         let some_nulls_field_ref = FieldRef::from(Field::new("result", 
DataType::Int32, true));
         let some_nulls_result = variant_get(
             &variant_array,
-            GetOptions::new_with_path(VariantPath::from("some_nulls"))
+            
GetOptions::new_with_path(VariantPath::try_from("some_nulls").unwrap())
                 .with_as_type(Some(some_nulls_field_ref)),
         )
         .unwrap();
@@ -4138,7 +4140,7 @@ mod test {
         ));
         let struct_result = variant_get(
             &variant_array,
-            GetOptions::new_with_path(VariantPath::from("struct_field"))
+            
GetOptions::new_with_path(VariantPath::try_from("struct_field").unwrap())
                 .with_as_type(Some(struct_field_ref)),
         )
         .unwrap();
@@ -4237,12 +4239,13 @@ mod test {
         ];
 
         for (request_type, expected) in expectations {
-            let options = 
GetOptions::new_with_path(VariantPath::from("outer").join("list"))
-                .with_as_type(Some(FieldRef::from(Field::new(
-                    "result",
-                    request_type.clone(),
-                    true,
-                ))));
+            let options =
+                
GetOptions::new_with_path(VariantPath::try_from("outer").unwrap().join("list"))
+                    .with_as_type(Some(FieldRef::from(Field::new(
+                        "result",
+                        request_type.clone(),
+                        true,
+                    ))));
 
             let result = variant_get(&variant_array, options).unwrap();
             assert_eq!(result.data_type(), expected.data_type());
@@ -4254,13 +4257,17 @@ mod test {
             (1, vec![None, None]),
             (2, vec![Some(3), None]),
         ] {
-            let index_options =
-                
GetOptions::new_with_path(VariantPath::from("outer").join("list").join(idx))
-                    .with_as_type(Some(FieldRef::from(Field::new(
-                        "result",
-                        DataType::Int64,
-                        true,
-                    ))));
+            let index_options = GetOptions::new_with_path(
+                VariantPath::try_from("outer")
+                    .unwrap()
+                    .join("list")
+                    .join(idx),
+            )
+            .with_as_type(Some(FieldRef::from(Field::new(
+                "result",
+                DataType::Int64,
+                true,
+            ))));
             let index_result = variant_get(&variant_array, 
index_options).unwrap();
             let index_expected: ArrayRef = 
Arc::new(Int64Array::from(expected));
             assert_eq!(&index_result, &index_expected);
diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs
index 2aeb9df97d..fe10d0451d 100644
--- a/parquet-variant/src/path.rs
+++ b/parquet-variant/src/path.rs
@@ -14,9 +14,9 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use std::{borrow::Cow, ops::Deref};
-
 use crate::utils::parse_path;
+use arrow_schema::ArrowError;
+use std::{borrow::Cow, ops::Deref};
 
 /// Represents a qualified path to a potential subfield or index of a variant
 /// value.
@@ -33,7 +33,7 @@ use crate::utils::parse_path;
 /// ```rust
 /// # use parquet_variant::{VariantPath, VariantPathElement};
 /// // access the field "foo" in a variant object value
-/// let path = VariantPath::from("foo");
+/// let path = VariantPath::try_from("foo").unwrap();
 /// // access the first element in a variant list vale
 /// let path = VariantPath::from(0);
 /// ```
@@ -43,7 +43,7 @@ use crate::utils::parse_path;
 /// # use parquet_variant::{VariantPath, VariantPathElement};
 /// /// You can also create a path by joining elements together:
 /// // access the field "foo" and then the first element in a variant list 
value
-/// let path = VariantPath::from("foo").join(0);
+/// let path = VariantPath::try_from("foo").unwrap().join(0);
 /// // this is the same as the previous one
 /// let path2 = VariantPath::from_iter(["foo".into(), 0.into()]);
 /// assert_eq!(path, path2);
@@ -59,8 +59,8 @@ use crate::utils::parse_path;
 /// ```
 /// # use parquet_variant::{VariantPath, VariantPathElement};
 /// /// You can also convert strings directly into paths using dot notation
-/// let path = VariantPath::from("foo.bar.baz");
-/// let expected = VariantPath::from("foo").join("bar").join("baz");
+/// let path = VariantPath::try_from("foo.bar.baz").unwrap();
+/// let expected = 
VariantPath::try_from("foo").unwrap().join("bar").join("baz");
 /// assert_eq!(path, expected);
 /// ```
 ///
@@ -69,11 +69,21 @@ use crate::utils::parse_path;
 /// # use parquet_variant::{VariantPath, VariantPathElement};
 /// /// You can access the paths using slices
 /// // access the field "foo" and then the first element in a variant list 
value
-/// let path = VariantPath::from("foo")
+/// let path = VariantPath::try_from("foo").unwrap()
 ///   .join("bar")
 ///   .join("baz");
 /// assert_eq!(path[1], VariantPathElement::field("bar"));
 /// ```
+///
+/// # Example: Accessing filed with bracket
+/// ```
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// let path = VariantPath::try_from("a[b.c].d[2]").unwrap();
+/// let expected = VariantPath::from_iter([VariantPathElement::field("a"),
+///     VariantPathElement::field("b.c"),
+///     VariantPathElement::field("d"),
+///     VariantPathElement::index(2)]);
+/// assert_eq!(path, expected)
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
 
@@ -112,9 +122,11 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for 
VariantPath<'a> {
 }
 
 /// Create from &str with support for dot notation
-impl<'a> From<&'a str> for VariantPath<'a> {
-    fn from(path: &'a str) -> Self {
-        VariantPath::new(path.split(".").flat_map(parse_path).collect())
+impl<'a> TryFrom<&'a str> for VariantPath<'a> {
+    type Error = ArrowError;
+
+    fn try_from(path: &'a str) -> Result<Self, Self::Error> {
+        parse_path(path).map(VariantPath::new)
     }
 }
 
@@ -211,7 +223,7 @@ mod tests {
 
     #[test]
     fn test_variant_path_empty_str() {
-        let path = VariantPath::from("");
+        let path = VariantPath::try_from("").unwrap();
         assert!(path.is_empty());
     }
 
@@ -224,9 +236,10 @@ mod tests {
 
     #[test]
     fn test_variant_path_dot_notation_with_array_index() {
-        let path = VariantPath::from("city.store.books[3].title");
+        let path = VariantPath::try_from("city.store.books[3].title").unwrap();
 
-        let expected = VariantPath::from("city")
+        let expected = VariantPath::try_from("city")
+            .unwrap()
             .join("store")
             .join("books")
             .join(3)
@@ -237,7 +250,7 @@ mod tests {
 
     #[test]
     fn test_variant_path_dot_notation_with_only_array_index() {
-        let path = VariantPath::from("[3]");
+        let path = VariantPath::try_from("[3]").unwrap();
 
         let expected = VariantPath::from(3);
 
@@ -246,10 +259,67 @@ mod tests {
 
     #[test]
     fn test_variant_path_dot_notation_with_starting_array_index() {
-        let path = VariantPath::from("[3].title");
+        let path = VariantPath::try_from("[3].title").unwrap();
 
         let expected = VariantPath::from(3).join("title");
 
         assert_eq!(path, expected);
     }
+
+    #[test]
+    fn test_variant_path_field_in_bracket() {
+        // field with index
+        let path = VariantPath::try_from("foo[0].bar").unwrap();
+        let expected = VariantPath::from_iter([
+            VariantPathElement::field("foo"),
+            VariantPathElement::index(0),
+            VariantPathElement::field("bar"),
+        ]);
+        assert_eq!(path, expected);
+
+        // index in the end
+        let path = VariantPath::try_from("foo.bar[42]").unwrap();
+        let expected = VariantPath::from_iter([
+            VariantPathElement::field("foo"),
+            VariantPathElement::field("bar"),
+            VariantPathElement::index(42),
+        ]);
+        assert_eq!(path, expected);
+
+        // invalid index will be treated as field
+        let path = VariantPath::try_from("foo.bar[abc]").unwrap();
+        let expected = VariantPath::from_iter([
+            VariantPathElement::field("foo"),
+            VariantPathElement::field("bar"),
+            VariantPathElement::field("abc"),
+        ]);
+        assert_eq!(path, expected);
+    }
+
+    #[test]
+    fn test_invalid_path_parse() {
+        // Leading dot
+        let err = VariantPath::try_from(".foo.bar").unwrap_err();
+        assert_eq!(err.to_string(), "Parser error: Unexpected leading '.'");
+
+        // Trailing dot
+        let err = VariantPath::try_from("foo.bar.").unwrap_err();
+        assert_eq!(err.to_string(), "Parser error: Unexpected trailing '.'");
+
+        // No ']' will be treated as error
+        let err = VariantPath::try_from("foo.bar[2.baz").unwrap_err();
+        assert_eq!(err.to_string(), "Parser error: Unclosed '[' at byte 7");
+
+        // No ']' because of escaped.
+        let err = VariantPath::try_from("foo.bar[2\\].fds").unwrap_err();
+        assert_eq!(err.to_string(), "Parser error: Unclosed '[' at byte 7");
+
+        // Trailing backslash in bracket
+        let err = VariantPath::try_from("foo.bar[fdafa\\").unwrap_err();
+        assert_eq!(err.to_string(), "Parser error: Unclosed '[' at byte 7");
+
+        // No '[' before ']'
+        let err = VariantPath::try_from("foo.bar]baz").unwrap_err();
+        assert_eq!(err.to_string(), "Parser error: Unexpected ']' at byte 7");
+    }
 }
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
index 6accbcb366..0984a601b2 100644
--- a/parquet-variant/src/utils.rs
+++ b/parquet-variant/src/utils.rs
@@ -150,36 +150,129 @@ pub(crate) fn fits_precision<const N: u32>(n: impl 
Into<i64>) -> bool {
     n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N)
 }
 
-// Helper fn to parse input segments like foo[0] or foo[0][0]
+/// Parse a path string into a vector of [`VariantPathElement`].
+///
+/// # Syntax
+/// - `.field` or `field` - access object field (do not support special char)
+/// - `[index]` - access array element by index
+/// - `[field]` - access object field (support special char with escape `\`)
+///
+/// # Escape Rules
+/// Inside brackets `[...]`:
+/// - `\\` -> literal `\`
+/// - `\]` -> literal `]`
+/// - Any other `\x` -> literal `x`
+///
+/// Outside brackets, no escaping is supported.
+///
+/// # Examples
+/// - `""` -> empty path
+/// - `"foo"` -> single field `foo`
+/// - `"foo.bar"` -> nested fields `foo`, `bar`
+/// - `"[1]"` -> array index 1
+/// - `"foo[1].bar"` -> field `foo`, index 1, field `bar`
+/// - `"[a.b]"` -> field `a.b` (dot is literal inside bracket)
+/// - `"[a\\]b]"` -> field `a]b` (escaped `]`
+/// - etc.
+///
+/// # Errors
+/// - Leading `.` (e.g., `".foo"`)
+/// - Trailing `.` (e.g., `"foo."`)
+/// - Unclosed '[' (e.g., `"foo[1"`)
+/// - Unexpected ']' (e.g., `"foo]"`)
+/// - Trailing '`' inside bracket (treated as unclosed bracket)
 #[inline]
-pub(crate) fn parse_path<'a>(segment: &'a str) -> Vec<VariantPathElement<'a>> {
-    if segment.is_empty() {
-        return Vec::new();
+pub(crate) fn parse_path(s: &str) -> Result<Vec<VariantPathElement<'_>>, 
ArrowError> {
+    let scan_field = |start: usize| {
+        s[start..]
+            .find(['.', '[', ']'])
+            .map_or_else(|| s.len(), |p| start + p)
+    };
+
+    let bytes = s.as_bytes();
+    if let Some(b'.') = bytes.first() {
+        return Err(ArrowError::ParseError("Unexpected leading '.'".into()));
     }
 
-    let mut path_elements = Vec::new();
-    let mut base = segment;
+    let mut elements = Vec::new();
+    let mut i = 0;
 
-    while let Some(stripped) = base.strip_suffix(']') {
-        let Some(open_pos) = stripped.rfind('[') else {
-            return vec![VariantPathElement::field(segment)];
+    while i < bytes.len() {
+        let (elem, end) = match bytes[i] {
+            b'.' => {
+                i += 1; // skip the dot; a field must follow
+                let end = scan_field(i);
+                if end == i {
+                    return Err(ArrowError::ParseError(match bytes.get(i) {
+                        None => "Unexpected trailing '.'".into(),
+                        Some(&c) => format!("Unexpected '{}' at byte {i}", c 
as char),
+                    }));
+                }
+                (VariantPathElement::field(&s[i..end]), end)
+            }
+            b'[' => {
+                let (element, end) = parse_in_bracket(s, i)?;
+                (element, end)
+            }
+            b']' => {
+                return Err(ArrowError::ParseError(format!(
+                    "Unexpected ']' at byte {i}"
+                )));
+            }
+            _ => {
+                let end = scan_field(i);
+                (VariantPathElement::field(&s[i..end]), end)
+            }
         };
+        elements.push(elem);
+        i = end;
+    }
 
-        let index_str = &stripped[open_pos + 1..];
-        let Ok(index) = index_str.parse::<usize>() else {
-            return vec![VariantPathElement::field(segment)];
-        };
+    Ok(elements)
+}
 
-        path_elements.push(VariantPathElement::index(index));
-        base = &stripped[..open_pos];
-    }
+/// Parse `[digits | field]` starting at `i` (which points to `[`).
+/// Returns (VariantPathElement, position after `]`).
+fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, 
usize), ArrowError> {
+    let start = i + 1; // skip '['
 
-    if !base.is_empty() {
-        path_elements.push(VariantPathElement::field(base));
+    let mut unescaped = String::new();
+    let mut chars = s[start..].char_indices().peekable();
+    let mut end = None;
+
+    while let Some((offset, c)) = chars.next() {
+        match c {
+            // Escape: take next char literally
+            '\\' => {
+                if let Some((_, next)) = chars.next() {
+                    unescaped.push(next);
+                }
+                // Trailing backslash will be handled as 'unclosed [' below
+            }
+            ']' => {
+                // Unescaped ']' ends the bracket
+                end = Some(start + offset);
+                break;
+            }
+            _ => {
+                unescaped.push(c);
+            }
+        }
     }
 
-    path_elements.reverse();
-    path_elements
+    let end = match end {
+        Some(e) => e,
+        None => {
+            return Err(ArrowError::ParseError(format!("Unclosed '[' at byte 
{i}")));
+        }
+    };
+
+    let element = match unescaped.parse() {
+        Ok(idx) => VariantPathElement::index(idx),
+        Err(_) => VariantPathElement::field(unescaped),
+    };
+
+    Ok((element, end + 1))
 }
 
 #[cfg(test)]
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 819c20d554..53fb3c4b1e 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -1459,9 +1459,9 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// // given a variant like `{"foo": ["bar", "baz"]}`
     /// let variant = Variant::new(&metadata, &value);
     /// // Accessing a non existent path returns None
-    /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
+    /// 
assert_eq!(variant.get_path(&VariantPath::try_from("non_existent").unwrap()), 
None);
     /// // Access obj["foo"]
-    /// let path = VariantPath::from("foo");
+    /// let path = VariantPath::try_from("foo").unwrap();
     /// let foo = variant.get_path(&path).expect("field `foo` should exist");
     /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
     /// // Access foo[0]
@@ -1470,7 +1470,7 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// // bar is a string
     /// assert_eq!(bar.as_string(), Some("bar"));
     /// // You can also access nested paths
-    /// let path = VariantPath::from("foo").join(0);
+    /// let path = VariantPath::try_from("foo").unwrap().join(0);
     /// assert_eq!(variant.get_path(&path).unwrap(), bar);
     /// ```
     pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {


Reply via email to