qlong commented on code in PR #15384:
URL: https://github.com/apache/iceberg/pull/15384#discussion_r3132694704


##########
api/src/test/java/org/apache/iceberg/expressions/TestPathUtil.java:
##########
@@ -123,4 +142,157 @@ public void testNormalizedFieldLists(List<String> fields, 
String normalizedPath)
   public void testPathEscaping(String name, String escaped) {
     assertThat(PathUtil.rfc9535escape(name)).isEqualTo(escaped);
   }
+
+  @ParameterizedTest
+  @FieldSource("ESCAPE_CASES")
+  public void testPathUnescapeRoundTrip(String name, String escaped) {
+    assertThat(PathUtil.rfc9535unescape(escaped)).isEqualTo(name);
+  }
+
+  @ParameterizedTest
+  @FieldSource("NORMALIZED_PATHS")
+  public void testParseDotAndBracketAgree(String dotPath, String 
normalizedPath) {
+    
assertThat(PathUtil.parse(dotPath)).isEqualTo(PathUtil.parse(normalizedPath));
+  }
+
+  @Test
+  public void testParseSingleSegmentWithDot() {
+    assertThat(PathUtil.parse("$['a.b']")).isEqualTo(names("a.b"));
+    assertThat(PathUtil.parse("$['user.name']")).isEqualTo(names("user.name"));
+  }
+
+  @Test
+  public void testParseDistinctFromDotSplit() {
+    assertThat(PathUtil.parse("$.a.b")).isEqualTo(names("a", "b"));
+    assertThat(PathUtil.parse("$['a.b']")).isEqualTo(names("a.b"));
+  }
+
+  @Test
+  public void testParseNormalizedRoundTrip() {
+    for (Object[] row : NORMALIZED_FIELD_LISTS) {
+      @SuppressWarnings("unchecked")
+      List<String> fields = (List<String>) row[0];
+      String normalized = (String) row[1];
+      
assertThat(PathUtil.toNormalizedPath(PathUtil.parse(normalized))).isEqualTo(normalized);
+      assertThat(PathUtil.parse(normalized))
+          
.isEqualTo(fields.stream().map(Name::new).collect(Collectors.toList()));
+    }
+  }
+
+  @Test
+  public void testParseRoot() {
+    assertThat(PathUtil.parse("$")).isEqualTo(List.of());
+    assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$"))).isEqualTo("$");
+  }
+
+  /**
+   * Bracket paths can represent keys that dot notation cannot (empty name, 
{@code []}, {@code ..}).
+   */
+  @Test
+  public void testParseSpecialFieldNames() {
+    assertThat(PathUtil.parse("$['']")).isEqualTo(names(""));
+    assertThat(PathUtil.parse("$['[]']")).isEqualTo(names("[]"));
+    assertThat(PathUtil.parse("$['..']")).isEqualTo(names(".."));
+    assertThat(PathUtil.parse("$['*']")).isEqualTo(names("*"));
+    assertThat(PathUtil.parse("$['[']")).isEqualTo(names("["));
+    assertThat(PathUtil.parse("$[']']")).isEqualTo(names("]"));
+
+    
assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$['']"))).isEqualTo("$['']");
+    
assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$['[]']"))).isEqualTo("$['[]']");
+    
assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$['..']"))).isEqualTo("$['..']");
+  }
+
+  @Test
+  public void testParseNestedWithSpecialMiddleSegment() {
+    assertThat(PathUtil.parse("$['a']['..']['b']")).isEqualTo(names("a", "..", 
"b"));
+    assertThat(PathUtil.toNormalizedPath(List.of("a", "..", 
"b"))).isEqualTo("$['a']['..']['b']");
+  }
+
+  @Test
+  public void testParseMixedDotAndBracket() {
+    assertThat(PathUtil.parse("$.a['b.c']")).isEqualTo(names("a", "b.c"));
+    assertThat(PathUtil.parse("$.events['user.name'].id"))
+        .isEqualTo(names("events", "user.name", "id"));
+    
assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$.a['b.c']"))).isEqualTo("$['a']['b.c']");
+  }
+
+  @Test
+  public void testParseMixedBracketThenDot() {
+    assertThat(PathUtil.parse("$['x.y'].z")).isEqualTo(names("x.y", "z"));
+    
assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$['x.y'].z"))).isEqualTo("$['x.y']['z']");
+  }
+
+  @Test
+  public void testParseArrayIndexSegments() {
+    assertThat(PathUtil.parse("$.matrix[0][1]"))
+        .isEqualTo(List.of(new Name("matrix"), new Index(0), new Index(1)));
+    assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$.matrix[0][1]")))
+        .isEqualTo("$['matrix'][0][1]");
+
+    assertThat(PathUtil.parse("$.basket[0][2].a"))
+        .isEqualTo(List.of(new Name("basket"), new Index(0), new Index(2), new 
Name("a")));
+    assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$.basket[0][2].a")))
+        .isEqualTo("$['basket'][0][2]['a']");
+
+    assertThat(PathUtil.parse("$.items[0].tags[1]"))
+        .isEqualTo(List.of(new Name("items"), new Index(0), new Name("tags"), 
new Index(1)));
+    assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$.items[0].tags[1]")))
+        .isEqualTo("$['items'][0]['tags'][1]");
+
+    assertThat(PathUtil.parse("$.events[0].event_id"))
+        .isEqualTo(List.of(new Name("events"), new Index(0), new 
Name("event_id")));
+    
assertThat(PathUtil.toNormalizedPath(PathUtil.parse("$.events[0].event_id")))
+        .isEqualTo("$['events'][0]['event_id']");
+
+    assertThat(PathUtil.parse("$['items'][0]['tags'][1]"))
+        .isEqualTo(PathUtil.parse("$.items[0].tags[1]"));
+    
assertThat(PathUtil.parse("$['matrix'][0][1]")).isEqualTo(PathUtil.parse("$.matrix[0][1]"));
+  }
+
+  @Test
+  public void testParseArrayIndexRoundTrip() {
+    // parse(toNormalizedPath(parse(x))) == parse(x) for paths with [n] 
segments
+    for (String[] pair : NORMALIZED_PATHS) {
+      String normalized = pair[1];
+      if (normalized.contains("[") && !normalized.contains("['")) {
+        // normalized path has array indices; round-trip must be stable
+        
assertThat(PathUtil.parse(PathUtil.toNormalizedPath(PathUtil.parse(normalized))))
+            .isEqualTo(PathUtil.parse(normalized));
+      }
+    }
+    // explicit cases to be clear about what is covered
+    for (String path :
+        new String[] {
+          "$['matrix'][0][1]",
+          "$['items'][0]['tags'][1]",
+          "$['basket'][0][2]['a']",
+          "$['events'][0]['event_id']",
+        }) {
+      
assertThat(PathUtil.parse(PathUtil.toNormalizedPath(PathUtil.parse(path))))
+          .isEqualTo(PathUtil.parse(path));
+    }
+  }
+
+  private static final String[] INVALID_PARSE_PATHS =
+      new String[] {
+        null,
+        "",
+        "event_id",
+        "$.a..b",
+        "$.events.*",
+        "$['a'", // unclosed
+        "$['a']x", // trailing junk
+        "$.['a']", // empty dot segment before bracket
+        "$a.b", // missing separator after $
+        "$.a[]", // empty array index
+        "$.a[-1]", // negative index (not valid JSONPath index syntax here)
+      };
+
+  @ParameterizedTest
+  @FieldSource("INVALID_PARSE_PATHS")
+  public void testParseInvalid(String path) {
+    assertThatThrownBy(() -> PathUtil.parse(path))
+        .isInstanceOf(IllegalArgumentException.class)

Review Comment:
   fixed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to