This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git

commit 8fa235f4c709c11277a47746306709afb7dbe9a4
Author: Andy Seaborne <[email protected]>
AuthorDate: Thu Aug 7 12:25:36 2025 +0100

    GH-3374: Emit warning for improper dot segments
---
 .../main/java/org/apache/jena/rfc3986/IRI3986.java | 15 ++++
 .../main/java/org/apache/jena/rfc3986/Issue.java   |  1 +
 .../java/org/apache/jena/rfc3986/LibParseIRI.java  | 81 ++++++++++++++++++++--
 .../java/org/apache/jena/rfc3986/Violations.java   |  1 +
 .../java/org/apache/jena/rfc3986/TestBuild.java    |  1 +
 .../java/org/apache/jena/rfc3986/TestParseDNS.java |  1 -
 .../java/org/apache/jena/rfc3986/TestParseOID.java |  2 +-
 .../org/apache/jena/rfc3986/TestURISchemes.java    | 17 +++++
 8 files changed, 111 insertions(+), 8 deletions(-)

diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java 
b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
index dd810a80cc..04a312cff7 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
@@ -1518,6 +1518,21 @@ public class IRI3986 implements IRI {
          * percent- encodings.
          */
         checkPercent();
+
+        /*
+         * The path segments "." and "..", also known as dot-segments, are
+         * defined for relative reference within the path name hierarchy.  They
+         * are intended for use at the beginning of a relative-path reference
+         * (Section 4.2) to indicate relative position within the hierarchical
+         * tree of names.
+         * https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
+         */
+        if ( hasPath() ) {
+            boolean good = LibParseIRI.checkDotSegments(iriStr, path0,  path1);
+            if ( ! good ) {
+                schemeReport(this, Issue.iri_bad_dot_segments, 
URIScheme.GENERAL, "Dot segments should only appear at the start of a relative 
IRI");
+            }
+        }
     }
 
     private void checkPercent() {
diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java 
b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java
index cb670beb26..f13b132310 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java
@@ -28,6 +28,7 @@ public enum Issue {
     iri_password,
     iri_bad_ipv4_address,
     iri_bad_ipv6_address,
+    iri_bad_dot_segments,   // Not at the start of a relative IRI.
 
     // Scheme
     iri_scheme_name_is_not_lowercase,
diff --git 
a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java 
b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java
index 42bf15b0f7..9cb8dfd03e 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java
@@ -28,7 +28,7 @@ package org.apache.jena.rfc3986;
      * "chars" array  should be lower case.
      *     scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
      */
-    /*package*/ static boolean containsAtIgnoreCase(CharSequence string, int 
x, char[] chars) {
+    static boolean containsAtIgnoreCase(CharSequence string, int x, char[] 
chars) {
         // Avoid creating any objects.
         int n = string.length();
         if ( x+chars.length-1 >= n )
@@ -47,7 +47,7 @@ package org.apache.jena.rfc3986;
     }
 
     /** Check whether the character and the next character match the expected 
characters. */
-    public static boolean peekFor(CharSequence string, int x, char x1, char 
x2) {
+    static boolean peekFor(CharSequence string, int x, char x1, char x2) {
         int n = string.length();
         if ( x+1 >= n )
             return false;
@@ -56,7 +56,7 @@ package org.apache.jena.rfc3986;
         return ch1 == x1 && ch2 == x2;
     }
 
-    public static char charAt(CharSequence string, int x) {
+    static char charAt(CharSequence string, int x) {
         if ( x >= string.length() )
             return Chars3986.EOF;
         return string.charAt(x);
@@ -77,6 +77,75 @@ package org.apache.jena.rfc3986;
         return string.regionMatches(idx, substr, 0, substr.length());
     }
 
+    // Check dot segments
+
+    /**
+     * Check the dot-segments ("." and "..") of the path held in
+     * {@code string[start, finish)}.
+     *
+     * @param string the string containing the path
+     * @param start  index of the first character of the path (inclusive)
+     * @param finish index after the last character of the path (exclusive)
+     * @return {@code true} when the scan reports {@link DotSegments#GOOD},
+     *         {@code false} for any other outcome
+     */
+    static boolean checkDotSegments(String string, int start, int finish) {
+        DotSegments outcome = checkDotSegments$(string, start, finish);
+        // Only GOOD is acceptable; every other outcome is treated as a failure.
+        return switch (outcome) {
+            case GOOD -> true;
+            default -> false;
+        };
+    }
+
+    // For clarity ...
+    // Outcome of the dot-segment scan:
+    //   GOOD - no misplaced dot-segment was found.
+    //   BAD  - a "." or ".." segment occurs where it is not allowed.
+    enum DotSegments { GOOD, BAD }
+
+    /**
+     * Scan the path {@code string[start, finish)} for misplaced dot-segments.
+     * <p>
+     * Dot-segments ("." and "..") are acceptable only as the leading segments
+     * of a path that does not start with "/". If the path is rooted, or once a
+     * segment that is not a dot-segment has been seen, any later dot-segment is
+     * a violation.
+     * <p>
+     * This code does not create any objects.
+     *
+     * @param string the string containing the path
+     * @param start  index of the first character of the path (inclusive)
+     * @param finish index after the last character of the path (exclusive)
+     * @return {@link DotSegments#GOOD} if acceptable - no dot-segments except
+     *         for initial dot-segments; otherwise {@link DotSegments#BAD}
+     */
+    private static DotSegments checkDotSegments$(String string, int start, int finish) {
+        if ( start >= finish )
+            // Empty path: nothing to check.
+            return DotSegments.GOOD;
+        int startIdx = start;
+        boolean dotSegmentAllowed = true;
+
+        // Is the start "/"?
+        if ( string.charAt(start) == '/' ) {
+            // Rooted path: dot-segments are not allowed anywhere.
+            startIdx++;
+            dotSegmentAllowed = false;
+        }
+        int segStart = startIdx;
+        for ( int i = startIdx ; i < finish ; i++ ) {
+            if ( string.charAt(i) != '/' )
+                continue;
+            // Segment [segStart, i), terminated by the "/" at i.
+            boolean isDotSegment = isDotSegment(string, segStart, i);
+            if ( isDotSegment && ! dotSegmentAllowed )
+                // Early return.
+                return DotSegments.BAD;
+            if ( ! isDotSegment )
+                // First non-dot segment ends the run of permitted leading dot-segments.
+                dotSegmentAllowed = false;
+            segStart = i+1;
+        }
+        // Check the final segment [segStart, finish) if we have switched to
+        // "no dot segments" and it is not empty. While dot-segments are still
+        // allowed, the final segment is acceptable whatever it is.
+        if ( ! dotSegmentAllowed && segStart < finish && isDotSegment(string, segStart, finish) )
+            return DotSegments.BAD;
+        return DotSegments.GOOD;
+    }
+
+    /**
+     * Test whether the segment {@code string[begin, end)} is exactly "." or "..".
+     * The segment is expected not to include any "/".
+     *
+     * @param string the string containing the segment
+     * @param begin  index of the first character of the segment (inclusive)
+     * @param end    index after the last character of the segment (exclusive)
+     * @return {@code true} if the segment has length 1 or 2 and consists only of '.'
+     */
+    private static boolean isDotSegment(String string, int begin, int end) {
+        int len = end-begin;
+        if ( len != 1 && len != 2 )
+            return false;
+        // A dot-segment of length 1 or 2 is made up entirely of '.' characters.
+        for ( int i = begin ; i < end ; i++ ) {
+            if ( string.charAt(i) != '.' )
+                return false;
+        }
+        return true;
+    }
+
     // >> Copied from jena-iri for comparison.
     static String jenaIRIremoveDotSegments(String path) {
         // 5.2.4 step 1.
@@ -107,13 +176,13 @@ package org.apache.jena.rfc3986;
             // 5.2.4 2 C.
             if (in.startsWith("/../")) {
                 inputBufferStart += 3;
-                removeLastSeqment(output);
+                removeLastSegment(output);
                 continue;
             }
             if (in.equals("/..")) {
                 in = "/"; // don't continue, process below.
                 inputBufferStart += 3; // force end of loop
-                removeLastSeqment(output);
+                removeLastSegment(output);
             }
             // 5.2.4 2 D.
             if (in.equals(".")) {
@@ -135,7 +204,7 @@ package org.apache.jena.rfc3986;
         return output.toString();
     }
 
-    private static void removeLastSeqment(StringBuilder output) {
+    private static void removeLastSegment(StringBuilder output) {
         int ix = output.length();
         while (ix > 0) {
             ix--;
diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java 
b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java
index 91327a3239..e4979c080b 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java
@@ -66,6 +66,7 @@ public class Violations {
         SeverityMap.setSeverity(severityMap, Issue.iri_password,               
       Severity.ERROR);
         SeverityMap.setSeverity(severityMap, Issue.iri_bad_ipv4_address,       
       Severity.WARNING);
         SeverityMap.setSeverity(severityMap, Issue.iri_bad_ipv6_address,       
       Severity.WARNING);
+        SeverityMap.setSeverity(severityMap, Issue.iri_bad_dot_segments,       
       Severity.WARNING);
 
         // Scheme
         SeverityMap.setSeverity(severityMap, 
Issue.iri_scheme_name_is_not_lowercase,  Severity.WARNING);
diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java 
b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java
index 843fa41da4..5e86251b95 100644
--- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java
+++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java
@@ -20,6 +20,7 @@ package org.apache.jena.rfc3986;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
+
 import org.junit.jupiter.api.Test;
 
 /** Building IRIs from components. */
diff --git 
a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java 
b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java
index 8297cab4c3..1de2deeec9 100644
--- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java
+++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java
@@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows;
 
 import org.junit.jupiter.api.Test;
 
-
 /** Test the class ParseDNS */
 public class TestParseDNS {
 
diff --git 
a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java 
b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java
index d1eea0b299..6bc8a413cd 100644
--- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java
+++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java
@@ -18,8 +18,8 @@
 
 package org.apache.jena.rfc3986;
 
-import static org.apache.jena.rfc3986.LibTestURI.test3986;
 import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
+import static org.apache.jena.rfc3986.LibTestURI.test3986;
 
 import org.junit.jupiter.api.Test;
 
diff --git 
a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java 
b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java
index 908badf872..e7b99a8dc9 100644
--- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java
+++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java
@@ -41,6 +41,23 @@ public class TestURISchemes {
     @Test public void general_percent_05() { 
schemeViolation("http://host%AA/ab%FFdef", null, Issue.iri_host_not_lowercase); 
}
     @Test public void general_percent_06() { 
schemeViolation("http://host%aa/ab%aa123", null, 
Issue.iri_percent_not_uppercase); }
 
+    @Test public void general_dot_segments_01() { 
schemeViolation("http://example/abc/../def/", null, 
Issue.iri_bad_dot_segments); }
+    @Test public void general_dot_segments_02() { 
schemeViolation("jena://example/abc/../def/", null, 
Issue.iri_bad_dot_segments); }
+    @Test public void general_dot_segments_03() { 
schemeViolation("http://example/.", null, Issue.iri_bad_dot_segments); }
+    @Test public void general_dot_segments_04() { 
schemeViolation("http://example/./", null, Issue.iri_bad_dot_segments); }
+
+    @Test public void general_dot_segments_05() { schemeViolation("http:/..", 
null, Issue.iri_bad_dot_segments, Issue.http_no_host); }
+    @Test public void general_dot_segments_06() { schemeViolation("http:/.", 
null, Issue.iri_bad_dot_segments, Issue.http_no_host); }
+    @Test public void general_dot_segments_07() { good("./abcd"); }
+    @Test public void general_dot_segments_08() { good("../abcd"); }
+    @Test public void general_dot_segments_09() { good("../../abcd"); }
+    @Test public void general_dot_segments_10() { good("./../abcd"); }
+    @Test public void general_dot_segments_11() { 
schemeViolation("../../abcd/..", null, Issue.iri_bad_dot_segments); }
+    @Test public void general_dot_segments_12() { 
schemeViolation("../../abcd/.", null, Issue.iri_bad_dot_segments); }
+    @Test public void general_dot_segments_13() { good("http://host/pa.th/"); }
+    @Test public void general_dot_segments_14() { good("http://host/.path/"); }
+    @Test public void general_dot_segments_15() { good("http://host/path./"); }
+
     // == http:, https:
     @Test public void scheme_http_empty_host_1() { 
schemeViolation("http:///abc",  URIScheme.HTTP, Issue.http_empty_host); }
     @Test public void scheme_http_empty_host_2() { 
schemeViolation("https:///abc", URIScheme.HTTPS, Issue.http_empty_host); }

Reply via email to