This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 8fa235f4c709c11277a47746306709afb7dbe9a4 Author: Andy Seaborne <[email protected]> AuthorDate: Thu Aug 7 12:25:36 2025 +0100 GH-3374: Emit warning for improper dot segments --- .../main/java/org/apache/jena/rfc3986/IRI3986.java | 15 ++++ .../main/java/org/apache/jena/rfc3986/Issue.java | 1 + .../java/org/apache/jena/rfc3986/LibParseIRI.java | 81 ++++++++++++++++++++-- .../java/org/apache/jena/rfc3986/Violations.java | 1 + .../java/org/apache/jena/rfc3986/TestBuild.java | 1 + .../java/org/apache/jena/rfc3986/TestParseDNS.java | 1 - .../java/org/apache/jena/rfc3986/TestParseOID.java | 2 +- .../org/apache/jena/rfc3986/TestURISchemes.java | 17 +++++ 8 files changed, 111 insertions(+), 8 deletions(-) diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java index dd810a80cc..04a312cff7 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java @@ -1518,6 +1518,21 @@ public class IRI3986 implements IRI { * percent- encodings. */ checkPercent(); + + /* + * The path segments "." and "..", also known as dot-segments, are + * defined for relative reference within the path name hierarchy. They + * are intended for use at the beginning of a relative-path reference + * (Section 4.2) to indicate relative position within the hierarchical + * tree of names. + * https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + */ + if ( hasPath() ) { + boolean good = LibParseIRI.checkDotSegments(iriStr, path0, path1); + if ( ! good ) { + schemeReport(this, Issue.iri_bad_dot_segments, URIScheme.GENERAL, "Dot segments should only appear at the start of a relative IRI"); + } + } } private void checkPercent() { diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java index cb670beb26..f13b132310 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Issue.java @@ -28,6 +28,7 @@ public enum Issue { iri_password, iri_bad_ipv4_address, iri_bad_ipv6_address, + iri_bad_dot_segments, // Not at the start of a relative IRI. // Scheme iri_scheme_name_is_not_lowercase, diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java index 42bf15b0f7..9cb8dfd03e 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/LibParseIRI.java @@ -28,7 +28,7 @@ package org.apache.jena.rfc3986; * "chars" array should be lower case. * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */ - /*package*/ static boolean containsAtIgnoreCase(CharSequence string, int x, char[] chars) { + static boolean containsAtIgnoreCase(CharSequence string, int x, char[] chars) { // Avoid creating any objects. int n = string.length(); if ( x+chars.length-1 >= n ) @@ -47,7 +47,7 @@ package org.apache.jena.rfc3986; } /** Check whether the character and the next character match the expected characters. */ - public static boolean peekFor(CharSequence string, int x, char x1, char x2) { + static boolean peekFor(CharSequence string, int x, char x1, char x2) { int n = string.length(); if ( x+1 >= n ) return false; @@ -56,7 +56,7 @@ package org.apache.jena.rfc3986; return ch1 == x1 && ch2 == x2; } - public static char charAt(CharSequence string, int x) { + static char charAt(CharSequence string, int x) { if ( x >= string.length() ) return Chars3986.EOF; return string.charAt(x); @@ -77,6 +77,75 @@ package org.apache.jena.rfc3986; return string.regionMatches(idx, substr, 0, substr.length()); } + // Check dot segments + + static boolean checkDotSegments(String string, int start, int finish) { + DotSegments segs = checkDotSegments$(string, start, finish); + switch(segs) { + case BAD -> {return false;} + case GOOD ->{return true;} + default-> {return false;} + } + } + + // For clarity ... + enum DotSegments { GOOD, BAD } + + /** + * Return true if acceptable - no dot-segments except for initial dot-segments. + * This code does not create any objects. + */ + private static DotSegments checkDotSegments$(String string, int start, int finish) { + if ( start >= finish ) + return DotSegments.GOOD; + int startIdx = start; + boolean dotSegmentAllowed = true; + int segCount = 0; + + // Is the start "/"? + if ( string.charAt(start) == '/' ) { + // Rooted path. + startIdx++; + segCount++; + dotSegmentAllowed = false; + } + int segStart = startIdx; + for ( int i = startIdx ; i < finish ; i++ ) { + char ch = string.charAt(i); + if ( ch == '/' ) { + segCount++; + int segFinish = i; + boolean isDotSegment = isDotSegment(string, segStart, segFinish); + if ( isDotSegment && ! dotSegmentAllowed ) + // Early return. + return DotSegments.BAD; + if ( ! isDotSegment ) + dotSegmentAllowed = false; + segStart = i+1; + } + } + // Check final segment [segStart, finish) if we switch to "no dot segments" and it is not empty. + if (! dotSegmentAllowed && segStart < finish ) { + int segFinish = finish; + boolean isDotSegment = isDotSegment(string, segStart, segFinish); + if ( isDotSegment ) + return DotSegments.BAD;; + } + return DotSegments.GOOD; + } + + // Test a segment for being "." or ".." + private static boolean isDotSegment(String string, int begin, int end) { + // Print a segment, no "/" + //System.out.println(string.subSequence(segStart, segFinish)); + int len = end-begin; + switch(len) { + case 1 -> { return ( string.charAt(begin) == '.' ); } + case 2 -> { return ( string.charAt(begin) == '.' && string.charAt(begin+1) == '.' ); } + default -> { return false; } + } + } + // >> Copied from jena-iri for comparison. static String jenaIRIremoveDotSegments(String path) { // 5.2.4 step 1. @@ -107,13 +176,13 @@ package org.apache.jena.rfc3986; // 5.2.4 2 C. if (in.startsWith("/../")) { inputBufferStart += 3; - removeLastSeqment(output); + removeLastSegment(output); continue; } if (in.equals("/..")) { in = "/"; // don't continue, process below. inputBufferStart += 3; // force end of loop - removeLastSeqment(output); + removeLastSegment(output); } // 5.2.4 2 D. if (in.equals(".")) { @@ -135,7 +204,7 @@ package org.apache.jena.rfc3986; return output.toString(); } - private static void removeLastSeqment(StringBuilder output) { + private static void removeLastSegment(StringBuilder output) { int ix = output.length(); while (ix > 0) { ix--; diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java index 91327a3239..e4979c080b 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Violations.java @@ -66,6 +66,7 @@ public class Violations { SeverityMap.setSeverity(severityMap, Issue.iri_password, Severity.ERROR); SeverityMap.setSeverity(severityMap, Issue.iri_bad_ipv4_address, Severity.WARNING); SeverityMap.setSeverity(severityMap, Issue.iri_bad_ipv6_address, Severity.WARNING); + SeverityMap.setSeverity(severityMap, Issue.iri_bad_dot_segments, Severity.WARNING); // Scheme SeverityMap.setSeverity(severityMap, Issue.iri_scheme_name_is_not_lowercase, Severity.WARNING); diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java index 843fa41da4..5e86251b95 100644 --- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java +++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestBuild.java @@ -20,6 +20,7 @@ package org.apache.jena.rfc3986; import static org.junit.jupiter.api.Assertions.assertEquals; + import org.junit.jupiter.api.Test; /** Building IRIs from components. */ diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java index 8297cab4c3..1de2deeec9 100644 --- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java +++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseDNS.java @@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import org.junit.jupiter.api.Test; - /** Test the class ParseDNS */ public class TestParseDNS { diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java index d1eea0b299..6bc8a413cd 100644 --- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java +++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseOID.java @@ -18,8 +18,8 @@ package org.apache.jena.rfc3986; -import static org.apache.jena.rfc3986.LibTestURI.test3986; import static org.junit.jupiter.api.Assertions.assertThrowsExactly; +import static org.apache.jena.rfc3986.LibTestURI.test3986; import org.junit.jupiter.api.Test; diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java index 908badf872..e7b99a8dc9 100644 --- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java +++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java @@ -41,6 +41,23 @@ public class TestURISchemes { @Test public void general_percent_05() { schemeViolation("http://host%AA/ab%FFdef", null, Issue.iri_host_not_lowercase); } @Test public void general_percent_06() { schemeViolation("http://host%aa/ab%aa123", null, Issue.iri_percent_not_uppercase); } + @Test public void general_dot_segments_01() { schemeViolation("http://example/abc/../def/", null, Issue.iri_bad_dot_segments); } + @Test public void general_dot_segments_02() { schemeViolation("jena://example/abc/../def/", null, Issue.iri_bad_dot_segments); } + @Test public void general_dot_segments_03() { schemeViolation("http://example/.", null, Issue.iri_bad_dot_segments); } + @Test public void general_dot_segments_04() { schemeViolation("http://example/./", null, Issue.iri_bad_dot_segments); } + + @Test public void general_dot_segments_05() { schemeViolation("http:/..", null, Issue.iri_bad_dot_segments, Issue.http_no_host); } + @Test public void general_dot_segments_06() { schemeViolation("http:/.", null, Issue.iri_bad_dot_segments, Issue.http_no_host); } + @Test public void general_dot_segments_07() { good("./abcd"); } + @Test public void general_dot_segments_08() { good("../abcd"); } + @Test public void general_dot_segments_09() { good("../../abcd"); } + @Test public void general_dot_segments_10() { good("./../abcd"); } + @Test public void general_dot_segments_11() { schemeViolation("../../abcd/..", null, Issue.iri_bad_dot_segments); } + @Test public void general_dot_segments_12() { schemeViolation("../../abcd/.", null, Issue.iri_bad_dot_segments); } + @Test public void general_dot_segments_13() { good("http://host/pa.th/"); } + @Test public void general_dot_segments_14() { good("http://host/.path/"); } + @Test public void general_dot_segments_15() { good("http://host/path./"); } + // == http:, https: @Test public void scheme_http_empty_host_1() { schemeViolation("http:///abc", URIScheme.HTTP, Issue.http_empty_host); } @Test public void scheme_http_empty_host_2() { schemeViolation("https:///abc", URIScheme.HTTPS, Issue.http_empty_host); }
