This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch jena5
in repository https://gitbox.apache.org/repos/asf/jena.git

commit aebedf537cb4d7735a55a1bfe68d418674697393
Author: Andy Seaborne <a...@apache.org>
AuthorDate: Sat Oct 28 12:45:35 2023 +0100

    GH-2062: Support URN components inc fragments for UUIDs
---
 .../org/apache/jena/riot/system/TestIRIxRIOT.java  |  42 +--
 .../org/apache/jena/irix/IRIProviderJenaIRI.java   | 112 +++++++-
 .../src/main/java/org/apache/jena/irix/IRIx.java   |   2 +-
 .../test/java/org/apache/jena/irix/TS_IRIx.java    |   3 +-
 .../irix/{TestParseIRIx.java => TestIRIxOps.java}  | 103 +------
 .../java/org/apache/jena/irix/TestIRIxSyntax.java  | 130 +++++++++
 .../java/org/apache/jena/irix/TestRFC3986.java     | 314 ++++++++++++---------
 7 files changed, 424 insertions(+), 282 deletions(-)

diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java 
b/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java
index ab3d686e98..5dad5324a9 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java
@@ -84,39 +84,7 @@ public class TestIRIxRIOT {
     @Test public void irix_uuid_1_nt_check()    { testLang(urnuuid01, Lang.NT, 
UNSET, TRUE, 0, 0); }
     @Test public void irix_uuid_1_ttl()         { testDft (urnuuid01, 
Lang.TTL, 0, 0); }
 
-    // urn:uuid -- IRI3986 answers
-    //
-//    private static String urnuuid02 = "<urn:uuid:bad>";
-//    @Test public void irix_uuid_2_nt()          { testDft (urnuuid02, 
Lang.NT, 0, 0); }
-//    @Test public void irix_uuid_2_nt_check()    { testLang(urnuuid02, 
Lang.NT, UNSET, TRUE, 0, 1); }
-//    @Test public void irix_uuid_2_ttl()         { testDft (urnuuid02, 
Lang.TTL, 0, 1); }
-//
-//    private static String uuid03 = "<uuid:bad>";
-//    @Test public void irix_uuid_3_nt()          { testDft (uuid03, Lang.NT, 
0, 0); }
-//    @Test public void irix_uuid_3_nt_check()    { testLang(uuid03, Lang.NT, 
UNSET, TRUE, 0, 1); }
-//    @Test public void irix_uuid_3_ttl()         { testDft (uuid03, Lang.TTL, 
0, 1); }
-//
-//    private static String urnuuid04 = 
"<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query>";
-//    @Test public void irix_uuid_4_nt()          { testDft (urnuuid04, 
Lang.NT, 0, 0); }
-//    @Test public void irix_uuid_4_nt_check()    { testLang(urnuuid04, 
Lang.NT, UNSET, TRUE, 0, 1); }
-//    @Test public void irix_uuid_4_ttl()         { testDft (urnuuid04, 
Lang.TTL, 0, 1); }
-//
-//    private static String uruuidurn05 = 
"<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>";
-//    @Test public void irix_uuid_5_nt()          { testDft (uruuidurn05, 
Lang.NT, 0, 0); }
-//    @Test public void irix_uuid_5_nt_check()    { testLang(uruuidurn05, 
Lang.NT, UNSET, TRUE, 0, 1); }
-//    @Test public void irix_uuid_5_ttl()         { testDft (uruuidurn05, 
Lang.TTL, 0, 1); }
-//
-//    private static String urnuuid06 = 
"<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>";
-//    @Test public void irix_uuid_6_nt()          { testDft (urnuuid06, 
Lang.NT, 0, 0); }
-//    @Test public void irix_uuid_6_nt_check()    { testLang(urnuuid06, 
Lang.NT, UNSET, TRUE, 0, 2); }
-//    @Test public void irix_uuid_6_ttl()         { testDft (urnuuid06, 
Lang.TTL, 0, 2); }
-//
-//    private static String uuid07 = 
"<uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>";
-//    @Test public void irix_uuid_7_nt()          { testDft (uuid07, Lang.NT, 
0, 0); }
-//    @Test public void irix_uuid_7_nt_check()    { testLang(uuid07, Lang.NT, 
UNSET, TRUE, 0, 2); }
-//    @Test public void irix_uuid_7_ttl()         { testDft (uuid07, Lang.TTL, 
0, 2); }
-
-    // -- urn:uuid -- jena-iri answers
+    // -- uuid: & urn:uuid -- jena-iri answers
     // The warning on bad UUIDs is from IRIProviderjenaIRI, not jena-iri, and 
so it isn't check/no check sensitive.
     private static String urnuuid02 = "<urn:uuid:bad>";
     @Test public void irix_uuid_2_nt()          { testDft (urnuuid02, Lang.NT, 
0, 1); }
@@ -133,10 +101,10 @@ public class TestIRIxRIOT {
     @Test public void irix_uuid_4_nt_check()    { testLang(urnuuid04, Lang.NT, 
UNSET, TRUE, 0, 1); }
     @Test public void irix_uuid_4_ttl()         { testDft (urnuuid04, 
Lang.TTL, 0, 1); }
 
-    private static String uruuidurn05 = 
"<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>";
-    @Test public void irix_uuid_5_nt()          { testDft (uruuidurn05, 
Lang.NT, 0, 1); }
-    @Test public void irix_uuid_5_nt_check()    { testLang(uruuidurn05, 
Lang.NT, UNSET, TRUE, 0, 1); }
-    @Test public void irix_uuid_5_ttl()         { testDft (uruuidurn05, 
Lang.TTL, 0, 1); }
+    private static String urnuuid05 = 
"<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>";
+    @Test public void irix_uuid_5_nt()          { testDft (urnuuid05, Lang.NT, 
0, 0); }
+    @Test public void irix_uuid_5_nt_check()    { testLang(urnuuid05, Lang.NT, 
UNSET, TRUE, 0, 0); }
+    @Test public void irix_uuid_5_ttl()         { testDft (urnuuid05, 
Lang.TTL, 0, 0); }
 
     private static String urnuuid06 = 
"<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>";
     @Test public void irix_uuid_6_nt()          { testDft (urnuuid06, Lang.NT, 
0, 0); }
diff --git 
a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java 
b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
index 2934f9f589..2b68e7d0f3 100644
--- a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
+++ b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
@@ -230,11 +230,12 @@ public class IRIProviderJenaIRI implements IRIProvider {
         if ( STRICT_FILE && isFILE(iri) ) {
             if ( iriStr.startsWith("file://" ) && ! 
iriStr.startsWith("file:///") )
                 throw new IRIException("file: URLs should start file:///: 
<"+iriStr+">");
-        }
-
-        if ( isUUID(iri, iriStr) ) {
+        } else if ( isUUID(iri, iriStr) ) {
             checkUUID(iri, iriStr);
+        } else if ( isURNUUID(iri, iriStr) ) {
+            checkURNUUID(iri, iriStr);
         }
+
         if (!showExceptions)
             return iri;
         if (!iri.hasViolation(includeWarnings))
@@ -283,26 +284,109 @@ public class IRIProviderJenaIRI implements IRIProvider {
     private static boolean isURN(IRI iri)  { return 
"urn".equalsIgnoreCase(iri.getScheme()); }
     private static boolean isFILE(IRI iri) { return 
"file".equalsIgnoreCase(iri.getScheme()); }
 
-    private static String UUID_REGEXP = 
"^(?:urn:uuid|uuid):[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$";
-    private static Pattern UUID_PATTERN = Pattern.compile(UUID_REGEXP, 
Pattern.CASE_INSENSITIVE);
-
     private static boolean isUUID(IRI iri, String iriStr) {
-        return iriStr.regionMatches(true, 0, "urn:uuid:", 0, 
"urn:uuid:".length())
-            || iriStr.regionMatches(true, 0, "uuid:", 0, "uuid:".length());
+        // Ignore case
+        return iriStr.regionMatches(true, 0, "uuid:", 0, "uuid:".length());
     }
 
+    private static boolean isURNUUID(IRI iri, String iriStr) {
+        // Ignore case
+        return iriStr.regionMatches(true, 0, "urn:uuid:", 0, 
"urn:uuid:".length());
+    }
+
+    // ---- uuid:
+    // UUID match, no anchors or URI scheme.
+    private static String UUID_BASE = 
"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";
+    private static String UUID_REGEXP = "^uuid:"+UUID_BASE+"$";
+    private static Pattern UUID_PATTERN = Pattern.compile(UUID_REGEXP, 
Pattern.CASE_INSENSITIVE);
+
     private static void checkUUID(IRI iriObj, String original) {
         if ( iriObj.hasViolation(true) )
             // Already has problems.
             return;
-        // jena-iri and iri4ld should both be uptodate now..
-//        // Unfortunately, these tests are check/no-check sensitive.
-//        if ( iriObj.getRawFragment() != null )
-//            throw new IRIException("Fragment used with UUID");
-//        if ( iriObj.getRawQuery() != null )
-//            throw new IRIException("Query used with UUID");
+        // jena-iri does not have UUID checks.
+        // Unfortunately, these tests are check/no-check sensitive.
+        if ( iriObj.getRawFragment() != null )
+            throw new IRIException("Fragment used with uuid:");
+        if ( iriObj.getRawQuery() != null )
+            throw new IRIException("Query used with uuid:");
         boolean matches = UUID_PATTERN.matcher(original).matches();
         if ( !matches )
             throw new IRIException("Not a valid UUID string: "+original);
     }
+
+
+    // ---- urn:uuid:
+    // RFC 8141 added the possibility for r-component, q-component (combined
+    // into the URI query string) and f-component (restricted fragment). This
+    // regexp has a weak test for r/q/f. It does not check the character
+    // limitations to ASCII on r/q/f
+
+    //private static String A2Z = "[0-9a-z]";
+
+    // Non-strict regexp: Any order r- and q-compoments, UCSchars.
+    private static String URN_UUID_REGEXP_LAX = 
"^urn:uuid:"+UUID_BASE+"(?:(?:\\?\\+.|\\?=.|#).*)?$";
+
+    // Strict regex for urn:uuid
+    //    Only ASCII.
+    //  pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+    //  pct-encoded   = "%" HEXDIG HEXDIG
+    //  unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+    //  iunreserved   = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
+    //  reserved      = gen-delims / sub-delims
+    //  gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+    //  sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+    //                / "*" / "+" / "," / ";" / "="
+    // Not:
+    //  ipchar        = iunreserved / pct-encoded / sub-delims / ":" / "@"
+    //                = ipchar / ucschar
+
+//    ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
+//            / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
+//            / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
+//            / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
+//            / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
+//            / %xD0000-DFFFD / %xE1000-EFFFD
+
+    // "(?:  )" is a non-binding group.
+    private static String PCT = "(?:%[a-f][a-f])";
+
+    // As contents of "[]" used in PCHAR
+    private static String UNRESERVED = "-0-9a-z._~";
+    // Or use \p{IsAlphabetic}
+    private static String UCSCHAR = "\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
+            /*
+            / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
+            / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
+            / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
+            / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
+            / %xD0000-DFFFD / %xE1000-EFFFD
+           */
+    // private       = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
+    //private static String IPRIVATE
+    private static String IUNRESERVED = UNRESERVED+UCSCHAR;
+
+    //private static String GEN_DELIMS = ":/\\?#\\[\\]@";
+    private static String SUB_DELIMS = "!\\$&'\\(\\)\\*\\+,;=";
+    // Switch IUNRESERVED / UNRESERVED
+    private static String PCHARS1 = UNRESERVED+SUB_DELIMS+":"+"@";
+    private static String PCHAR = "(?:(?:["+PCHARS1+"]|"+PCT+"))";
+
+    private static String URN_COMP_X = "/\\?";
+    private static String URN_RQ_COMP_CHAR = PCHAR+URN_COMP_X;
+    private static String URN_R_COMP = "(?:\\?\\+["+URN_RQ_COMP_CHAR+"]+)?";
+    private static String URN_Q_COMP = "(?:\\?=["+URN_RQ_COMP_CHAR+"]+)?";
+    private static String URN_F_COMP = "(?:#["+PCHAR+"]*)?";
+    private static String URN_UUID_REGEXP = 
"^urn:uuid:"+UUID_BASE+URN_R_COMP+URN_Q_COMP+URN_F_COMP+"$";
+
+    private static Pattern URN_UUID_PATTERN = Pattern.compile(URN_UUID_REGEXP, 
Pattern.CASE_INSENSITIVE);
+
+    private static void checkURNUUID(IRI iriObj, String original) {
+        if ( iriObj.hasViolation(true) )
+            // Already has problems.
+            return;
+        boolean matches = URN_UUID_PATTERN.matcher(original).matches();
+        if ( !matches )
+            throw new IRIException("Not a valid UUID string: "+original);
+    }
 }
\ No newline at end of file
diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIx.java 
b/jena-core/src/main/java/org/apache/jena/irix/IRIx.java
index 885fa62b54..7b2c5377f1 100644
--- a/jena-core/src/main/java/org/apache/jena/irix/IRIx.java
+++ b/jena-core/src/main/java/org/apache/jena/irix/IRIx.java
@@ -60,7 +60,7 @@ public abstract class IRIx {
      * It returns a IRIx holder and does no checking whatsoever.
      * Whether the IRI "works" is down to care by the application.
      */
-    static public IRIx createAny(String iri) throws IRIException {
+    static public IRIx createAny(String iri) {
         Objects.requireNonNull(iri);
         return IRIProviderAny.stringProvider().create(iri);
     }
diff --git a/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java 
b/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java
index 479e9fd544..eba5652592 100644
--- a/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java
+++ b/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java
@@ -26,7 +26,8 @@ import org.junit.runners.Suite;
 @RunWith(Suite.class)
 @Suite.SuiteClasses( {
     // IRIx tests with matrix of providers.
-    TestParseIRIx.class,
+    TestIRIxSyntax.class,
+    TestIRIxOps.class,
     TestRFC3986.class,
     TestResolve.class,
     TestNormalize.class,
diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestParseIRIx.java 
b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxOps.java
similarity index 66%
rename from jena-core/src/test/java/org/apache/jena/irix/TestParseIRIx.java
rename to jena-core/src/test/java/org/apache/jena/irix/TestIRIxOps.java
index a4d951a7b4..b91e2d7a37 100644
--- a/jena-core/src/test/java/org/apache/jena/irix/TestParseIRIx.java
+++ b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxOps.java
@@ -21,118 +21,23 @@ package org.apache.jena.irix;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
-import java.util.Locale;
-
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
 /**
- * Parse tests.
+ * Basic parser tests and IRIx operations.
  *
- * {@link TestRFC3986} contained tests with expections scheme errors and 
warnings.
+ * {@link TestRFC3986} contained tests with exceptions scheme errors and 
warnings.
  */
 @RunWith(Parameterized.class)
-public class TestParseIRIx extends AbstractTestIRIx {
+public class TestIRIxOps extends AbstractTestIRIx {
 
 
-    public TestParseIRIx(String name, IRIProvider provider) {
+    public TestIRIxOps(String name, IRIProvider provider) {
         super(name, provider);
     }
 
-    // ---- RFC 3986 Grammar : misc parsing.
-
-    @Test public void uri_01()      { parse("http://example/abc"); }
-
-    @Test public void uri_02()      { parse("http://example/αβγ"); }
-
-    @Test public void uri_03()      { parse("http://example/Ẓ"); }
-
-    @Test public void uri_04()      { parse("http://[::1]/abc"); }
-
-    @Test public void uri_05()      { parse("http://reg123/abc"); }
-
-    @Test public void uri_06()      { parse("http://1.2.3.4/abc"); }
-
-    // ---- Compliance with HTTP RFC7230. 
https://tools.ietf.org/html/rfc7230#section-2.7
-
-    @Test(expected=IRIException.class)
-    public void http_01() { parse("http:"); }
-
-    @Test(expected=IRIException.class)
-    public void http_02() { parse("http:/"); }
-
-    @Test(expected=IRIException.class)
-    public void http_03() { parse("http://"); }
-
-    @Test public void http_04() { parse("http://x"); }
-
-    @Test(expected=IRIException.class)
-    public void http_05()   { parse("http:abc"); }
-
-    @Test(expected=IRIException.class)
-    public void http_06()   { parse("http:///abc"); }
-
-    @Test(expected=IRIException.class)
-    // [] not in IPv6 address
-    public void http_07()   { parse("http://h/ab[]"); }
-
-    @Test public void http_08() { parse("http://example/~jena/file"); }
-
-    // -- Compliance with URN scheme: https://tools.ietf.org/html/rfc8141
-
-    @Test public void urn_01() { parse("urn:NID:NSS"); }
-
-    @Test(expected=IRIException.class)
-    public void urn_02() { parse("urn:x:abcd"); }
-
-    @Test(expected=IRIException.class)
-    public void urn_03() { parse("urn:ex:"); }
-
-    @Test public void urn_04()  { notStrict("urn", ()->parse("urn:x:abc")); }
-
-    @Test public void urn_05()  { notStrict("urn", ()->parse("urn:ex:")); }
-
-    @Test public void urn_06()  { parse("urn:NID:NSS?=abc"); }
-
-    @Test public void urn_07()  { parse("urn:NID:NSS?+abc"); }
-
-    @Test public void urn_08()  { parse("urn:NID:NSS#frag"); }
-
-    @Test public void urn_09()  { parse("urn:NID:NSS#"); }
-
-    private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee";
-
-    @Test public void parse_uuid_01() { parse("uuid:"+testUUID); }
-
-    @Test public void parse_uuid_02() { 
parse("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
-
-    @Test public void parse_uuid_03() { parse("UUID:"+testUUID); }
-
-    @Test public void parse_uuid_04() { parse("urn:uuid:"+testUUID); }
-
-    @Test public void parse_uuid_05() { 
parse("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
-
-    @Test public void parse_uuid_06() { parse("URN:UUID:"+testUUID); }
-
-    // Illegal.
-    // RFC 8141 (urn) allows query and fragment in urn:uuid: (limited 
character set).
-    // But RFC 4122 (urn:uuid: namespace definition) does not.
-
-    // -- Compliance with file scheme: https://tools.ietf.org/html/rfc8089
-
-    @Test public void file_01() { parse("file:///path/name"); }
-
-    @Test public void file_02() { parse("file:/path/name"); }
-
-    @Test public void file_03() { parse("file:name"); }
-
-    @Test public void file_04() { parse("file:/path/name"); }
-
-    @Test public void file_05() { parse("file:name"); }
-
-    @Test public void file_06() { parse("file:///c:/~user/file"); }
-
     // --- Use in RDF
 
     @Test public void reference_01() { reference("http://example/", true); }
diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxSyntax.java 
b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxSyntax.java
new file mode 100644
index 0000000000..ff161aa482
--- /dev/null
+++ b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxSyntax.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.irix;
+
+import java.util.Locale;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Basic tests of RFC 3986 syntax.
+ *
+ * {@link TestRFC3986} contained tests with more scheme errors and warnings. 
It also compares to jena-iri.
+ */
+@RunWith(Parameterized.class)
+public class TestIRIxSyntax extends AbstractTestIRIx {
+
+    public TestIRIxSyntax(String name, IRIProvider provider) {
+        super(name, provider);
+    }
+
+    @Test public void http_01()      { parse("http://example/abc"); }
+
+    @Test public void http_02()      { parse("http://example/αβγ"); }
+
+    @Test public void http_03()      { parse("http://example/Ẓ"); }
+
+    @Test public void http_04()      { parse("http://[::1]/abc"); }
+
+    @Test public void http_05()      { parse("http://reg123/abc"); }
+
+    @Test public void http_06()      { parse("http://1.2.3.4/abc"); }
+
+    // ---- Compliance with HTTP RFC7230. 
https://tools.ietf.org/html/rfc7230#section-2.7
+
+    @Test(expected=IRIException.class)
+    public void http_51() { parse("http:"); }
+
+    @Test(expected=IRIException.class)
+    public void http_52() { parse("http:/"); }
+
+    @Test(expected=IRIException.class)
+    public void http_53() { parse("http://"); }
+
+    @Test public void http_54() { parse("http://x"); }
+
+    @Test(expected=IRIException.class)
+    public void http_55()   { parse("http:abc"); }
+
+    @Test(expected=IRIException.class)
+    public void http_56()   { parse("http:///abc"); }
+
+    @Test(expected=IRIException.class)
+    // [] not in IPv6 address
+    public void http_57()   { parse("http://h/ab[]"); }
+
+    @Test public void http_58() { parse("http://example/~jena/file"); }
+
+    // -- Compliance with URN scheme: https://tools.ietf.org/html/rfc8141
+
+    @Test public void urn_01() { parse("urn:NID:NSS"); }
+
+    @Test(expected=IRIException.class)
+    public void urn_02() { parse("urn:x:abcd"); }
+
+    @Test(expected=IRIException.class)
+    public void urn_03() { parse("urn:ex:"); }
+
+    @Test public void urn_04()  { notStrict("urn", ()->parse("urn:x:abc")); }
+
+    @Test public void urn_05()  { notStrict("urn", ()->parse("urn:ex:")); }
+
+    @Test public void urn_06()  { parse("urn:NID:NSS?=abc"); }
+
+    @Test public void urn_07()  { parse("urn:NID:NSS?+abc"); }
+
+    @Test public void urn_08()  { parse("urn:NID:NSS#frag"); }
+
+    @Test public void urn_09()  { parse("urn:NID:NSS#"); }
+
+    private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee";
+
+    @Test public void parse_uuid_01() { parse("uuid:"+testUUID); }
+
+    @Test public void parse_uuid_02() { 
parse("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
+
+    @Test public void parse_uuid_03() { parse("UUID:"+testUUID); }
+
+    @Test public void parse_uuid_04() { parse("urn:uuid:"+testUUID); }
+
+    @Test public void parse_uuid_05() { 
parse("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
+
+    @Test public void parse_uuid_06() { parse("URN:UUID:"+testUUID); }
+
+    // -- Compliance with file scheme: https://tools.ietf.org/html/rfc8089
+
+    @Test public void file_01() { parse("file:///path/name"); }
+
+    @Test public void file_02() { parse("file:/path/name"); }
+
+    @Test public void file_03() { parse("file:name"); }
+
+    @Test public void file_04() { parse("file:/path/name"); }
+
+    @Test public void file_05() { parse("file:name"); }
+
+    @Test public void file_06() { parse("file:///c:/~user/file"); }
+
+    // Parse, only collect violations from scheme-specific rules.
+    private void parse(String string) {
+        IRIx iri = IRIx.create(string);
+    }
+}
diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java 
b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java
index 9cd2db10c5..061bc2cf34 100644
--- a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java
+++ b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java
@@ -33,7 +33,9 @@ import org.junit.runners.Parameterized;
 
 /**
  * Test of parsing and schema violations.
- * See also plain parse tests in {@link TestParseIRIx}
+ * This is the test suite that compares result with jena-iri.
+ * See also {@link TestIRIxSyntax} for other IRIx parsing operations.
+ * See also {@link TestIRIxOps} for IRIx operations.
  */
 @FixMethodOrder(MethodSorters.NAME_ASCENDING)
 @RunWith(Parameterized.class)
@@ -58,7 +60,7 @@ public class TestRFC3986 extends AbstractTestIRIx {
     @Test public void parse_05() { good("/ab%FFdef"); }
 
     // Uppercase preferred
-    @Test public void parse_06() { goodNoIRICheck("/ab%ffdef"); }
+    @Test public void parse_06() { good("/ab%ffdef"); }
 
     @Test public void parse_07() { good("http://host/abcdef?qs=foo#frag"); }
 
@@ -71,7 +73,7 @@ public class TestRFC3986 extends AbstractTestIRIx {
     @Test public void parse_11() { good("//host:8081/abc/def?qs=ghi#jkl"); }
 
     // Legal, if weird, scheme name.
-    @Test public void parse_12() { goodNoIRICheck("a+.-9://h/"); }
+    @Test public void parse_12() { good("a+.-9://h/"); }
 
     // No path.
 
@@ -88,66 +90,6 @@ public class TestRFC3986 extends AbstractTestIRIx {
 
     @Test public void parse_18() { good("/z/a:b"); }
 
-    @Test public void equality_01() {
-        String s = "https://jena.apache.org/";
-        IRIx iri1 = IRIx.create(s);
-        IRIx iri2 = IRIx.create(s);
-        assertEquals(iri1, iri2);
-        assertEquals(iri1.hashCode(), iri2.hashCode());
-    }
-
-    // HTTP scheme specific rules.
-    @Test public void parse_http_01()   { badSpecific("http:///file/name.txt"); }
-
-    // HTTP scheme specific rules.
-    @Test public void parse_http_02()   { badSpecific("HTTP:///file/name.txt"); }
-
-    // This is treated as legal with path and no authority.
-    //@Test public void parse_http_02a()   { badSpecific("http:/file/name.txt"); }
-
-    @Test public void parse_http_03()   { badSpecific("http://user@host/file/name.txt"); }
-
-    @Test public void parse_http_04()   { good("nothttp://user@host/file/name.txt"); }
-
-    @Test public void parse_http_05()   { good("nothttp://user@/file/name.txt"); }
-
-    @Test public void parse_file_01() { good("file:///file/name.txt"); }
-
-    // We reject "file://host/" forms.
-    @Test public void parse_file_02() { 
badSpecific("file://host/file/name.txt"); }
-
-    // This is legal by RFC 8089 (jena-iri, based on the original RFC 1738, 
fails this with missing authority).
-    @Test public void parse_file_03() { goodNoIRICheck("file:/file/name.txt"); 
}
-
-    @Test public void parse_urn_01() { good("urn:x-local:abc/def"); }
-
-    // rq-components = [ "?+" r-component ]
-    //                 [ "?=" q-component ]
-
-    @Test public void parse_urn_02()        { 
good("urn:x-local:abc/def?+more"); }
-
-    @Test public void parse_urn_03()        { 
good("urn:x-local:abc/def?=123"); }
-
-    @Test public void parse_urn_04()        { 
good("urn:x-local:abc/def?+resolve?=123#frag"); }
-
-    @Test public void parse_urn_05()        { good("urn:abc0:def"); }
-
-    private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee";
-
-    @Test public void parse_uuid_01() { good("uuid:"+testUUID); }
-
-    @Test public void parse_uuid_02() { 
good("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
-
-    @Test public void parse_uuid_03() { good("urn:uuid:"+testUUID); }
-
-    @Test public void parse_uuid_04() { 
good("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
-
-    // -- FTP
-
-    @Test public void parse_ftp_01() { good("ftp://user@host:3333/abc/def?qs=ghi#jkl"); }
-
-    @Test public void parse_ftp_02() { good("ftp://[::1]/abc/def?qs=ghi#jkl"); }
-
     // ---- bad
 
     // Leading ':'
@@ -209,102 +151,213 @@ public class TestRFC3986 extends AbstractTestIRIx {
     // [] not allowed.
     @Test public void bad_frag_1() { 
bad("http://eg.com/test.txt#xpointer(/unit[5])"); }
 
-    // ---- bad by scheme.
-    @Test public void parse_http_bad_01() { 
badSpecific("http://user@host:8081/abc/def?qs=ghi#jkl";); }
+    @Test public void equality_01() {
+        String s = "https://jena.apache.org/";;
+        IRIx iri1 = IRIx.create(s);
+        IRIx iri2 = IRIx.create(s);
+        assertEquals(iri1, iri2);
+        assertEquals(iri1.hashCode(), iri2.hashCode());
+    }
+
+    // HTTP scheme specific rules.
+    @Test public void parse_http_01()   { 
badSpecific("http:///file/name.txt";); }
+
+    // HTTP scheme specific rules.
+    @Test public void parse_http_02()   { 
badSpecific("HTTP:///file/name.txt"); }
+
+    // This is legal with path and no authority.
+    //@Test public void parse_http_02a()   { 
badSpecific("http:/file/name.txt"); }
+
+    @Test public void parse_http_03()   { 
badSpecific("http://user@host/file/name.txt";); }
+
+    @Test public void parse_http_04()   { 
good("nothttp://user@host/file/name.txt";); }
+
+    @Test public void parse_http_05()   { 
good("nothttp://user@/file/name.txt";); }
+
+    @Test public void parse_http_06() { 
badSpecific("http://user@host:8081/abc/def?qs=ghi#jkl";); }
 
+    @Test public void parse_file_01() { good("file:///file/name.txt"); }
+
+    // We reject "file://host/" forms.
+    @Test public void parse_file_02() { 
badSpecific("file://host/file/name.txt"); }
+
+    // This is legal by RFC 8089 (jena-iri, based on the original RFC 1738, 
fails this with missing authority).
+    @Test public void parse_file_03() { goodNoIRICheck("file:/file/name.txt"); 
}
+
+    // -- FTP
+
+    @Test public void parse_ftp_01() { 
good("ftp://user@host:3333/abc/def?qs=ghi#jkl";); }
+
+    @Test public void parse_ftp_02() { good("ftp://[::1]/abc/def?qs=ghi#jkl";); 
}
+
+    @Test public void parse_urn_01() { good("urn:nid:nss"); }
+
+    @Test public void parse_urn_02() { good("urn:x-local:abc/def"); }
+
+    // @formatter:off
+    // namestring    = assigned-name
+    //                 [ rq-components ]
+    //                 [ "#" f-component ]
+    // rq-components = [ "?+" r-component ]
+    //                 [ "?=" q-component ]
+    // @formatter:on
+
+    @Test public void parse_urn_03()        { 
good("urn:x-local:abc/def?+more"); }
+
+    @Test public void parse_urn_04()        { 
good("urn:x-local:abc/def?=123"); }
+
+    @Test public void parse_urn_05()        { 
good("urn:x-local:abc/def?+resolve?=123#frag"); }
+
+    @Test public void parse_urn_06()        { good("urn:abc0:def#frag"); }
     //  urn:2char:1char
     // urn:NID:NSS where NID is at least 2 alphas, and at most 32 long
+
+    /**
+     * Allow UCSCHARs in the NSS, and the RFC 8141 components.
+     */
+    // XXX Not ASCII in the NSS part, or components.
+    private static boolean I_URN = true;
+    private static void parse_internation_urn(String string) {
+        if ( I_URN )
+            good(string);
+        else
+            badSpecific(string);
+    }
+
     @Test public void parse_urn_bad_01() { badSpecific("urn:"); }
+
     @Test public void parse_urn_bad_02() { badSpecific("urn:x:abc"); }
 
     @Test public void parse_urn_bad_03() { badSpecific("urn:abc:"); }
+
     // 33 chars
     @Test public void parse_urn_bad_04() { 
badSpecific("urn:abcdefghij-123456789-123456789-yz:a"); }
 
     // Bad by URN specific rule for the query components.
-    @Test public void parse_urn_bad_05()    { 
badSpecific("urn:local:abc/def?query=foo"); }
+    @Test public void parse_urn_bad_05() { 
badSpecific("urn:local:abc/def?query=foo"); }
 
-    @Test public void parse_urn_uuid_bad_01() {
-        badSpecific("urn:uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo");
-    }
+    // URNs are defined in RFC 8141 referring to RFC 3986 (URI - ASCII)
+    @Test public void parse_intn_urn_01()    { 
parse_internation_urn("urn:NID:αβγ"); }
+    @Test public void parse_intn_urn_02()    { 
parse_internation_urn("urn:nid:nss#αβγ"); }
+    @Test public void parse_intn_urn_03()    { 
parse_internation_urn("urn:nid:nss?=αβγ"); }
+    @Test public void parse_intn_urn_04()    { 
parse_internation_urn("urn:nid:nss?+αβγ"); }
 
-    @Test public void parse_urn_uuid_bad_02() {
-        badSpecific("urn:uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9#frag");
-    }
+    private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee";
 
-    @Test public void parse_urn_uuid_bad_03() {
-        // Bad length
-        badSpecific("urn:uuid:06e775ac");
+    // RFC 8141 allows query and fragment in urn: (limited character set).
+    // It even permits retrospectively applying to older schemes.
+    // However, the r- ("?+"), q- ("?=") or f- ("#") component does not play a part in URN equivalence.
+
+    // Allow r-component, q-component and f-component
+    private static final boolean UUID_8141 = true;
+    private static void parse_uuid_8141(String string) {
+        if ( UUID_8141 )
+            good(string);
+        else
+            badSpecific(string);
     }
 
-    @Test public void parse_urn_uuid_bad_04() {
-        // Bad character
-        badSpecific("urn:uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9");
-    }
+    // -- uuid:
 
-    @Test public void parse_uuid_bad_01() {
-        badSpecific("uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo");
-    }
+    @Test public void parse_uuid_01() { good("uuid:"+testUUID); }
 
-    @Test public void parse_uuid_bad_02() {
-        badSpecific("uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9#frag");
-    }
+    @Test public void parse_uuid_02() { 
good("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
 
-    @Test public void parse_uuid_bad_03() {
-        badSpecific("uuid:06e775ac-2c38-11b2");
-    }
+    @Test public void parse_uuid_bad_01() { 
badSpecific("uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo"); }
 
-    @Test public void parse_uuid_bad_04() {
-        badSpecific("urn:uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9");
-    }
+    // Too short
+    @Test public void parse_uuid_bad_02() { 
badSpecific("uuid:06e775ac-2c38-11b2"); }
 
-    // No char fragment is legal.
-    @Test public void parse_uuid_bad_05() {
-        badSpecific("urn:uuid:" + testUUID + "#");
-    }
+    // Too long
+    @Test public void parse_uuid_bad_03() { 
badSpecific("uuid:06e775ac-2c38-11b2-9999"); }
 
-    // RFC 8141 allows query string must be ?=<one+ char> or ?+<one+ char>
-    @Test public void parse_uuid_bad_06() {
-        badSpecific("urn:uuid:" + testUUID + "?=chars");
-    }
+    // Bad character
+    @Test public void parse_uuid_bad_04() { 
badSpecific("uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9"); }
 
-    @Test public void parse_uuid_bad_07() {
-        badSpecific("urn:uuid:" + testUUID + "?+chars");
-    }
+    // For the ad-hoc "uuid:" do not allow r/q/f components.
 
-    @Test public void parse_uuid_bad_08() {
-        badSpecific("urn:uuid:" + testUUID + "?=");
-    }
+    @Test public void parse_uuid_bad_10() { badSpecific("uuid:"+testUUID+ 
"?+chars"); }
+
+    @Test public void parse_uuid_bad_11() { badSpecific("uuid:"+testUUID+ 
"?=chars"); }
+
+    @Test public void parse_uuid_bad_12() { 
badSpecific("uuid:"+testUUID+"#frag"); }
 
-    @Test public void parse_uuid_bad_09() {
-        badSpecific("urn:uuid:" + testUUID + "?+");
-    }
+
+    // -- urn:uuid:
 
     // RFC 8141 allows query and fragment in urn: (limited character set).
-    // RFC 4122 (uuid namespace definition) does not.
-    @Test
-    public void parse_uuid_bad_8141_01() {
-        badSpecific("urn:uuid:" + testUUID + "#frag");
-    }
+    // It even permits retrospectively applying to older schemes.
+    // However, the r- ("?+"), q- ("?=") or f- ("#") component does not play a part in URN equivalence.
 
-    // No char fragment is legal.
-    @Test
-    public void parse_uuid_bad_8141_02() {
-        badSpecific("urn:uuid:" + testUUID + "#");
-    }
+    @Test public void parse_urn_uuid_01() { good("urn:uuid:"+testUUID); }
+
+    @Test public void parse_urn_uuid_02() { 
good("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); }
+
+    @Test public void parse_urn_uuid_03() { 
parse_uuid_8141("urn:uuid:"+testUUID+"#frag"); }
+
+    // Zero char fragment is legal.
+    @Test public void parse_urn_uuid_04() { parse_uuid_8141("urn:uuid:" + 
testUUID + "#"); }
 
     // RFC 8141 allows query string must be ?=<one+ char> or ?+<one+ char>
-    @Test
-    public void parse_uuid_bad_8141_03() {
-        badSpecific("urn:uuid:" + testUUID + "?=chars");
-    }
+    @Test public void parse_urn_uuid_21() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?=chars"); }
 
-    @Test
-    public void parse_uuid_bad_8141_04() {
-        badSpecific("urn:uuid:" + testUUID + "?+chars");
-    }
+    // RFC 8141 allows a "query string" where it must be ?=<one+ char> or ?+<one+ char>
+    @Test public void parse_urn_uuid_22() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?=ab/?cd"); }
+
+    @Test public void parse_urn_uuid_23() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?+chars"); }
+
+    @Test public void parse_urn_uuid_24() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?+ab/?cd"); }
+
+    @Test public void parse_urn_uuid_25() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?+chars?=chars#frag"); }
+
+    @Test public void parse_urn_uuid_26() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?+chars?=chars#frag"); }
+
+    // Strange cases.
+    // The r- and q- components can have '?', '+' and '=' in them
+    // so the first occurrence captures everything up to the
+    // fragment or end of string.
+
+    @Test public void parse_urn_uuid_27() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?+chars?"); }
+
+    @Test public void parse_urn_uuid_28() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?+chars??=next"); }
+
+    // Single q-component
+    @Test public void parse_urn_uuid_29() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?=chars?a=b"); }
+
+    // Single q-component!
+    @Test public void parse_urn_uuid_30() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?=aaa?+bbb"); }
+
+    // Single r-component
+    @Test public void parse_urn_uuid_31() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?+aaa?+bbb"); }
+
+    @Test public void parse_urn_uuid_32() { parse_uuid_8141("urn:uuid:" + 
testUUID + "?=Q?+R"); }
+
+    // Always bad.
+    // Query string, not a component.
+    @Test public void parse_urn_uuid_bad_01() { 
badSpecific("urn:uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo"); }
+
+    // Bad length
+    @Test public void parse_urn_uuid_bad_02() { 
badSpecific("urn:uuid:06e775ac"); }
+
+    // Bad character
+    @Test public void parse_urn_uuid_bad_03() { 
badSpecific("urn:uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9"); }
+
+    // Always bad. At least one char.
+    @Test public void parse_urn_uuid_bad_04() { badSpecific("urn:uuid:" + 
testUUID + "?="); }
+
+    // Always bad. At least one char.
+    @Test public void parse_urn_uuid_bad_05() { badSpecific("urn:uuid:" + 
testUUID + "?+"); }
+
+    @Test public void parse_urn_uuid_bad_06() { badSpecific("urn:uuid:" + 
testUUID + "?"); }
+
+    @Test public void parse_urn_uuid_bad_07() { badSpecific("urn:uuid:" + 
testUUID + "?abc"); }
+
+    // XXX Not ASCII in the NSS part
+    @Test public void parse_urn_uuid_bad_12() { badSpecific("urn:uuid:" + 
testUUID + "#αβγ"); }
+    @Test public void parse_urn_uuid_bad_13() { badSpecific("urn:uuid:" + 
testUUID + "?=αβγ"); }
+    @Test public void parse_urn_uuid_bad_14() { badSpecific("urn:uuid:" + 
testUUID + "?+αβγ"); }
 
-    private void good(String string) {
+    private static void good(String string) {
         IRIx iri = IRIx.create(string);
         assertNotNull(iri);
         if ( true ) {
@@ -320,13 +373,14 @@ public class TestRFC3986 extends AbstractTestIRIx {
         assertNotNull(javaURI);
     }
 
-    private void goodNoIRICheck(String string) {
+    // Where jena-iri does not get the right answer.
+    private static void goodNoIRICheck(String string) {
         IRIx iri = IRIx.create(string);
         java.net.URI javaURI = java.net.URI.create(string);
     }
 
     // Expect an IRIParseException
-    private void bad(String string) {
+    private static void bad(String string) {
         try {
             IRIs.checkEx(string);
             IRIs.reference(string);
@@ -335,7 +389,7 @@ public class TestRFC3986 extends AbstractTestIRIx {
         } catch (IRIException ex) {}
     }
 
-    private void badSpecific(String string) {
+    private static void badSpecific(String string) {
         bad(string);
     }
 }


Reply via email to