This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch jena5 in repository https://gitbox.apache.org/repos/asf/jena.git
commit aebedf537cb4d7735a55a1bfe68d418674697393 Author: Andy Seaborne <a...@apache.org> AuthorDate: Sat Oct 28 12:45:35 2023 +0100 GH-2062: Support URN components inc fragments for UUIDs --- .../org/apache/jena/riot/system/TestIRIxRIOT.java | 42 +-- .../org/apache/jena/irix/IRIProviderJenaIRI.java | 112 +++++++- .../src/main/java/org/apache/jena/irix/IRIx.java | 2 +- .../test/java/org/apache/jena/irix/TS_IRIx.java | 3 +- .../irix/{TestParseIRIx.java => TestIRIxOps.java} | 103 +------ .../java/org/apache/jena/irix/TestIRIxSyntax.java | 130 +++++++++ .../java/org/apache/jena/irix/TestRFC3986.java | 314 ++++++++++++--------- 7 files changed, 424 insertions(+), 282 deletions(-) diff --git a/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java b/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java index ab3d686e98..5dad5324a9 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/system/TestIRIxRIOT.java @@ -84,39 +84,7 @@ public class TestIRIxRIOT { @Test public void irix_uuid_1_nt_check() { testLang(urnuuid01, Lang.NT, UNSET, TRUE, 0, 0); } @Test public void irix_uuid_1_ttl() { testDft (urnuuid01, Lang.TTL, 0, 0); } - // urn:uuid -- IRI3986 answers - // -// private static String urnuuid02 = "<urn:uuid:bad>"; -// @Test public void irix_uuid_2_nt() { testDft (urnuuid02, Lang.NT, 0, 0); } -// @Test public void irix_uuid_2_nt_check() { testLang(urnuuid02, Lang.NT, UNSET, TRUE, 0, 1); } -// @Test public void irix_uuid_2_ttl() { testDft (urnuuid02, Lang.TTL, 0, 1); } -// -// private static String uuid03 = "<uuid:bad>"; -// @Test public void irix_uuid_3_nt() { testDft (uuid03, Lang.NT, 0, 0); } -// @Test public void irix_uuid_3_nt_check() { testLang(uuid03, Lang.NT, UNSET, TRUE, 0, 1); } -// @Test public void irix_uuid_3_ttl() { testDft (uuid03, Lang.TTL, 0, 1); } -// -// private static String urnuuid04 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query>"; -// 
@Test public void irix_uuid_4_nt() { testDft (urnuuid04, Lang.NT, 0, 0); } -// @Test public void irix_uuid_4_nt_check() { testLang(urnuuid04, Lang.NT, UNSET, TRUE, 0, 1); } -// @Test public void irix_uuid_4_ttl() { testDft (urnuuid04, Lang.TTL, 0, 1); } -// -// private static String uruuidurn05 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>"; -// @Test public void irix_uuid_5_nt() { testDft (uruuidurn05, Lang.NT, 0, 0); } -// @Test public void irix_uuid_5_nt_check() { testLang(uruuidurn05, Lang.NT, UNSET, TRUE, 0, 1); } -// @Test public void irix_uuid_5_ttl() { testDft (uruuidurn05, Lang.TTL, 0, 1); } -// -// private static String urnuuid06 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>"; -// @Test public void irix_uuid_6_nt() { testDft (urnuuid06, Lang.NT, 0, 0); } -// @Test public void irix_uuid_6_nt_check() { testLang(urnuuid06, Lang.NT, UNSET, TRUE, 0, 2); } -// @Test public void irix_uuid_6_ttl() { testDft (urnuuid06, Lang.TTL, 0, 2); } -// -// private static String uuid07 = "<uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>"; -// @Test public void irix_uuid_7_nt() { testDft (uuid07, Lang.NT, 0, 0); } -// @Test public void irix_uuid_7_nt_check() { testLang(uuid07, Lang.NT, UNSET, TRUE, 0, 2); } -// @Test public void irix_uuid_7_ttl() { testDft (uuid07, Lang.TTL, 0, 2); } - - // -- urn:uuid -- jena-iri answers + // -- uuid: & urn:uuid -- jena-iri answers // The warning on bad UUIDs is from IRIProviderjenaIRI, not jena-iri, and so it isn't check/no check sensitive. 
private static String urnuuid02 = "<urn:uuid:bad>"; @Test public void irix_uuid_2_nt() { testDft (urnuuid02, Lang.NT, 0, 1); } @@ -133,10 +101,10 @@ public class TestIRIxRIOT { @Test public void irix_uuid_4_nt_check() { testLang(urnuuid04, Lang.NT, UNSET, TRUE, 0, 1); } @Test public void irix_uuid_4_ttl() { testDft (urnuuid04, Lang.TTL, 0, 1); } - private static String uruuidurn05 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>"; - @Test public void irix_uuid_5_nt() { testDft (uruuidurn05, Lang.NT, 0, 1); } - @Test public void irix_uuid_5_nt_check() { testLang(uruuidurn05, Lang.NT, UNSET, TRUE, 0, 1); } - @Test public void irix_uuid_5_ttl() { testDft (uruuidurn05, Lang.TTL, 0, 1); } + private static String urnuuid05 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79#fragment>"; + @Test public void irix_uuid_5_nt() { testDft (urnuuid05, Lang.NT, 0, 0); } + @Test public void irix_uuid_5_nt_check() { testLang(urnuuid05, Lang.NT, UNSET, TRUE, 0, 0); } + @Test public void irix_uuid_5_ttl() { testDft (urnuuid05, Lang.TTL, 0, 0); } private static String urnuuid06 = "<urn:uuid:6cd401dc-a8d2-11eb-9192-1f162b53dc79?query#fragment>"; @Test public void irix_uuid_6_nt() { testDft (urnuuid06, Lang.NT, 0, 0); } diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java index 2934f9f589..2b68e7d0f3 100644 --- a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java +++ b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java @@ -230,11 +230,12 @@ public class IRIProviderJenaIRI implements IRIProvider { if ( STRICT_FILE && isFILE(iri) ) { if ( iriStr.startsWith("file://" ) && ! 
iriStr.startsWith("file:///") ) throw new IRIException("file: URLs should start file:///: <"+iriStr+">"); - } - - if ( isUUID(iri, iriStr) ) { + } else if ( isUUID(iri, iriStr) ) { checkUUID(iri, iriStr); + } else if ( isURNUUID(iri, iriStr) ) { + checkURNUUID(iri, iriStr); } + if (!showExceptions) return iri; if (!iri.hasViolation(includeWarnings)) @@ -283,26 +284,109 @@ public class IRIProviderJenaIRI implements IRIProvider { private static boolean isURN(IRI iri) { return "urn".equalsIgnoreCase(iri.getScheme()); } private static boolean isFILE(IRI iri) { return "file".equalsIgnoreCase(iri.getScheme()); } - private static String UUID_REGEXP = "^(?:urn:uuid|uuid):[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"; - private static Pattern UUID_PATTERN = Pattern.compile(UUID_REGEXP, Pattern.CASE_INSENSITIVE); - private static boolean isUUID(IRI iri, String iriStr) { - return iriStr.regionMatches(true, 0, "urn:uuid:", 0, "urn:uuid:".length()) - || iriStr.regionMatches(true, 0, "uuid:", 0, "uuid:".length()); + // Ignore case + return iriStr.regionMatches(true, 0, "uuid:", 0, "uuid:".length()); } + private static boolean isURNUUID(IRI iri, String iriStr) { + // Ignore case + return iriStr.regionMatches(true, 0, "urn:uuid:", 0, "urn:uuid:".length()); + } + + // ---- uuid: + // UUID match, no anchors or URI scheme. + private static String UUID_BASE = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"; + private static String UUID_REGEXP = "^uuid:"+UUID_BASE+"$"; + private static Pattern UUID_PATTERN = Pattern.compile(UUID_REGEXP, Pattern.CASE_INSENSITIVE); + private static void checkUUID(IRI iriObj, String original) { if ( iriObj.hasViolation(true) ) // Already has problems. return; - // jena-iri and iri4ld should both be uptodate now.. -// // Unfortunately, these tests are check/no-check sensitive. 
-// if ( iriObj.getRawFragment() != null ) -// throw new IRIException("Fragment used with UUID"); -// if ( iriObj.getRawQuery() != null ) -// throw new IRIException("Query used with UUID"); + // jena-iri does not have UUID checks. + // Unfortunately, these tests are check/no-check sensitive. + if ( iriObj.getRawFragment() != null ) + throw new IRIException("Fragment used with uuid:"); + if ( iriObj.getRawQuery() != null ) + throw new IRIException("Query used with uuid:"); boolean matches = UUID_PATTERN.matcher(original).matches(); if ( !matches ) throw new IRIException("Not a valid UUID string: "+original); } + + + // ---- urn:uuid: + // RFC 8141 added the possibility for r-component, q-component (combined + // into the URI query string) and f-component (restricted fragment). This + // regexp has a weak test for r/q/f. It does not check the character + // limitations to ASCII on r/q/f + + //private static String A2Z = "[0-9a-z]"; + + // Non-strict regexp: Any order r- and q-components, UCSchars. + private static String URN_UUID_REGEXP_LAX = "^urn:uuid:"+UUID_BASE+"(?:(?:\\?\\+.|\\?=.|#).*)?$"; + + // Strict regex for urn:uuid + // Only ASCII. + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // pct-encoded = "%" HEXDIG HEXDIG + // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + // iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar + // reserved = gen-delims / sub-delims + // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + // sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" + // / "*" / "+" / "," / ";" / "=" + // Not: + // ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@" + // = ipchar / ucschar + +// ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF +// / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD +// / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD +// / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD +// / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD +// / %xD0000-DFFFD / %xE1000-EFFFD + + // "(?: )" is a non-binding group. + private static String PCT = "(?:%[a-f][a-f])"; + + // As contents of "[]" used in PCHAR + private static String UNRESERVED = "-0-9a-z._~"; + // Or use \p{IsAlphabetic} + private static String UCSCHAR = "\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"; + /* + / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD + / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD + / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD + / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD + / %xD0000-DFFFD / %xE1000-EFFFD + */ + // private = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD + //private static String IPRIVATE + private static String IUNRESERVED = UNRESERVED+UCSCHAR; + + //private static String GEN_DELIMS = ":/\\?#\\[\\]@"; + private static String SUB_DELIMS = "!\\$&'\\(\\)\\*\\+,;="; + // Switch IUNRESERVED / UNRESERVED + private static String PCHARS1 = UNRESERVED+SUB_DELIMS+":"+"@"; + private static String PCHAR = "(?:(?:["+PCHARS1+"]|"+PCT+"))"; + + private static String URN_COMP_X = "/\\?"; + private static String URN_RQ_COMP_CHAR = PCHAR+URN_COMP_X; + private static String URN_R_COMP = "(?:\\?\\+["+URN_RQ_COMP_CHAR+"]+)?"; + private static String URN_Q_COMP = "(?:\\?=["+URN_RQ_COMP_CHAR+"]+)?"; + private static String URN_F_COMP = "(?:#["+PCHAR+"]*)?"; + private static String URN_UUID_REGEXP = "^urn:uuid:"+UUID_BASE+URN_R_COMP+URN_Q_COMP+URN_F_COMP+"$"; + + private static Pattern URN_UUID_PATTERN = Pattern.compile(URN_UUID_REGEXP, Pattern.CASE_INSENSITIVE); + + private static void checkURNUUID(IRI 
iriObj, String original) { + if ( iriObj.hasViolation(true) ) + // Already has problems. + return; + boolean matches = URN_UUID_PATTERN.matcher(original).matches(); + if ( !matches ) + throw new IRIException("Not a valid UUID string: "+original); + } } \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIx.java b/jena-core/src/main/java/org/apache/jena/irix/IRIx.java index 885fa62b54..7b2c5377f1 100644 --- a/jena-core/src/main/java/org/apache/jena/irix/IRIx.java +++ b/jena-core/src/main/java/org/apache/jena/irix/IRIx.java @@ -60,7 +60,7 @@ public abstract class IRIx { * It returns a IRIx holder and does no checking whatsoever. * Whether the IRI "works" is down to care by the application. */ - static public IRIx createAny(String iri) throws IRIException { + static public IRIx createAny(String iri) { Objects.requireNonNull(iri); return IRIProviderAny.stringProvider().create(iri); } diff --git a/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java b/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java index 479e9fd544..eba5652592 100644 --- a/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java +++ b/jena-core/src/test/java/org/apache/jena/irix/TS_IRIx.java @@ -26,7 +26,8 @@ import org.junit.runners.Suite; @RunWith(Suite.class) @Suite.SuiteClasses( { // IRIx tests with matrix of providers. 
- TestParseIRIx.class, + TestIRIxSyntax.class, + TestIRIxOps.class, TestRFC3986.class, TestResolve.class, TestNormalize.class, diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestParseIRIx.java b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxOps.java similarity index 66% rename from jena-core/src/test/java/org/apache/jena/irix/TestParseIRIx.java rename to jena-core/src/test/java/org/apache/jena/irix/TestIRIxOps.java index a4d951a7b4..b91e2d7a37 100644 --- a/jena-core/src/test/java/org/apache/jena/irix/TestParseIRIx.java +++ b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxOps.java @@ -21,118 +21,23 @@ package org.apache.jena.irix; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import java.util.Locale; - import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; /** - * Parse tests. + * Basic parser tests and IRIx operations. * - * {@link TestRFC3986} contained tests with expections scheme errors and warnings. + * {@link TestRFC3986} contained tests with exceptions scheme errors and warnings. */ @RunWith(Parameterized.class) -public class TestParseIRIx extends AbstractTestIRIx { +public class TestIRIxOps extends AbstractTestIRIx { - public TestParseIRIx(String name, IRIProvider provider) { + public TestIRIxOps(String name, IRIProvider provider) { super(name, provider); } - // ---- RFC 3986 Grammar : misc parsing. - - @Test public void uri_01() { parse("http://example/abc"); } - - @Test public void uri_02() { parse("http://example/αβγ"); } - - @Test public void uri_03() { parse("http://example/Ẓ"); } - - @Test public void uri_04() { parse("http://[::1]/abc"); } - - @Test public void uri_05() { parse("http://reg123/abc"); } - - @Test public void uri_06() { parse("http://1.2.3.4/abc"); } - - // ---- Compliance with HTTP RFC7230. 
https://tools.ietf.org/html/rfc7230#section-2.7 - - @Test(expected=IRIException.class) - public void http_01() { parse("http:"); } - - @Test(expected=IRIException.class) - public void http_02() { parse("http:/"); } - - @Test(expected=IRIException.class) - public void http_03() { parse("http://"); } - - @Test public void http_04() { parse("http://x"); } - - @Test(expected=IRIException.class) - public void http_05() { parse("http:abc"); } - - @Test(expected=IRIException.class) - public void http_06() { parse("http:///abc"); } - - @Test(expected=IRIException.class) - // [] not in IPv6 address - public void http_07() { parse("http://h/ab[]"); } - - @Test public void http_08() { parse("http://example/~jena/file"); } - - // -- Compliance with URN scheme: https://tools.ietf.org/html/rfc8141 - - @Test public void urn_01() { parse("urn:NID:NSS"); } - - @Test(expected=IRIException.class) - public void urn_02() { parse("urn:x:abcd"); } - - @Test(expected=IRIException.class) - public void urn_03() { parse("urn:ex:"); } - - @Test public void urn_04() { notStrict("urn", ()->parse("urn:x:abc")); } - - @Test public void urn_05() { notStrict("urn", ()->parse("urn:ex:")); } - - @Test public void urn_06() { parse("urn:NID:NSS?=abc"); } - - @Test public void urn_07() { parse("urn:NID:NSS?+abc"); } - - @Test public void urn_08() { parse("urn:NID:NSS#frag"); } - - @Test public void urn_09() { parse("urn:NID:NSS#"); } - - private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee"; - - @Test public void parse_uuid_01() { parse("uuid:"+testUUID); } - - @Test public void parse_uuid_02() { parse("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } - - @Test public void parse_uuid_03() { parse("UUID:"+testUUID); } - - @Test public void parse_uuid_04() { parse("urn:uuid:"+testUUID); } - - @Test public void parse_uuid_05() { parse("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } - - @Test public void parse_uuid_06() { parse("URN:UUID:"+testUUID); } - - // Illegal. 
- // RFC 8141 (urn) allows query and fragment in urn:uuid: (limited character set). - // But RFC 4122 (urn:uuid: namespace definition) does not. - - // -- Compliance with file scheme: https://tools.ietf.org/html/rfc8089 - - @Test public void file_01() { parse("file:///path/name"); } - - @Test public void file_02() { parse("file:/path/name"); } - - @Test public void file_03() { parse("file:name"); } - - @Test public void file_04() { parse("file:/path/name"); } - - @Test public void file_05() { parse("file:name"); } - - @Test public void file_06() { parse("file:///c:/~user/file"); } - // --- Use in RDF @Test public void reference_01() { reference("http://example/", true); } diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxSyntax.java b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxSyntax.java new file mode 100644 index 0000000000..ff161aa482 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxSyntax.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.irix; + +import java.util.Locale; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Basic tests of RFC 3986 syntax. 
+ * + * {@link TestRFC3986} contained tests with more scheme errors and warnings. It also compares to jena-iri. + */ +@RunWith(Parameterized.class) +public class TestIRIxSyntax extends AbstractTestIRIx { + + public TestIRIxSyntax(String name, IRIProvider provider) { + super(name, provider); + } + + @Test public void http_01() { parse("http://example/abc"); } + + @Test public void http_02() { parse("http://example/αβγ"); } + + @Test public void http_03() { parse("http://example/Ẓ"); } + + @Test public void http_04() { parse("http://[::1]/abc"); } + + @Test public void http_05() { parse("http://reg123/abc"); } + + @Test public void http_06() { parse("http://1.2.3.4/abc"); } + + // ---- Compliance with HTTP RFC7230. https://tools.ietf.org/html/rfc7230#section-2.7 + + @Test(expected=IRIException.class) + public void http_51() { parse("http:"); } + + @Test(expected=IRIException.class) + public void http_52() { parse("http:/"); } + + @Test(expected=IRIException.class) + public void http_53() { parse("http://"); } + + @Test public void http_54() { parse("http://x"); } + + @Test(expected=IRIException.class) + public void http_55() { parse("http:abc"); } + + @Test(expected=IRIException.class) + public void http_56() { parse("http:///abc"); } + + @Test(expected=IRIException.class) + // [] not in IPv6 address + public void http_57() { parse("http://h/ab[]"); } + + @Test public void http_58() { parse("http://example/~jena/file"); } + + // -- Compliance with URN scheme: https://tools.ietf.org/html/rfc8141 + + @Test public void urn_01() { parse("urn:NID:NSS"); } + + @Test(expected=IRIException.class) + public void urn_02() { parse("urn:x:abcd"); } + + @Test(expected=IRIException.class) + public void urn_03() { parse("urn:ex:"); } + + @Test public void urn_04() { notStrict("urn", ()->parse("urn:x:abc")); } + + @Test public void urn_05() { notStrict("urn", ()->parse("urn:ex:")); } + + @Test public void urn_06() { parse("urn:NID:NSS?=abc"); } + + @Test public void urn_07() { 
parse("urn:NID:NSS?+abc"); } + + @Test public void urn_08() { parse("urn:NID:NSS#frag"); } + + @Test public void urn_09() { parse("urn:NID:NSS#"); } + + private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee"; + + @Test public void parse_uuid_01() { parse("uuid:"+testUUID); } + + @Test public void parse_uuid_02() { parse("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } + + @Test public void parse_uuid_03() { parse("UUID:"+testUUID); } + + @Test public void parse_uuid_04() { parse("urn:uuid:"+testUUID); } + + @Test public void parse_uuid_05() { parse("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } + + @Test public void parse_uuid_06() { parse("URN:UUID:"+testUUID); } + + // -- Compliance with file scheme: https://tools.ietf.org/html/rfc8089 + + @Test public void file_01() { parse("file:///path/name"); } + + @Test public void file_02() { parse("file:/path/name"); } + + @Test public void file_03() { parse("file:name"); } + + @Test public void file_04() { parse("file:/path/name"); } + + @Test public void file_05() { parse("file:name"); } + + @Test public void file_06() { parse("file:///c:/~user/file"); } + + // Parse, only collect violations from scheme-specific rules. + private void parse(String string) { + IRIx iri = IRIx.create(string); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java index 9cd2db10c5..061bc2cf34 100644 --- a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java +++ b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java @@ -33,7 +33,9 @@ import org.junit.runners.Parameterized; /** * Test of parsing and schema violations. - * See also plain parse tests in {@link TestParseIRIx} + * This is the test suite that compares result with jena-iri. + * See also {@link TestIRIxSyntax} for other IRIx parsing operations. + * See also {@link TestIRIxOps} for IRIx operations. 
*/ @FixMethodOrder(MethodSorters.NAME_ASCENDING) @RunWith(Parameterized.class) @@ -58,7 +60,7 @@ public class TestRFC3986 extends AbstractTestIRIx { @Test public void parse_05() { good("/ab%FFdef"); } // Uppercase preferred - @Test public void parse_06() { goodNoIRICheck("/ab%ffdef"); } + @Test public void parse_06() { good("/ab%ffdef"); } @Test public void parse_07() { good("http://host/abcdef?qs=foo#frag"); } @@ -71,7 +73,7 @@ public class TestRFC3986 extends AbstractTestIRIx { @Test public void parse_11() { good("//host:8081/abc/def?qs=ghi#jkl"); } // Legal, if weird, scheme name. - @Test public void parse_12() { goodNoIRICheck("a+.-9://h/"); } + @Test public void parse_12() { good("a+.-9://h/"); } // No path. @@ -88,66 +90,6 @@ public class TestRFC3986 extends AbstractTestIRIx { @Test public void parse_18() { good("/z/a:b"); } - @Test public void equality_01() { - String s = "https://jena.apache.org/"; - IRIx iri1 = IRIx.create(s); - IRIx iri2 = IRIx.create(s); - assertEquals(iri1, iri2); - assertEquals(iri1.hashCode(), iri2.hashCode()); - } - - // HTTP scheme specific rules. - @Test public void parse_http_01() { badSpecific("http:///file/name.txt"); } - - // HTTP scheme specific rules. - @Test public void parse_http_02() { badSpecific("HTTP:///file/name.txt"); } - - // This is treated as legal with path and no authority. - //@Test public void parse_http_02a() { badSpecific("http:/file/name.txt"); } - - @Test public void parse_http_03() { badSpecific("http://user@host/file/name.txt"); } - - @Test public void parse_http_04() { good("nothttp://user@host/file/name.txt"); } - - @Test public void parse_http_05() { good("nothttp://user@/file/name.txt"); } - - @Test public void parse_file_01() { good("file:///file/name.txt"); } - - // We reject "file://host/" forms. - @Test public void parse_file_02() { badSpecific("file://host/file/name.txt"); } - - // This is legal by RFC 8089 (jena-iri, based on the original RFC 1738, fails this with missing authority). 
- @Test public void parse_file_03() { goodNoIRICheck("file:/file/name.txt"); } - - @Test public void parse_urn_01() { good("urn:x-local:abc/def"); } - - // rq-components = [ "?+" r-component ] - // [ "?=" q-component ] - - @Test public void parse_urn_02() { good("urn:x-local:abc/def?+more"); } - - @Test public void parse_urn_03() { good("urn:x-local:abc/def?=123"); } - - @Test public void parse_urn_04() { good("urn:x-local:abc/def?+resolve?=123#frag"); } - - @Test public void parse_urn_05() { good("urn:abc0:def"); } - - private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee"; - - @Test public void parse_uuid_01() { good("uuid:"+testUUID); } - - @Test public void parse_uuid_02() { good("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } - - @Test public void parse_uuid_03() { good("urn:uuid:"+testUUID); } - - @Test public void parse_uuid_04() { good("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } - - // -- FTP - - @Test public void parse_ftp_01() { good("ftp://user@host:3333/abc/def?qs=ghi#jkl"); } - - @Test public void parse_ftp_02() { good("ftp://[::1]/abc/def?qs=ghi#jkl"); } - // ---- bad // Leading ':' @@ -209,102 +151,213 @@ public class TestRFC3986 extends AbstractTestIRIx { // [] not allowed. @Test public void bad_frag_1() { bad("http://eg.com/test.txt#xpointer(/unit[5])"); } - // ---- bad by scheme. - @Test public void parse_http_bad_01() { badSpecific("http://user@host:8081/abc/def?qs=ghi#jkl"); } + @Test public void equality_01() { + String s = "https://jena.apache.org/"; + IRIx iri1 = IRIx.create(s); + IRIx iri2 = IRIx.create(s); + assertEquals(iri1, iri2); + assertEquals(iri1.hashCode(), iri2.hashCode()); + } + + // HTTP scheme specific rules. + @Test public void parse_http_01() { badSpecific("http:///file/name.txt"); } + + // HTTP scheme specific rules. + @Test public void parse_http_02() { badSpecific("HTTP:///file/name.txt"); } + + // This is legal with path and no authority. 
+ //@Test public void parse_http_02a() { badSpecific("http:/file/name.txt"); } + + @Test public void parse_http_03() { badSpecific("http://user@host/file/name.txt"); } + + @Test public void parse_http_04() { good("nothttp://user@host/file/name.txt"); } + + @Test public void parse_http_05() { good("nothttp://user@/file/name.txt"); } + + @Test public void parse_http_06() { badSpecific("http://user@host:8081/abc/def?qs=ghi#jkl"); } + @Test public void parse_file_01() { good("file:///file/name.txt"); } + + // We reject "file://host/" forms. + @Test public void parse_file_02() { badSpecific("file://host/file/name.txt"); } + + // This is legal by RFC 8089 (jena-iri, based on the original RFC 1738, fails this with missing authority). + @Test public void parse_file_03() { goodNoIRICheck("file:/file/name.txt"); } + + // -- FTP + + @Test public void parse_ftp_01() { good("ftp://user@host:3333/abc/def?qs=ghi#jkl"); } + + @Test public void parse_ftp_02() { good("ftp://[::1]/abc/def?qs=ghi#jkl"); } + + @Test public void parse_urn_01() { good("urn:nid:nss"); } + + @Test public void parse_urn_02() { good("urn:x-local:abc/def"); } + + // @formatter:off + // namestring = assigned-name + // [ rq-components ] + // [ "#" f-component ] + // rq-components = [ "?+" r-component ] + // [ "?=" q-component ] + // @formatter:on + + @Test public void parse_urn_03() { good("urn:x-local:abc/def?+more"); } + + @Test public void parse_urn_04() { good("urn:x-local:abc/def?=123"); } + + @Test public void parse_urn_05() { good("urn:x-local:abc/def?+resolve?=123#frag"); } + + @Test public void parse_urn_06() { good("urn:abc0:def#frag"); } // urn:2char:1char // urn:NID:NSS where NID is at least 2 alphas, and at most 32 long + + /** + * Allow UCSCHARs in the NSS, and the RFC 8141 components. + */ + // XXX Not ASCII in the NSS part, or components. 
+ private static boolean I_URN = true; + private static void parse_internation_urn(String string) { + if ( I_URN ) + good(string); + else + badSpecific(string); + } + @Test public void parse_urn_bad_01() { badSpecific("urn:"); } + @Test public void parse_urn_bad_02() { badSpecific("urn:x:abc"); } @Test public void parse_urn_bad_03() { badSpecific("urn:abc:"); } + // 33 chars @Test public void parse_urn_bad_04() { badSpecific("urn:abcdefghij-123456789-123456789-yz:a"); } // Bad by URN specific rule for the query components. - @Test public void parse_urn_bad_05() { badSpecific("urn:local:abc/def?query=foo"); } + @Test public void parse_urn_bad_05() { badSpecific("urn:local:abc/def?query=foo"); } - @Test public void parse_urn_uuid_bad_01() { - badSpecific("urn:uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo"); - } + // URNs are defined in RFC 8141 referring to RFC 3986 (URI - ASCII) + @Test public void parse_intn_urn_01() { parse_internation_urn("urn:NID:αβγ"); } + @Test public void parse_intn_urn_02() { parse_internation_urn("urn:nid:nss#αβγ"); } + @Test public void parse_intn_urn_03() { parse_internation_urn("urn:nid:nss?=αβγ"); } + @Test public void parse_intn_urn_04() { parse_internation_urn("urn:nid:nss?+αβγ"); } - @Test public void parse_urn_uuid_bad_02() { - badSpecific("urn:uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9#frag"); - } + private static String testUUID = "aa045fc2-a781-11eb-9041-afa3877612ee"; - @Test public void parse_urn_uuid_bad_03() { - // Bad length - badSpecific("urn:uuid:06e775ac"); + // RFC 8141 allows query and fragment in urn: (limited character set). + // It even permits retrospectively applying to older schemes, + // However, the r- ("?+"), q- ("?=") or f- (#) component does not play a part in URN equivalence. 
+ + // Allow r-component, q-component and f-component + private static final boolean UUID_8141 = true; + private static void parse_uuid_8141(String string) { + if ( UUID_8141 ) + good(string); + else + badSpecific(string); } - @Test public void parse_urn_uuid_bad_04() { - // Bad character - badSpecific("urn:uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9"); - } + // -- uuid: - @Test public void parse_uuid_bad_01() { - badSpecific("uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo"); - } + @Test public void parse_uuid_01() { good("uuid:"+testUUID); } - @Test public void parse_uuid_bad_02() { - badSpecific("uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9#frag"); - } + @Test public void parse_uuid_02() { good("uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } - @Test public void parse_uuid_bad_03() { - badSpecific("uuid:06e775ac-2c38-11b2"); - } + @Test public void parse_uuid_bad_01() { badSpecific("uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo"); } - @Test public void parse_uuid_bad_04() { - badSpecific("urn:uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9"); - } + // Too short + @Test public void parse_uuid_bad_02() { badSpecific("uuid:06e775ac-2c38-11b2"); } - // No char fragment is legal. - @Test public void parse_uuid_bad_05() { - badSpecific("urn:uuid:" + testUUID + "#"); - } + // Too long + @Test public void parse_uuid_bad_03() { badSpecific("uuid:06e775ac-2c38-11b2-9999"); } - // RFC 8141 allows query string must be ?=<one+ char> or ?+<one+ char> - @Test public void parse_uuid_bad_06() { - badSpecific("urn:uuid:" + testUUID + "?=chars"); - } + // Bad character + @Test public void parse_uuid_bad_04() { badSpecific("uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9"); } - @Test public void parse_uuid_bad_07() { - badSpecific("urn:uuid:" + testUUID + "?+chars"); - } + // For the ad-hoc "uuid:" do not allow r/q/f components. 
- @Test public void parse_uuid_bad_08() { - badSpecific("urn:uuid:" + testUUID + "?="); - } + @Test public void parse_uuid_bad_10() { badSpecific("uuid:"+testUUID+ "?+chars"); } + + @Test public void parse_uuid_bad_11() { badSpecific("uuid:"+testUUID+ "?=chars"); } + + @Test public void parse_uuid_bad_12() { badSpecific("uuid:"+testUUID+"#frag"); } - @Test public void parse_uuid_bad_09() { - badSpecific("urn:uuid:" + testUUID + "?+"); - } + + // -- urn:uuid: // RFC 8141 allows query and fragment in urn: (limited character set). - // RFC 4122 (uuid namespace definition) does not. - @Test - public void parse_uuid_bad_8141_01() { - badSpecific("urn:uuid:" + testUUID + "#frag"); - } + // It even permits retrospectively applying to older schemes, + // However, the r- ("?+"), q- ("?=") or f- (#) component does not play a part in URN equivalence. - // No char fragment is legal. - @Test - public void parse_uuid_bad_8141_02() { - badSpecific("urn:uuid:" + testUUID + "#"); - } + @Test public void parse_urn_uuid_01() { good("urn:uuid:"+testUUID); } + + @Test public void parse_urn_uuid_02() { good("urn:uuid:"+(testUUID.toUpperCase(Locale.ROOT))); } + + @Test public void parse_urn_uuid_03() { parse_uuid_8141("urn:uuid:"+testUUID+"#frag"); } + + // Zero char fragment is legal. 
+ @Test public void parse_urn_uuid_04() { parse_uuid_8141("urn:uuid:" + testUUID + "#"); } // RFC 8141 allows query string must be ?=<one+ char> or ?+<one+ char> - @Test - public void parse_uuid_bad_8141_03() { - badSpecific("urn:uuid:" + testUUID + "?=chars"); - } + @Test public void parse_urn_uuid_21() { parse_uuid_8141("urn:uuid:" + testUUID + "?=chars"); } - @Test - public void parse_uuid_bad_8141_04() { - badSpecific("urn:uuid:" + testUUID + "?+chars"); - } + // RFC 8141 allows "query string" where it must be ?=<one+ char> or ?+<one+ char> + @Test public void parse_urn_uuid_22() { parse_uuid_8141("urn:uuid:" + testUUID + "?=ab/?cd"); } + + @Test public void parse_urn_uuid_23() { parse_uuid_8141("urn:uuid:" + testUUID + "?+chars"); } + + @Test public void parse_urn_uuid_24() { parse_uuid_8141("urn:uuid:" + testUUID + "?+ab/?cd"); } + + @Test public void parse_urn_uuid_25() { parse_uuid_8141("urn:uuid:" + testUUID + "?+chars?=chars#frag"); } + + @Test public void parse_urn_uuid_26() { parse_uuid_8141("urn:uuid:" + testUUID + "?+chars?=chars#frag"); } + + // Strange cases. + // The r- and q- components can have '?', '+' and '=' in them + // so the first occurrence captures everything up to the + // fragment or end of string. + + @Test public void parse_urn_uuid_27() { parse_uuid_8141("urn:uuid:" + testUUID + "?+chars?"); } + + @Test public void parse_urn_uuid_28() { parse_uuid_8141("urn:uuid:" + testUUID + "?+chars??=next"); } + + // Single q-component + @Test public void parse_urn_uuid_29() { parse_uuid_8141("urn:uuid:" + testUUID + "?=chars?a=b"); } + + // Single q-component! + @Test public void parse_urn_uuid_30() { parse_uuid_8141("urn:uuid:" + testUUID + "?=aaa?+bbb"); } + + // Single r-component + @Test public void parse_urn_uuid_31() { parse_uuid_8141("urn:uuid:" + testUUID + "?+aaa?+bbb"); } + + @Test public void parse_urn_uuid_32() { parse_uuid_8141("urn:uuid:" + testUUID + "?=Q?+R"); } + + // Always bad. + // Query string, not a component.
+ @Test public void parse_urn_uuid_bad_01() { badSpecific("urn:uuid:06e775ac-2c38-11b2-801c-8086f2cc00c9?query=foo"); } + + // Bad length + @Test public void parse_urn_uuid_bad_02() { badSpecific("urn:uuid:06e775ac"); } + + // Bad character + @Test public void parse_urn_uuid_bad_03() { badSpecific("urn:uuid:06e775ac-ZZZZ-11b2-801c-8086f2cc00c9"); } + + // Always bad. At least one char. + @Test public void parse_urn_uuid_bad_04() { badSpecific("urn:uuid:" + testUUID + "?="); } + + // Always bad. At least one char. + @Test public void parse_urn_uuid_bad_05() { badSpecific("urn:uuid:" + testUUID + "?+"); } + + @Test public void parse_urn_uuid_bad_06() { badSpecific("urn:uuid:" + testUUID + "?"); } + + @Test public void parse_urn_uuid_bad_07() { badSpecific("urn:uuid:" + testUUID + "?abc"); } + + // XXX Not ASCII in the NSS part + @Test public void parse_urn_uuid_bad_12() { badSpecific("urn:uuid:" + testUUID + "#αβγ"); } + @Test public void parse_urn_uuid_bad_13() { badSpecific("urn:uuid:" + testUUID + "?=αβγ"); } + @Test public void parse_urn_uuid_bad_14() { badSpecific("urn:uuid:" + testUUID + "?+αβγ"); } - private void good(String string) { + private static void good(String string) { IRIx iri = IRIx.create(string); assertNotNull(iri); if ( true ) { @@ -320,13 +373,14 @@ public class TestRFC3986 extends AbstractTestIRIx { assertNotNull(javaURI); } - private void goodNoIRICheck(String string) { + // Where jena-iri does not get the right answer. + private static void goodNoIRICheck(String string) { IRIx iri = IRIx.create(string); java.net.URI javaURI = java.net.URI.create(string); } // Expect an IRIParseException - private void bad(String string) { + private static void bad(String string) { try { IRIs.checkEx(string); IRIs.reference(string); @@ -335,7 +389,7 @@ public class TestRFC3986 extends AbstractTestIRIx { } catch (IRIException ex) {} } - private void badSpecific(String string) { + private static void badSpecific(String string) { bad(string); } }