This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 58de521d54d622daaa2df120306c4e2e29382fd8 Author: Andy Seaborne <[email protected]> AuthorDate: Thu Jul 10 12:22:25 2025 +0100 GH-3304: Fix private use parsing; refactor code; use JUnit5 exclusively --- .../{TestPackage_core.java => JenaCoreTest.java} | 0 jena-langtag/pom.xml | 10 - .../org/apache/jena/langtag/InternalLangTag.java | 15 +- .../org/apache/jena/langtag/LangTagRFC5646.java | 272 ++++++++++++--------- .../java/org/apache/jena/langtag/TestLangTag.java | 20 +- .../org/apache/jena/langtag/TestLangTagFormat.java | 39 +-- 6 files changed, 215 insertions(+), 141 deletions(-) diff --git a/jena-core/src/test/java/org/apache/jena/test/TestPackage_core.java b/jena-core/src/test/java/org/apache/jena/test/JenaCoreTest.java similarity index 100% rename from jena-core/src/test/java/org/apache/jena/test/TestPackage_core.java rename to jena-core/src/test/java/org/apache/jena/test/JenaCoreTest.java diff --git a/jena-langtag/pom.xml b/jena-langtag/pom.xml index ac3ae08791..8610b5000e 100644 --- a/jena-langtag/pom.xml +++ b/jena-langtag/pom.xml @@ -54,16 +54,6 @@ <scope>test</scope> </dependency> - <!-- - Needed for @Parameterized test suite - JUnit5 will eventually have @ParameterizedClass. - --> - <dependency> - <groupId>org.junit.vintage</groupId> - <artifactId>junit-vintage-engine</artifactId> - <scope>test</scope> - </dependency> - </dependencies> <build> diff --git a/jena-langtag/src/main/java/org/apache/jena/langtag/InternalLangTag.java b/jena-langtag/src/main/java/org/apache/jena/langtag/InternalLangTag.java index 93281be82a..a6e75ebb1a 100644 --- a/jena-langtag/src/main/java/org/apache/jena/langtag/InternalLangTag.java +++ b/jena-langtag/src/main/java/org/apache/jena/langtag/InternalLangTag.java @@ -123,6 +123,15 @@ class InternalLangTag { return true; } + static boolean isAlphaNum(String string, int start, int end) { + for ( int i = start ; i < end ; i++ ) { + char ch = string.charAt(i); + if ( ! 
isAlphaNum(ch) ) + return false; + } + return true; + } + static void checkAlphaMinus(String string, int N, int start, int end) { for ( int i = start ; i < end ; i++ ) { char ch = string.charAt(i); @@ -147,7 +156,7 @@ class InternalLangTag { } } - /*package*/ static String str(char ch) { + static String str(char ch) { return String.format("'%s' U+%04X", Character.valueOf(ch), (int)ch); } @@ -159,6 +168,10 @@ class InternalLangTag { return ( ch >= '0' && ch <= '9' ); } + static boolean isAlphaNum(char ch) { + return isAlpha(ch) || isNum(ch); + } + static boolean isMinus(char ch) { return ( ch == '-' ); } diff --git a/jena-langtag/src/main/java/org/apache/jena/langtag/LangTagRFC5646.java b/jena-langtag/src/main/java/org/apache/jena/langtag/LangTagRFC5646.java index 23e33a62f4..99a009863e 100644 --- a/jena-langtag/src/main/java/org/apache/jena/langtag/LangTagRFC5646.java +++ b/jena-langtag/src/main/java/org/apache/jena/langtag/LangTagRFC5646.java @@ -34,56 +34,52 @@ import java.util.Set; * <a href="https://www.rfc-editor.org/info/rfc5646">RFC 5646: Tags for Identifying Languages</a> * </p> */ -public final class LangTagRFC5646 implements LangTag{ - // The language tag as given. - private final String langTagString; - - // Grandfathered - private boolean isGrandfathered = false; - // Private use of the whole Language-Tag - private boolean isPrivateUseLanguage = false; - - /* Formatting: https://datatracker.ietf.org/doc/html/rfc5646#section-2.1.1 - * - * All subtags, including extension and private use subtags, - * use lowercase letters with two exceptions: two-letter - * and four-letter subtags that neither appear at the start of the tag - * nor occur after singletons. Such two-letter subtags are all - * uppercase (as in the tags "en-CA-x-ca" or "sgn-BE-FR") and four- - * letter subtags are titlecase (as in the tag "az-Latn-x-latn"). 
- * - * See str() - */ - - // Helpers - private enum CaseRule { TITLE, LOWER, UPPER } - private enum CharSet { ALPHA, ALPHANUM } +public final class LangTagRFC5646 implements LangTag { public static LangTag create(String string) { LangTagRFC5646 langtag = parser(string); return langtag; } + // The language tag as given. + private final String langTagString; + + /* Formatting: https://datatracker.ietf.org/doc/html/rfc5646#section-2.1.1 + * + * All subtags, including extension and private use subtags, + * use lowercase letters with two exceptions: two-letter + * and four-letter subtags that neither appear at the start of the tag + * nor occur after singletons. Such two-letter subtags are all + * uppercase (as in the tags "en-CA-x-ca" or "sgn-BE-FR") and four- + * letter subtags are titlecase (as in the tag "az-Latn-x-latn"). + * + * See str() + */ + + private final boolean isGrandfathered; + // Private use of the whole Language-Tag + private final boolean isPrivateUseLanguage; + // Start/Finish indexes, excluding the initial '-' - private int language0 = -1 ; - private int language1 = -1 ; + private final int language0; + private final int language1; - private int script0 = -1 ; - private int script1 = -1 ; + private final int script0; + private final int script1; - private int region0 = -1 ; - private int region1 = -1 ; + private final int region0; + private final int region1; - private int variant0 = -1 ; - private int variant1 = -1 ; + private final int variant0; + private final int variant1; // All extensions. - private int extension0 = -1 ; - private int extension1 = -1 ; + private final int extension0; + private final int extension1; // Private use sub tag (not private use of the whole language tag, which starts "x-"). 
- private int privateuse0 = -1 ; - private int privateuse1 = -1 ; + private final int privateuse0; + private final int privateuse1; @Override public String getLanguage() { @@ -232,12 +228,64 @@ public final class LangTagRFC5646 implements LangTag{ } private static LangTagRFC5646 parser(String string) { + LangTagRFC5646 langtag = new Builder().parse(string).build(); + return langtag; + } + + // Builder helps tidy the code. + // It allows the LangTagRFC5646 object to have final fields. + // It means there is one place calling the constructor with its many arguments. + + private static class Builder { + // All members of LangTagRFC5646 + String langTagString = null; + boolean isGrandfathered = false; + boolean isPrivateUseLanguage = false; + int language0 = -1; + int language1 = -1; + int script0 = -1; + int script1 = -1; + int region0 = -1; + int region1 = -1; + int variant0 = -1; + int variant1 = -1; + int extension0 = -1; + int extension1 = -1; + int privateuse0 = -1; + int privateuse1 = -1; + + Builder() {} + + private Builder parse(String string) { + final Builder builder = this; + LangTagRFC5646.parse(builder, string); + return this; + } + + private LangTagRFC5646 build() { + return new LangTagRFC5646(langTagString, language0, language1, + script0, script1, region0, region1, variant0, variant1, + extension0, extension1, privateuse0, privateuse1, + isGrandfathered, isPrivateUseLanguage); + } + } + // Helpers + private enum CaseRule { TITLE, LOWER, UPPER } + private enum CharRange { ALPHA, ALPHANUM } + + // The whole of function 'parse' is enclosed in formatter:off + // @formatter:off + static void parse(Builder builder, String string) { // A segment is a sequence of A2ZN characters separated by '-'. 
- LangTagRFC5646 langtag = new LangTagRFC5646(string); + builder.langTagString = string; final int N = string.length(); - // @formatter:off + + // Language-Tag = langtag ; normal language tags + // / privateuse ; private use tag + // / grandfathered ; grandfathered tags + // langtag = language // ["-" script] // ["-" region] @@ -273,7 +321,6 @@ public final class LangTagRFC5646 implements LangTag{ // / %x79-7A ; y - z // // privateuse = "x" 1*("-" (1*8alphanum)) - // @formatter:on if ( N == 0 ) InternalLangTag.error("Empty string"); @@ -297,22 +344,22 @@ public final class LangTagRFC5646 implements LangTag{ // variant of "en-GB", each of them, in its entirety, // represents a language. // - langtag.language0 = 0; - langtag.language1 = N; - langtag.isGrandfathered = true; + builder.language0 = 0; + builder.language1 = N; + builder.isGrandfathered = true; // Exception. if ( string.equalsIgnoreCase("en-GB-oed") ) { // "oed" is "Oxford English Dictionary spelling" // Better is the replacement "en-GB-oxendict" - langtag.language0 = 0; - langtag.language1 = 2; - langtag.region0 = 3; - langtag.region1 = 5; + builder.language0 = 0; + builder.language1 = 2; + builder.region0 = 3; + builder.region1 = 5; // Non-standard variant. - langtag.variant0 = 6; - langtag.variant1 = N; + builder.variant0 = 6; + builder.variant1 = N; } - return langtag; + return; } // -- language @@ -335,15 +382,15 @@ public final class LangTagRFC5646 implements LangTag{ the country of Switzerland (or any other value in the IANA registry) unless there is a private agreement in place to do so. See Section 4.6. 
- */ - langtag.isPrivateUseLanguage = true; + */ + builder.isPrivateUseLanguage = true; int idxPrivateUseStart = 0; - int idxPrivateUseEnd = maybeSubtags(string, N, idxPrivateUseStart+segLen, 1, 8); - langtag.privateuse0 = idxPrivateUseStart; - langtag.privateuse1 = idxPrivateUseEnd; - if ( langtag.privateuse1 < N ) - InternalLangTag.error("Trailing characters in private langtag: '%s'", string.substring(langtag.privateuse1)); - return langtag; + int idxPrivateUseEnd = maybeSubtags(string, N, idxPrivateUseStart+segLen, CharRange.ALPHANUM, 1, 8); + builder.privateuse0 = idxPrivateUseStart; + builder.privateuse1 = idxPrivateUseEnd; + if ( builder.privateuse1 < N ) + InternalLangTag.error("Trailing characters in private langtag: '%s'", string.substring(builder.privateuse1)); + return; } // else InternalLangTag.error("Language part is 1 character: it must be 2-3 characters (4-8 reserved for future use), \"x-\", or a recognized grandfathered tag"); @@ -354,43 +401,43 @@ public final class LangTagRFC5646 implements LangTag{ if ( idx2 < 0 ) { // language only. 
- langtag.language0 = 0; - langtag.language1 = N; - InternalLangTag.checkAlpha(string, N, langtag.language0, langtag.language1); - return langtag; + builder.language0 = 0; + builder.language1 = N; + InternalLangTag.checkAlpha(string, N, builder.language0, builder.language1); + return; } if ( idx == idx2 ) InternalLangTag.error("Can not find the language subtag: '%s'", string); - langtag.language0 = idx; + builder.language0 = idx; if ( segLen == 2 || segLen == 3 ) { - // -- Language extension subtags/ + // -- Language extension subtags // language = 2*3ALPHA ; shortest ISO 639 code // ["-" extlang] // extlang = 3ALPHA ; selected ISO 639 codes // *2("-" 3ALPHA) ; permanently reserved int extStart = idx+segLen; - InternalLangTag.checkAlpha(string, N, langtag.language0, extStart); + InternalLangTag.checkAlpha(string, N, builder.language0, extStart); // Extensions are 1 to 3 3ALPHA subtags - int extEnd = maybeSubtags(string, N, extStart, 3, 3); + int extEnd = maybeSubtags(string, N, extStart, CharRange.ALPHA, 3, 3); if ( extEnd > extStart ) { idx2 = extEnd; - InternalLangTag.checkAlphaMinus(string, N, extStart, langtag.language1); + InternalLangTag.checkAlphaMinus(string, N, extStart, builder.language1); } } else if ( segLen >= 4 && segLen <= 8 ) { // / 4ALPHA ; or reserved for future use // / 5*8ALPHA ; or registered language subtag // Dubious. 
- InternalLangTag.checkAlpha(string, N, langtag.language0, idx2); + InternalLangTag.checkAlpha(string, N, builder.language0, idx2); } else { InternalLangTag.error("Language too long (2-3 characters, 4-8 reserved for future use)"); } - langtag.language1 = idx2; + builder.language1 = idx2; // Info - noteSegment("language", string, langtag.language0, langtag.language1); + noteSegment("language", string, builder.language0, builder.language1); // Move on - next subtag idx = segmentNextStart(N, idx, idx2); @@ -407,10 +454,10 @@ public final class LangTagRFC5646 implements LangTag{ int start = idx; int finish = idx+segLen; - langtag.script0 = idx; - langtag.script1 = idx+segLen; - InternalLangTag.checkAlpha(string, N, langtag.script0, langtag.script1); - noteSegment("script", string, langtag.script0, langtag.script1); + builder.script0 = idx; + builder.script1 = idx+segLen; + InternalLangTag.checkAlpha(string, N, builder.script0, builder.script1); + noteSegment("script", string, builder.script0, builder.script1); // Move on. idx = segmentNextStart(N, idx, idx2); @@ -424,13 +471,13 @@ public final class LangTagRFC5646 implements LangTag{ // / 3DIGIT ; UN M.49 code if ( segLen == 2 || segLen == 3 ) { // Region - langtag.region0 = idx; - langtag.region1 = idx+segLen; + builder.region0 = idx; + builder.region1 = idx+segLen; if ( segLen == 2 ) - InternalLangTag.checkAlpha(string, N, langtag.region0, langtag.region1); + InternalLangTag.checkAlpha(string, N, builder.region0, builder.region1); else - InternalLangTag.checkDigits(string, N, langtag.region0, langtag.region1); - noteSegment("region", string, langtag.region0, langtag.region1); + InternalLangTag.checkDigits(string, N, builder.region0, builder.region1); + noteSegment("region", string, builder.region0, builder.region1); // Move on. 
idx = segmentNextStart(N, idx, idx2); @@ -445,11 +492,11 @@ public final class LangTagRFC5646 implements LangTag{ for ( ;; ) { if ( segLen >= 5 && segLen <= 8) { // variant 5*8alphanum - if ( langtag.variant0 == -1 ) - langtag.variant0 = idx; - langtag.variant1 = idx+segLen; - InternalLangTag.checkAlphaNum(string, N, idx, langtag.variant1); - noteSegment("variant", string, langtag.variant0, langtag.variant1); + if ( builder.variant0 == -1 ) + builder.variant0 = idx; + builder.variant1 = idx+segLen; + InternalLangTag.checkAlphaNum(string, N, idx, builder.variant1); + noteSegment("variant", string, builder.variant0, builder.variant1); // Move on. idx = segmentNextStart(N, idx, idx2); idx2 = segmentNextFinish(string, N, idx); @@ -462,11 +509,11 @@ public final class LangTagRFC5646 implements LangTag{ // DIGIT 3alphanum char ch = string.charAt(idx); if ( ch >= '0' || ch <= '9' ) { - if ( langtag.variant0 == -1 ) - langtag.variant0 = idx; - langtag.variant1 = idx+segLen; - InternalLangTag.checkAlphaNum(string, N, idx, langtag.variant1); - noteSegment("variant", string, langtag.variant0, langtag.variant1); + if ( builder.variant0 == -1 ) + builder.variant0 = idx; + builder.variant1 = idx+segLen; + InternalLangTag.checkAlphaNum(string, N, idx, builder.variant1); + noteSegment("variant", string, builder.variant0, builder.variant1); } // Move on. idx = segmentNextStart(N, idx, idx2); @@ -498,12 +545,12 @@ public final class LangTagRFC5646 implements LangTag{ InternalLangTag.error("Duplicate extension singleton: '"+singleton+"'"); } - if ( langtag.extension0 == -1 ) - langtag.extension0 = idx; + if ( builder.extension0 == -1 ) + builder.extension0 = idx; // Extension. // 2*8 alphanum int idxExtStart = idx+segLen; - int idxEndExtra = maybeSubtags(string, N, idxExtStart, 2, 8); + int idxEndExtra = maybeSubtags(string, N, idxExtStart, CharRange.ALPHANUM, 2, 8); // Expecting at least one subtag. 
if ( idxExtStart == idxEndExtra ) @@ -511,10 +558,10 @@ public final class LangTagRFC5646 implements LangTag{ if ( idxEndExtra > idxExtStart ) idx2 = idxEndExtra; - langtag.extension1 = idx2; - InternalLangTag.checkAlphaNumMinus(string, N, langtag.extension0, langtag.extension1); + builder.extension1 = idx2; + InternalLangTag.checkAlphaNumMinus(string, N, builder.extension0, builder.extension1); - noteSegment("extension", string, langtag.extension0, langtag.extension1); + noteSegment("extension", string, builder.extension0, builder.extension1); // Move on. idx = segmentNextStart(N, idx, idx2); idx2 = segmentNextFinish(string, N, idx); @@ -525,10 +572,10 @@ public final class LangTagRFC5646 implements LangTag{ // ---- private use if ( inPrivateUseSubtag ) { - langtag.privateuse0 = idx; + builder.privateuse0 = idx; // privateuse = "x" 1*("-" (1*8alphanum)) int idxPrivateUseStart = idx+segLen; - int idxPrivateUseEnd = maybeSubtags(string, N, idxPrivateUseStart, 1, 8); + int idxPrivateUseEnd = maybeSubtags(string, N, idxPrivateUseStart, CharRange.ALPHANUM, 1, 8); // Expecting at least one subtag. if ( idxPrivateUseStart == idxPrivateUseEnd ) @@ -536,10 +583,10 @@ public final class LangTagRFC5646 implements LangTag{ if ( idxPrivateUseEnd > idxPrivateUseStart ) idx2 = idxPrivateUseEnd; - langtag.privateuse1 = idx2; - InternalLangTag.checkAlphaNumMinus(string, N, langtag.privateuse0, langtag.privateuse1); + builder.privateuse1 = idx2; + InternalLangTag.checkAlphaNumMinus(string, N, builder.privateuse0, builder.privateuse1); - noteSegment("private use", string, langtag.privateuse0, langtag.privateuse1); + noteSegment("private use", string, builder.privateuse0, builder.privateuse1); // Private use runs to end of string. But do checking. // Move on. 
idx = segmentNextStart(N, idx, idx2); @@ -557,12 +604,8 @@ public final class LangTagRFC5646 implements LangTag{ InternalLangTag.error("Trailing characters: '%s'", string.substring(idx)); if ( idx2 >= 0 ) InternalLangTag.error("Bad string: '%s'", string); - return langtag; - } - - private LangTagRFC5646(String string) { - this.langTagString = string; } + // @formatter:on private LangTagRFC5646(String string, int language0, int language1, @@ -571,13 +614,17 @@ public final class LangTagRFC5646 implements LangTag{ int variant0, int variant1, int extension0, int extension1, int privateuse0, int privateuse1, - boolean isGrandfathered) { + boolean isGrandfathered, + boolean isPrivateUseLanguage) { this.langTagString = string; this.isGrandfathered = isGrandfathered; + this.isPrivateUseLanguage = isPrivateUseLanguage; this.language0 = language0; this.language1 = language1; this.script0 = script0; this.script1 = script1; + this.region0 = region0; + this.region1 = region1; this.variant0 = variant0; this.variant1 = variant1; this.extension0 = extension0; @@ -587,7 +634,7 @@ public final class LangTagRFC5646 implements LangTag{ } /** Zero or more subtags, each between min and max length. */ - private static int maybeSubtags(String string, int N, int idxStart, int min, int max) { + private static int maybeSubtags(String string, int N, int idxStart, CharRange charRange, int min, int max) { // Looking at the '-' or end of string. int numExt = 0; int count = 0; @@ -597,7 +644,7 @@ public final class LangTagRFC5646 implements LangTag{ char ch = string.charAt(x); if ( ch != '-' ) break; - int x1 = maybeOneSubtag(string, N, x+1, min, max); + int x1 = maybeOneSubtag(string, N, x+1, charRange, min, max); if ( x1 <= 0 ) break; if ( x1 == N ) { @@ -613,18 +660,23 @@ public final class LangTagRFC5646 implements LangTag{ * Peek for a segment between min and max in length. * The initial "-" has been read. 
*/ - private static int maybeOneSubtag(String string, int N, int idxStart, int min, int max) { + private static int maybeOneSubtag(String string, int N, int idxStart, CharRange charRange, int min, int max) { int idx = idxStart; if ( idx >= N ) return -1; int idx2 = segmentNextFinish(string, N, idx); int segLen = segmentLength(N, idx, idx2); if ( segLen == 0 ) - InternalLangTag.error("Bad langtag. Found '--'"); + InternalLangTag.error("Bad builder. Found '--'"); if ( segLen < min || segLen > max ) return -1; - if ( ! InternalLangTag.isAlpha(string, idxStart, idxStart+segLen) ) + boolean valid = + switch (charRange) { + case ALPHA -> InternalLangTag.isAlpha(string, idxStart, idxStart+segLen); + case ALPHANUM -> InternalLangTag.isAlphaNum(string, idxStart, idxStart+segLen); + }; + if ( !valid ) return -1; return idxStart+segLen; } diff --git a/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTag.java b/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTag.java index 2b2c35bddd..bb9b443dc2 100644 --- a/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTag.java +++ b/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTag.java @@ -43,12 +43,27 @@ public class TestLangTag { @Test public void test_lang_basic_09() { testRFC5646("de-CH-w-extend", "de-CH-w-extend", "de", null, "CH", null, "w-extend"); } @Test public void test_lang_basic_10() { testRFC5646("de-CH-w-extend-extend", "de-CH-w-extend-extend", "de", null, "CH", null, "w-extend-extend"); } + //String langString, String formatted, String lang, String script, String region, String variant, String extension) + + // Alignment : region is 3 num, variant is 5-8 num. 
+ @Test public void test_lang_basic_11() { testRFC5646("en-123", "en-123", "en", null, "123", null, null); } + @Test public void test_lang_basic_12() { testRFC5646("en-12345", "en-12345", "en", null, null, "12345", null); } + @Test public void test_lang_basic_13() { testRFC5646("en-123-12345678", "en-123-12345678", "en", null, "123", "12345678", null); } + // Extension is "s-XX" (2 to 8). + @Test public void test_lang_basic_14() { testRFC5646("en-s-12", "en-s-12", "en", null, null, null, "s-12"); } + @Test public void test_lang_basic_15() { testRFC5646("en-s-12345678", "en-s-12345678", "en", null, null, null, "s-12345678"); } + @Test public void test_lang_basic_20() { testPrivateUse("de-CH-x-phonebk-morech", "de-CH-x-phonebk-morech", "de", null, "CH", null, null, "x-phonebk-morech"); } - // Private use language tag. No language! + // Private use language tag. @Test public void test_lang_basic_21() { testPrivateUse("x-private", "x-private", null, null, null, null, null, "x-private"); } + // Private use subtag. @Test public void test_lang_basic_22() { testPrivateUse("az-Latn-x-latn", "az-Latn-x-latn", "az", "Latn", null, null, null, "x-latn"); } @Test public void test_lang_basic_23() { testPrivateUse("sss-x-y", "sss-x-y", "sss", null, null, null, null, "x-y"); } + @Test public void test_lang_basic_24() { testPrivateUse("sss-x-1", "sss-x-1", "sss", null, null, null, null, "x-1"); } + @Test public void test_lang_basic_25() { testPrivateUse("sss-x-12345678", "sss-x-12345678", "sss", null, null, null, null, "x-12345678"); } + // Private use language: not language, only a private use section. 
+ @Test public void test_lang_basic_26() { testPrivateUse("x-12345678", "x-12345678", null, null, null, null, null, "x-12345678"); } // 4 chars reserved // 5-8 characters @@ -73,6 +88,8 @@ public class TestLangTag { @Test public void test_lang_bad_21() { testBad("abcdefghz"); } @Test public void test_lang_bad_22() { testBad("en-abcdefghz"); } @Test public void test_lang_bad_23() { testBad("en-Latn-x-abcdefghz"); } + @Test public void test_lang_bad_24() { testBad("en-123456789"); } + // Bad extension @Test public void test_lang_bad_31() { testBad("sss-d"); } @@ -84,6 +101,7 @@ public class TestLangTag { @Test public void test_lang_bad_45() { testBad("sss-x"); } @Test public void test_lang_bad_46() { testBad("sss-x-"); } @Test public void test_lang_bad_47() { testBad("sss-x-part-"); } + @Test public void test_lang_bad_48() { testBad("sss-x-part-Q12345678"); } @Test public void test_lang_bad_repeated_extension() { // "en-a-bbb-a-ccc" is invalid because the subtag 'a' appears twice. diff --git a/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTagFormat.java b/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTagFormat.java index c4c33c96bf..906bf2e4d5 100644 --- a/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTagFormat.java +++ b/jena-langtag/src/test/java/org/apache/jena/langtag/TestLangTagFormat.java @@ -18,38 +18,39 @@ package org.apache.jena.langtag; -import java.util.ArrayList; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.util.List; import java.util.function.Function; +import java.util.stream.Stream; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedClass; +import org.junit.jupiter.params.provider.MethodSource; -// JUnit4 -// Junit5 is missing @ParameterizedClass which may arrive eventually -@RunWith(Parameterized.class) 
+@ParameterizedClass +@MethodSource("provideArgs") public class TestLangTagFormat { + private static Function<String, String> formatter1 = (s)-> LangTagRFC5646.create(s).str(); private static Function<String, String> formatter2 = (s)-> LangTags.basicFormat(s); - @Parameters(name = "{index}: {0}") - public static Iterable<Object[]> data() { - List<Object[]> x = new ArrayList<>() ; - - x.add(new Object[] {"LangTagRFC5646", formatter1}); - x.add(new Object[] {"LangTagOps", formatter2}); - return x ; + private record ArgPair(String name, Function<String, String> formatter) {} + private static Stream<ArgPair> provideArgs() { + return List.of + (new ArgPair("LangTagRFC5646", formatter1), + new ArgPair("LangTagOps", formatter2) + ).stream(); } private final String formatterName; private final Function<String, String> formatter; - public TestLangTagFormat(String name, Function<String, String> formatter) { - this.formatterName = name; - this.formatter = formatter; + public TestLangTagFormat(@SuppressWarnings("exports") ArgPair args) { + this.formatterName = args.name; + this.formatter = args.formatter; + } @Test public void testBasicFormat01() { test("de", "de"); } @@ -137,6 +138,6 @@ public class TestLangTagFormat { private void test(String langString, String expected) { String result = formatter.apply(langString); // JUnit4 argument order. - org.junit.Assert.assertEquals(formatterName+"("+langString+"): ", expected, result); + assertEquals(expected, result, ()->formatterName+"("+langString+")"); } }
