This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit db5c1583ae791754d77b0959345b57f971ea798e Author: Andy Seaborne <[email protected]> AuthorDate: Thu Jun 26 19:06:01 2025 +0100 GH-3281: Update TurtleJCC to check RDF Strings --- jena-arq/Grammar/Turtle/turtle.jj | 7 +++--- .../jena/riot/lang/extra/LangParserBase.java | 20 +++++++++++++++- .../jena/riot/lang/extra/javacc/TurtleJavacc.java | 7 +++--- .../apache/jena/arq/junit/riot/ParseForTest.java | 26 +++++++++++++++++++- .../org/apache/jena/riot/Scripts_AltTurtle.java | 6 ++--- jena-cmds/src/test/java/arq/rdftests.java | 28 +++++++++++++--------- 6 files changed, 70 insertions(+), 24 deletions(-) diff --git a/jena-arq/Grammar/Turtle/turtle.jj b/jena-arq/Grammar/Turtle/turtle.jj index 0c37b9eae2..7024dd6b5b 100644 --- a/jena-arq/Grammar/Turtle/turtle.jj +++ b/jena-arq/Grammar/Turtle/turtle.jj @@ -108,10 +108,10 @@ String VersionSpecificer() : { Token t; String verStr; } // | t = <STRING_LITERAL_LONG2> { verStr = stripQuotes3(t.image) ; } ) { - checkString(verStr, t.beginLine, t.beginColumn) ; - verStr = unescapeStr(verStr, t.beginLine, t.beginColumn) ; + verStr = unescapeStr(verStr, t.beginLine, t.beginColumn) ; + checkRDFString(verStr, t.beginLine, t.beginColumn) ; return verStr ; - } + } } void DirectiveOld() : { Token t ; Token t2 ; String iri ; String verStr ; } @@ -302,7 +302,6 @@ String String() : { Token t ; String lex ; } | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; } ) { - checkString(lex, t.beginLine, t.beginColumn) ; lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; return lex ; } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java index 83573de14a..d9aa0f914c 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java @@ -85,6 +85,7 @@ public class LangParserBase { } protected Node createURI(String iriStr, int line, int column) { + checkRDFString(iriStr, line, column); return profile.createURI(iriStr, line, column); } @@ -97,13 +98,29 @@ public class LangParserBase { } protected Node createListNode(int line, int column) { - return createBNode(line, column); + return createBNode(line, column); } + /** @deprecated Use {@link #checkRDFString}. */ + @Deprecated(forRemoval=true) protected void checkString(String string, int line, int column) { + checkRDFString(string, line, column); + } + + /** + * Apply any checks for "RDF String" to a string that has already had escape processing applied. + * An RDF String is a sequence of codepoints in the range U+0000 to U+10FFFF, excluding surrogates. + * Because this is java, we test for no non-paired surrogates. + * A surrogate pair is high-low. + */ + protected static void checkRDFString(String string, int line, int column) { for ( int i = 0 ; i < string.length() ; i++ ) { // Not "codePointAt" which does surrogate processing. char ch = string.charAt(i); + + if ( ! Character.isValidCodePoint(ch) ) + throw new RiotParseException(String.format("Illegal code point in \\U sequence value: 0x%08X", ch), line, column); + // Check surrogate pairs are pairs. if ( Character.isHighSurrogate(ch) ) { i++; @@ -170,6 +187,7 @@ public class LangParserBase { protected String resolveQuotedIRI(String iriStr, int line, int column) { iriStr = LangParserLib.stripQuotes(iriStr); iriStr = unescapeIRI(iriStr); + checkRDFString(iriStr, line, column); // Check if ( iriStr.contains("<") || iriStr.contains(">") ) throw new RiotParseException("Illegal character '<' or '>' in IRI: '"+iriStr+"'", line, column); diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java index 26dfc7a578..bb070d787d 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java @@ -151,8 +151,8 @@ verStr = stripQuotes(t.image) ; jj_consume_token(-1); throw new ParseException(); } -checkString(verStr, t.beginLine, t.beginColumn) ; - verStr = unescapeStr(verStr, t.beginLine, t.beginColumn) ; +verStr = unescapeStr(verStr, t.beginLine, t.beginColumn) ; + checkRDFString(verStr, t.beginLine, t.beginColumn) ; {if ("" != null) return verStr ;} throw new Error("Missing return statement in function"); } @@ -658,8 +658,7 @@ lex = stripQuotes3(t.image) ; jj_consume_token(-1); throw new ParseException(); } -checkString(lex, t.beginLine, t.beginColumn) ; - lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; +lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; {if ("" != null) return lex ;} throw new Error("Missing return statement in function"); } diff --git a/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java b/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java index 2f862c4859..f839298839 100644 --- a/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java +++ b/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java @@ -25,13 +25,37 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.jena.riot.*; import org.apache.jena.riot.system.*; +/** + * Manage parsers used for tests, separate from the overall system setup. + */ public class ParseForTest { public static void parse(StreamRDF destination, String uri, Lang lang, boolean ignoreWarnings) { parse(destination, uri, uri, lang, ignoreWarnings); } - public static Map<Lang, ReaderRIOTFactory> alternativeReaderFactories = new ConcurrentHashMap<>(); + /** + * Map of {@link Lang} to {@link ReaderRIOTFactory} that is consulted before + * defaulting to the standard system parser. + */ + private static Map<Lang, ReaderRIOTFactory> alternativeReaderFactories = new ConcurrentHashMap<>(); + + /** + * Add an alternative language implementation to + * {@link #alternativeReaderFactories} map. This map of {@link Lang} to + * {@link ReaderRIOTFactory} is consulted before defaulting to the standard + * system parser. + */ + public static void registerAlternative(Lang lang, ReaderRIOTFactory factory) { + alternativeReaderFactories.put(lang, factory); + } + + /** + * Remove an registration of an alternative for {@link Lang}. + */ + public static void unregisterAlternative(Lang lang) { + alternativeReaderFactories.remove(lang); + } public static void parse(StreamRDF destination, String uri, String base, Lang lang, boolean ignoreWarnings) { diff --git a/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java b/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java index 0246af9fde..544031d842 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java @@ -38,7 +38,7 @@ import org.junit.runner.RunWith ; // rdf-tests CG "testing/rdf-tests-cg/turtle/manifest.ttl" - + // [rdf-star CG] RDF star CG tests. No longer valid // "testing/rdf-star-cg/turtle/syntax/manifest.ttl", // "testing/rdf-star-cg/turtle/eval/manifest.ttl" @@ -52,11 +52,11 @@ public class Scripts_AltTurtle JenaSystem.init(); // Register language and parser factory. TurtleJCC.register(); - ParseForTest.alternativeReaderFactories.put(Lang.TURTLE, TurtleJCC.factory); + ParseForTest.registerAlternative(Lang.TURTLE, TurtleJCC.factory); } @AfterClass public static void afterClass() { - ParseForTest.alternativeReaderFactories.remove(Lang.TURTLE); + ParseForTest.unregisterAlternative(Lang.TURTLE); } } diff --git a/jena-cmds/src/test/java/arq/rdftests.java b/jena-cmds/src/test/java/arq/rdftests.java index f8bdde2635..ee0752686b 100644 --- a/jena-cmds/src/test/java/arq/rdftests.java +++ b/jena-cmds/src/test/java/arq/rdftests.java @@ -28,6 +28,7 @@ import org.apache.jena.Jena; import org.apache.jena.arq.junit.SurpressedTest; import org.apache.jena.arq.junit.TextTestRunner; import org.apache.jena.arq.junit.manifest.ManifestEntry; +import org.apache.jena.arq.junit.riot.ParseForTest; import org.apache.jena.arq.junit.riot.RiotTests; import org.apache.jena.arq.junit.riot.VocabLangRDF; import org.apache.jena.arq.junit.sparql.SparqlTests; @@ -46,10 +47,8 @@ import org.apache.jena.rdf.model.Literal; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Resource; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFDataMgr; -import org.apache.jena.riot.RIOT; -import org.apache.jena.riot.SysRIOT; +import org.apache.jena.riot.*; +import org.apache.jena.riot.lang.extra.TurtleJCC; import org.apache.jena.sparql.expr.E_Function; import org.apache.jena.sparql.expr.NodeValue; import org.apache.jena.sparql.junit.EarlReport; @@ -94,9 +93,11 @@ public class rdftests extends CmdGeneral protected ArgDecl strictDecl = new ArgDecl(ArgDecl.NoValue, "strict"); protected boolean cmdStrictMode = false; - protected ArgDecl arqDecl = new ArgDecl(ArgDecl.NoValue, "arq"); + // Use the alternative Turtle parser which is JavaCC based. + protected ArgDecl useTTLjcc = new ArgDecl(ArgDecl.NoValue, "ttljcc"); + protected ArgDecl useARQ = new ArgDecl(ArgDecl.NoValue, "arq"); // Run with ".rq" as ARQ extended syntax. - protected boolean arqAsNormal = false; + protected boolean argAsNormal = false; protected ArgDecl earlDecl = new ArgDecl(ArgDecl.NoValue, "earl"); protected boolean createEarlReport = false; @@ -114,9 +115,11 @@ public class rdftests extends CmdGeneral super.modVersion.addClass(Jena.class); getUsage().startCategory("Tests (execute test manifest)"); getUsage().addUsage("<manifest>", "run the tests specified in the given manifest"); - add(arqDecl, "--arq", "Operate with ARQ syntax"); - add(strictDecl, "--strict", "Operate in strict mode (no extensions of any kind)"); - add(earlDecl, "--earl", "create EARL report"); + + add(useARQ, "--arq", "Operate with ARQ syntax"); + add(useTTLjcc, "--ttljcc", "Use the alternative Turtle parser in tests"); + add(strictDecl, "--strict", "Operate in strict mode (no extensions of any kind)"); + add(earlDecl, "--earl", "Create EARL report"); addModule(modContext); } @@ -134,11 +137,14 @@ public class rdftests extends CmdGeneral cmdStrictMode = super.hasArg(strictDecl); if ( contains(baseDecl) ) baseURI = super.getValue(baseDecl); - arqAsNormal = contains(arqDecl); + if ( contains(useTTLjcc) ) + ParseForTest.registerAlternative(Lang.TURTLE, TurtleJCC.factory); + argAsNormal = contains(useARQ); } @Override protected void exec() { + NodeValue.VerboseWarnings = false; E_Function.WarnOnUnknownFunction = false; EarlReport report = new EarlReport(systemURI); @@ -152,7 +158,7 @@ public class rdftests extends CmdGeneral QueryEvalTest.compareResultSetsByValue = false; } - if ( arqAsNormal ) + if ( argAsNormal ) SparqlTests.defaultForSyntaxTests = Syntax.syntaxARQ; else SparqlTests.defaultForSyntaxTests = Syntax.syntaxSPARQL_12;
