This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git

commit db5c1583ae791754d77b0959345b57f971ea798e
Author: Andy Seaborne <[email protected]>
AuthorDate: Thu Jun 26 19:06:01 2025 +0100

    GH-3281: Update TurtleJCC to check RDF Strings
---
 jena-arq/Grammar/Turtle/turtle.jj                  |  7 +++---
 .../jena/riot/lang/extra/LangParserBase.java       | 20 +++++++++++++++-
 .../jena/riot/lang/extra/javacc/TurtleJavacc.java  |  7 +++---
 .../apache/jena/arq/junit/riot/ParseForTest.java   | 26 +++++++++++++++++++-
 .../org/apache/jena/riot/Scripts_AltTurtle.java    |  6 ++---
 jena-cmds/src/test/java/arq/rdftests.java          | 28 +++++++++++++---------
 6 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/jena-arq/Grammar/Turtle/turtle.jj 
b/jena-arq/Grammar/Turtle/turtle.jj
index 0c37b9eae2..7024dd6b5b 100644
--- a/jena-arq/Grammar/Turtle/turtle.jj
+++ b/jena-arq/Grammar/Turtle/turtle.jj
@@ -108,10 +108,10 @@ String VersionSpecificer() : {  Token t; String verStr; }
 //   | t = <STRING_LITERAL_LONG2> { verStr = stripQuotes3(t.image) ; }
   )
     {
-      checkString(verStr, t.beginLine, t.beginColumn) ;
-      verStr = unescapeStr(verStr,  t.beginLine, t.beginColumn) ;
+      verStr = unescapeStr(verStr, t.beginLine, t.beginColumn) ;
+      checkRDFString(verStr, t.beginLine, t.beginColumn) ;
       return verStr ;
-    }  
+    } 
 }
 
 void DirectiveOld() : { Token t ; Token t2 ; String iri ; String verStr ; }
@@ -302,7 +302,6 @@ String String() : { Token t ; String lex ; }
   | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
   )
     {
-      checkString(lex, t.beginLine, t.beginColumn) ;
       lex = unescapeStr(lex,  t.beginLine, t.beginColumn) ;
       return lex ;
     }
diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java
index 83573de14a..d9aa0f914c 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/LangParserBase.java
@@ -85,6 +85,7 @@ public class LangParserBase {
     }
 
     protected Node createURI(String iriStr, int line, int column) {
+        checkRDFString(iriStr, line, column);
         return profile.createURI(iriStr, line, column);
     }
 
@@ -97,13 +98,29 @@ public class LangParserBase {
     }
 
     protected Node createListNode(int line, int column) {
-        return  createBNode(line, column);
+        return createBNode(line, column);
     }
 
+    /** @deprecated Use {@link #checkRDFString}. */
+    @Deprecated(forRemoval=true)
     protected void checkString(String string, int line, int column) {
+        checkRDFString(string, line, column);
+    }
+
+    /**
+     * Apply any checks for "RDF String" to a string that has already had 
escape processing applied.
+     * An RDF String is a sequence of codepoints in the range U+0000 to 
U+10FFFF, excluding surrogates.
+     * Because this is Java, we test that there are no unpaired surrogates.
+     * A surrogate pair is high-low.
+     */
+    protected static void checkRDFString(String string, int line, int column) {
         for ( int i = 0 ; i < string.length() ; i++ ) {
             // Not "codePointAt" which does surrogate processing.
             char ch = string.charAt(i);
+
+            if ( ! Character.isValidCodePoint(ch) )
+                throw new RiotParseException(String.format("Illegal code point 
in \\U sequence value: 0x%08X", ch), line, column);
+
             // Check surrogate pairs are pairs.
             if ( Character.isHighSurrogate(ch) ) {
                 i++;
@@ -170,6 +187,7 @@ public class LangParserBase {
     protected String resolveQuotedIRI(String iriStr, int line, int column) {
         iriStr = LangParserLib.stripQuotes(iriStr);
         iriStr = unescapeIRI(iriStr);
+        checkRDFString(iriStr, line, column);
         // Check
         if ( iriStr.contains("<") || iriStr.contains(">") )
             throw new RiotParseException("Illegal character '<' or '>' in IRI: 
'"+iriStr+"'", line, column);
diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java
 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java
index 26dfc7a578..bb070d787d 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/javacc/TurtleJavacc.java
@@ -151,8 +151,8 @@ verStr = stripQuotes(t.image) ;
       jj_consume_token(-1);
       throw new ParseException();
     }
-checkString(verStr, t.beginLine, t.beginColumn) ;
-      verStr = unescapeStr(verStr,  t.beginLine, t.beginColumn) ;
+verStr = unescapeStr(verStr, t.beginLine, t.beginColumn) ;
+      checkRDFString(verStr, t.beginLine, t.beginColumn) ;
       {if ("" != null) return verStr ;}
     throw new Error("Missing return statement in function");
 }
@@ -658,8 +658,7 @@ lex = stripQuotes3(t.image) ;
       jj_consume_token(-1);
       throw new ParseException();
     }
-checkString(lex, t.beginLine, t.beginColumn) ;
-      lex = unescapeStr(lex,  t.beginLine, t.beginColumn) ;
+lex = unescapeStr(lex,  t.beginLine, t.beginColumn) ;
       {if ("" != null) return lex ;}
     throw new Error("Missing return statement in function");
 }
diff --git 
a/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java 
b/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java
index 2f862c4859..f839298839 100644
--- a/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java
+++ b/jena-arq/src/test/java/org/apache/jena/arq/junit/riot/ParseForTest.java
@@ -25,13 +25,37 @@ import java.util.concurrent.ConcurrentHashMap;
 import org.apache.jena.riot.*;
 import org.apache.jena.riot.system.*;
 
+/**
+ * Manage parsers used for tests, separate from the overall system setup.
+ */
 public class ParseForTest {
 
     public static void parse(StreamRDF destination, String uri, Lang lang, 
boolean ignoreWarnings) {
         parse(destination, uri, uri, lang, ignoreWarnings);
     }
 
-    public static Map<Lang, ReaderRIOTFactory> alternativeReaderFactories = 
new ConcurrentHashMap<>();
+    /**
+     * Map of {@link Lang} to {@link ReaderRIOTFactory} that is consulted 
before
+     * defaulting to the standard system parser.
+     */
+    private static Map<Lang, ReaderRIOTFactory> alternativeReaderFactories = 
new ConcurrentHashMap<>();
+
+    /**
+     * Add an alternative language implementation to the
+     * {@link #alternativeReaderFactories} map. This map of {@link Lang} to
+     * {@link ReaderRIOTFactory} is consulted before defaulting to the standard
+     * system parser.
+     */
+    public static void registerAlternative(Lang lang, ReaderRIOTFactory 
factory) {
+        alternativeReaderFactories.put(lang, factory);
+    }
+
+    /**
+     * Remove a registration of an alternative for {@link Lang}.
+     */
+    public static void unregisterAlternative(Lang lang) {
+        alternativeReaderFactories.remove(lang);
+    }
 
     public static void parse(StreamRDF destination, String uri, String base, 
Lang lang, boolean ignoreWarnings) {
 
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java 
b/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java
index 0246af9fde..544031d842 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/Scripts_AltTurtle.java
@@ -38,7 +38,7 @@ import org.junit.runner.RunWith ;
 
     // rdf-tests CG
     "testing/rdf-tests-cg/turtle/manifest.ttl"
-    
+
     // [rdf-star CG] RDF star CG tests. No longer valid
 //    "testing/rdf-star-cg/turtle/syntax/manifest.ttl",
 //    "testing/rdf-star-cg/turtle/eval/manifest.ttl"
@@ -52,11 +52,11 @@ public class Scripts_AltTurtle
         JenaSystem.init();
         // Register language and parser factory.
         TurtleJCC.register();
-        ParseForTest.alternativeReaderFactories.put(Lang.TURTLE, 
TurtleJCC.factory);
+        ParseForTest.registerAlternative(Lang.TURTLE, TurtleJCC.factory);
     }
 
     @AfterClass public static void afterClass() {
-        ParseForTest.alternativeReaderFactories.remove(Lang.TURTLE);
+        ParseForTest.unregisterAlternative(Lang.TURTLE);
     }
 }
 
diff --git a/jena-cmds/src/test/java/arq/rdftests.java 
b/jena-cmds/src/test/java/arq/rdftests.java
index f8bdde2635..ee0752686b 100644
--- a/jena-cmds/src/test/java/arq/rdftests.java
+++ b/jena-cmds/src/test/java/arq/rdftests.java
@@ -28,6 +28,7 @@ import org.apache.jena.Jena;
 import org.apache.jena.arq.junit.SurpressedTest;
 import org.apache.jena.arq.junit.TextTestRunner;
 import org.apache.jena.arq.junit.manifest.ManifestEntry;
+import org.apache.jena.arq.junit.riot.ParseForTest;
 import org.apache.jena.arq.junit.riot.RiotTests;
 import org.apache.jena.arq.junit.riot.VocabLangRDF;
 import org.apache.jena.arq.junit.sparql.SparqlTests;
@@ -46,10 +47,8 @@ import org.apache.jena.rdf.model.Literal;
 import org.apache.jena.rdf.model.Model;
 import org.apache.jena.rdf.model.ModelFactory;
 import org.apache.jena.rdf.model.Resource;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFDataMgr;
-import org.apache.jena.riot.RIOT;
-import org.apache.jena.riot.SysRIOT;
+import org.apache.jena.riot.*;
+import org.apache.jena.riot.lang.extra.TurtleJCC;
 import org.apache.jena.sparql.expr.E_Function;
 import org.apache.jena.sparql.expr.NodeValue;
 import org.apache.jena.sparql.junit.EarlReport;
@@ -94,9 +93,11 @@ public class rdftests extends CmdGeneral
     protected ArgDecl    strictDecl        = new ArgDecl(ArgDecl.NoValue, 
"strict");
     protected boolean    cmdStrictMode     = false;
 
-    protected ArgDecl    arqDecl           = new ArgDecl(ArgDecl.NoValue, 
"arq");
+    // Use the alternative Turtle parser which is JavaCC based.
+    protected ArgDecl    useTTLjcc         = new ArgDecl(ArgDecl.NoValue, 
"ttljcc");
+    protected ArgDecl    useARQ            = new ArgDecl(ArgDecl.NoValue, 
"arq");
     // Run with ".rq" as ARQ extended syntax.
-    protected boolean    arqAsNormal       = false;
+    protected boolean    argAsNormal       = false;
 
     protected ArgDecl    earlDecl          = new ArgDecl(ArgDecl.NoValue, 
"earl");
     protected boolean    createEarlReport  = false;
@@ -114,9 +115,11 @@ public class rdftests extends CmdGeneral
         super.modVersion.addClass(Jena.class);
         getUsage().startCategory("Tests (execute test manifest)");
         getUsage().addUsage("<manifest>", "run the tests specified in the 
given manifest");
-        add(arqDecl, "--arq",       "Operate with ARQ syntax");
-        add(strictDecl, "--strict", "Operate in strict mode (no extensions of 
any kind)");
-        add(earlDecl, "--earl", "create EARL report");
+
+        add(useARQ,       "--arq",     "Operate with ARQ syntax");
+        add(useTTLjcc,    "--ttljcc",  "Use the alternative Turtle parser in 
tests");
+        add(strictDecl,   "--strict",  "Operate in strict mode (no extensions 
of any kind)");
+        add(earlDecl,     "--earl",    "Create EARL report");
         addModule(modContext);
     }
 
@@ -134,11 +137,14 @@ public class rdftests extends CmdGeneral
         cmdStrictMode = super.hasArg(strictDecl);
         if ( contains(baseDecl) )
             baseURI = super.getValue(baseDecl);
-        arqAsNormal = contains(arqDecl);
+        if ( contains(useTTLjcc) )
+            ParseForTest.registerAlternative(Lang.TURTLE, TurtleJCC.factory);
+        argAsNormal = contains(useARQ);
     }
 
     @Override
     protected void exec() {
+
         NodeValue.VerboseWarnings = false;
         E_Function.WarnOnUnknownFunction = false;
         EarlReport report = new EarlReport(systemURI);
@@ -152,7 +158,7 @@ public class rdftests extends CmdGeneral
             QueryEvalTest.compareResultSetsByValue = false;
         }
 
-        if ( arqAsNormal )
+        if ( argAsNormal )
             SparqlTests.defaultForSyntaxTests = Syntax.syntaxARQ;
         else
             SparqlTests.defaultForSyntaxTests = Syntax.syntaxSPARQL_12;

Reply via email to