This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit dd70a39a7789bad79ed4d310dfa268a6077c3e73 Author: Andy Seaborne <a...@apache.org> AuthorDate: Fri Aug 30 10:20:37 2024 +0100 Notes about TurtleJCC --- .../org/apache/jena/riot/lang/extra/TurtleJCC.java | 19 +++++++++++++++---- .../jena/riot/lang/extra/TurtleJavaccReaderRIOT.java | 8 +++++--- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJCC.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJCC.java index b0218ea6c9..3e6fcdacab 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJCC.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJCC.java @@ -19,15 +19,26 @@ package org.apache.jena.riot.lang.extra; import org.apache.jena.riot.*; +import org.apache.jena.riot.lang.LangTurtle; +/** + * This is not the normal Turtle parser. + * <p> + * This is a separate Turtle parser using JavaCC used as a comparison. The JavaCC + * grammar reflects the RDF Turtle standard grammar. It also exists so that there is + * a valid JavaCC grammar that can be used as a basis for other languages. + * <p> + * It is significantly slower than the RIOT {@link LangTurtle}. + */ public class TurtleJCC { - // Must be a different content type. + // Must be a different content type to the normal parser. + // Must have different name and altNames. // Must be a different file extension. public static Lang TTLJCC = LangBuilder.create("TurtleJavaCC", "text/turtle-jcc") - .addAltNames("ttljcc") - .addFileExtensions("ttljcc") - .build(); + .addAltNames("ttljcc") + .addFileExtensions("ttljcc") + .build(); public static ReaderRIOTFactory factory = (lang, profile) -> new TurtleJavaccReaderRIOT(profile) ; public static void register() { diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJavaccReaderRIOT.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJavaccReaderRIOT.java index 0b7065a968..c213423716 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJavaccReaderRIOT.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/extra/TurtleJavaccReaderRIOT.java @@ -35,10 +35,12 @@ import org.apache.jena.sparql.util.Context; /** * Turtle parser, written using JavaCC. + * <p> * This is not used normally. - * It is slower than the RIOT {@link LangTurtle}. - * It may not be up-to-date but at least in the codebase means it should be java-compatible. - * It exists so that there is a JavaCC grammar that can be used as a basis for other languages. + * <p> + * It is slower than the RIOT {@link LangTurtle} and has not been optimized for + * speed. Tokenizing is the bottleneck. It exists so that there is a JavaCC grammar + * that can be used as a basis for other languages. */ public class TurtleJavaccReaderRIOT implements ReaderRIOT { private final ParserProfile profile;