kinow commented on code in PR #2726:
URL: https://github.com/apache/jena/pull/2726#discussion_r1772025998
##########
jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java:
##########
@@ -35,37 +35,35 @@
/**
* Tokenizer for the Turtle family of syntaxes.
- * Supports addition tokens.
+ * Supports additional tokens.
*/
public final class TokenizerText implements Tokenizer
{
- // Drop through to final general symbol/keyword reader, including <=, !=
- // Care with <=
- // Policy driven for CURIES?
-
- private static final int CTRL_CHAR = CH_STAR;
-
- // The code has the call points for checking tokens but it is generally
better to
+ // The code has the call points for checking tokens but it is
generally better to
Review Comment:
Extra spaces?
##########
jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java:
##########
@@ -384,7 +378,31 @@ private Token parseToken() {
case CH_RBRACE: reader.readChar();
token.setType(TokenType.RBRACE); /*token.setImage(CH_RBRACE);*/ return token;
case CH_LPAREN: reader.readChar();
token.setType(TokenType.LPAREN); /*token.setImage(CH_LPAREN);*/ return token;
- case CH_RPAREN: reader.readChar();
token.setType(TokenType.RPAREN); /*token.setImage(CH_RPAREN);*/ return token;
+
+ // Can be ')' or ')>>'
Review Comment:
@afs what's the document that describes the `<<(` and `)>>` grammar? I'm
only finding "<<" and ">>" as in RDF-star?
##########
jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizerText.java:
##########
@@ -25,911 +25,938 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayInputStream ;
+import java.io.ByteArrayInputStream;
import java.io.Reader;
import org.apache.jena.atlas.io.IO;
-import org.apache.jena.atlas.io.PeekReader ;
-import org.apache.jena.atlas.lib.StrUtils ;
-import org.apache.jena.riot.RiotException ;
-import org.apache.jena.riot.RiotParseException ;
+import org.apache.jena.atlas.io.PeekReader;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.riot.RiotException;
+import org.apache.jena.riot.RiotParseException;
import org.apache.jena.riot.system.ErrorHandlerFactory.ErrorHandlerRecorder;
-import org.apache.jena.sparql.ARQConstants ;
-import org.junit.Test ;
+import org.apache.jena.sparql.ARQConstants;
+import org.junit.Test;
-public class TestTokenizer {
+public class TestTokenizerText {
private static Tokenizer tokenizer(String string) {
- return tokenizer(string, false) ;
+ return tokenizer(string, false);
}
private static Tokenizer tokenizer(String string, boolean lineMode) {
- PeekReader r = PeekReader.readString(string) ;
+ PeekReader r = PeekReader.readString(string);
Tokenizer tokenizer = TokenizerText.create()
.errorHandler(errorHandlerExceptions())
.source(r)
.lineMode(lineMode)
.build();
- return tokenizer ;
+ return tokenizer;
}
private static void tokenFirst(String string) {
- Tokenizer tokenizer = tokenizer(string) ;
- assertTrue(tokenizer.hasNext()) ;
- assertNotNull(tokenizer.next()) ;
+ Tokenizer tokenizer = tokenizer(string);
+ assertTrue(tokenizer.hasNext());
+ assertNotNull(tokenizer.next());
// Maybe more.
- // assertFalse(tokenizer.hasNext()) ;
+ // assertFalse(tokenizer.hasNext());
}
private static Token tokenFor(String string) {
- Tokenizer tokenizer = tokenizer(string) ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertFalse(tokenizer.hasNext()) ;
- return token ;
+ Tokenizer tokenizer = tokenizer(string);
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertFalse(tokenizer.hasNext());
+ return token;
}
private static Token tokenizeAndTestExact(String input, TokenType
tokenType, String tokenImage) {
- return tokenizeAndTestExact(input, tokenType, tokenImage, null) ;
+ return tokenizeAndTestExact(input, tokenType, tokenImage, null);
}
private static Token tokenizeAndTestExact(String input, StringType
stringType, String tokenImage) {
- Token token = tokenizeAndTestExact(input, TokenType.STRING,
tokenImage, null) ;
+ Token token = tokenizeAndTestExact(input, TokenType.STRING,
tokenImage, null);
assertEquals(stringType, token.getStringType());
return token;
}
private static Token tokenizeAndTestExact(String input, TokenType
tokenType, String tokenImage1, String tokenImage2) {
- Tokenizer tokenizer = tokenizer(input) ;
- Token token = testNextToken(tokenizer, tokenType, tokenImage1,
tokenImage2) ;
- assertFalse("Excess tokens", tokenizer.hasNext()) ;
- return token ;
+ Tokenizer tokenizer = tokenizer(input);
+ Token token = testNextToken(tokenizer, tokenType, tokenImage1,
tokenImage2);
+ assertFalse("Excess tokens", tokenizer.hasNext());
+ return token;
}
private static Token tokenizeAndTestExact(String input, TokenType
tokenType, String tokenImage1,
String tokenImage2, Token
subToken1, Token subToken2) {
- Token token = tokenFor(input) ;
- assertEquals(tokenType, token.getType()) ;
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- assertEquals(subToken1, token.getSubToken1()) ;
- assertEquals(subToken2, token.getSubToken2()) ;
- return token ;
+ Token token = tokenFor(input);
+ assertEquals(tokenType, token.getType());
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ assertEquals(subToken1, token.getSubToken1());
+ assertEquals(subToken2, token.getSubToken2());
+ return token;
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType
tokenType) {
- return tokenizeAndTestFirst(input, tokenType, null, null) ;
+ return tokenizeAndTestFirst(input, tokenType, null, null);
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType
tokenType, String tokenImage) {
- return tokenizeAndTestFirst(input, tokenType, tokenImage, null) ;
+ return tokenizeAndTestFirst(input, tokenType, tokenImage, null);
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType
tokenType, String tokenImage1, String tokenImage2) {
- Tokenizer tokenizer = tokenizer(input) ;
- testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2) ;
- return tokenizer ;
+ Tokenizer tokenizer = tokenizer(input);
+ testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2);
+ return tokenizer;
}
private static Token testNextToken(Tokenizer tokenizer, TokenType
tokenType) {
- return testNextToken(tokenizer, tokenType, null, null) ;
+ return testNextToken(tokenizer, tokenType, null, null);
}
private static Token testNextToken(Tokenizer tokenizer, TokenType
tokenType, String tokenImage1) {
- return testNextToken(tokenizer, tokenType, tokenImage1, null) ;
+ return testNextToken(tokenizer, tokenType, tokenImage1, null);
}
private static Token testNextToken(Tokenizer tokenizer, TokenType
tokenType, String tokenImage1, String tokenImage2) {
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
- assertEquals(tokenType, token.getType()) ;
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
+ assertEquals(tokenType, token.getType());
if ( tokenImage1 != null )
- assertEquals(tokenImage1, token.getImage()) ;
+ assertEquals(tokenImage1, token.getImage());
if ( tokenImage2 != null )
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- return token ;
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ return token;
}
private static Token tokenizeAndTest(String input, TokenType tokenType,
String tokenImage1, String tokenImage2, Token subToken1, Token subToken2) {
- Token token = tokenFor(input) ;
- assertNotNull(token) ;
- assertEquals(tokenType, token.getType()) ;
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- assertEquals(subToken1, token.getSubToken1()) ;
- assertEquals(subToken2, token.getSubToken2()) ;
- return token ;
+ Token token = tokenFor(input);
+ assertNotNull(token);
+ assertEquals(tokenType, token.getType());
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ assertEquals(subToken1, token.getSubToken1());
+ assertEquals(subToken2, token.getSubToken2());
+ return token;
}
@Test
public void tokenUnit_iri1() {
- tokenizeAndTestExact("<x>", TokenType.IRI, "x") ;
+ tokenizeAndTestExact("<x>", TokenType.IRI, "x");
}
@Test
public void tokenUnit_iri2() {
- tokenizeAndTestExact(" <> ", TokenType.IRI, "") ;
+ tokenizeAndTestExact(" <> ", TokenType.IRI, "");
}
@Test
// (expected=RiotParseException.class) We test the message.
public void tokenUnit_iri3() {
try {
// That's one \
- tokenFirst("<abc\\>def>") ;
+ tokenFirst("<abc\\>def>");
} catch (RiotParseException ex) {
- String x = ex.getMessage() ;
- assertTrue(x.contains("Illegal")) ;
+ String x = ex.getMessage();
+ assertTrue(x.contains("Illegal"));
}
}
@Test
public void tokenUnit_iri4() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<abc\\u0041def> 123", TokenType.IRI,
"abcAdef") ;
+ tokenizeAndTestFirst("<abc\\u0041def> 123", TokenType.IRI,
"abcAdef");
}
@Test
public void tokenUnit_iri5() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<\\u0041def> 123", TokenType.IRI, "Adef") ;
+ tokenizeAndTestFirst("<\\u0041def> 123", TokenType.IRI, "Adef");
}
@Test
public void tokenUnit_iri6() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<abc\\u0041> 123", TokenType.IRI, "abcA") ;
+ tokenizeAndTestFirst("<abc\\u0041> 123", TokenType.IRI, "abcA");
}
// Bad IRIs
@Test(expected=RiotException.class)
public void tokenUnit_iri10() {
- tokenFirst("<abc def>") ;
+ tokenFirst("<abc def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri11() {
- tokenFirst("<abc<def>") ;
+ tokenFirst("<abc<def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri12() {
- tokenFirst("<abc{def>") ;
+ tokenFirst("<abc{def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri13() {
- tokenFirst("<abc}def>") ;
+ tokenFirst("<abc}def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri14() {
- tokenFirst("<abc|def>") ;
+ tokenFirst("<abc|def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri15() {
- tokenFirst("<abc^def>") ;
+ tokenFirst("<abc^def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri16() {
- tokenFirst("<abc`def>") ;
+ tokenFirst("<abc`def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri17() {
- tokenFirst("<abc\tdef>") ; // Java escae - real tab
+ tokenFirst("<abc\tdef>"); // Java escae - real tab
}
@Test(expected=RiotException.class)
public void tokenUnit_iri18() {
- tokenFirst("<abc\u0007def>") ; // Java escape - codepoint 7
+ tokenFirst("<abc\u0007def>"); // Java escape - codepoint 7
}
@Test(expected=RiotException.class)
public void tokenUnit_iri19() {
- tokenFirst("<abc\\>") ;
+ tokenFirst("<abc\\>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri20() {
- tokenFirst("<abc\\def>") ;
+ tokenFirst("<abc\\def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri21() {
// \\\\ is a double \\ in the data.
// RDF 1.1 - \\ is not legal in a IRIREF
- tokenFirst("<abc\\\\def>") ;
+ tokenFirst("<abc\\\\def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri22() {
- tokenFirst("<abc\\u00ZZdef>") ;
+ tokenFirst("<abc\\u00ZZdef>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri23() {
- tokenFirst("<abc\\uZZ20def>") ;
+ tokenFirst("<abc\\uZZ20def>");
}
@Test
public void tokenUnit_str1() {
- tokenizeAndTestExact(" 'abc' ", StringType.STRING1, "abc") ;
+ tokenizeAndTestExact(" 'abc' ", StringType.STRING1, "abc");
}
@Test
public void tokenUnit_str2() {
- tokenizeAndTestExact(" '' ", StringType.STRING1, "") ;
+ tokenizeAndTestExact(" '' ", StringType.STRING1, "");
}
@Test
public void tokenUnit_str3() {
- tokenizeAndTestExact("'\\u0020'", StringType.STRING1, " ") ;
+ tokenizeAndTestExact("'\\u0020'", StringType.STRING1, " ");
}
@Test
public void tokenUnit_str4() {
- tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", StringType.STRING1,
"a'\"\n\t\r\f") ;
+ tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", StringType.STRING1,
"a'\"\n\t\r\f");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str5() {
// This is a raw newline. \n is a Java string escape.
- tokenizeAndTestExact("'\n'", StringType.STRING1, "\n") ;
+ tokenizeAndTestExact("'\n'", StringType.STRING1, "\n");
}
@Test
public void tokenUnit_str6() {
- tokenizeAndTestExact(" \"abc\" ", StringType.STRING2, "abc") ;
+ tokenizeAndTestExact(" \"abc\" ", StringType.STRING2, "abc");
}
@Test
public void tokenUnit_str7() {
- tokenizeAndTestExact("\"\"", StringType.STRING2, "") ;
+ tokenizeAndTestExact("\"\"", StringType.STRING2, "");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str8() {
- Tokenizer tokenizer = tokenizer("\"") ;
- assertTrue(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("\"");
+ assertTrue(tokenizer.hasNext());
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str9() {
- tokenFirst("'abc") ;
+ tokenFirst("'abc");
}
@Test
public void tokenUnit_str10() {
- tokenizeAndTestExact("'\\'abc'", StringType.STRING1, "'abc") ;
+ tokenizeAndTestExact("'\\'abc'", StringType.STRING1, "'abc");
}
@Test
public void tokenUnit_str11() {
- tokenizeAndTestExact("'\\U00000020'", StringType.STRING1, " ") ;
+ tokenizeAndTestExact("'\\U00000020'", StringType.STRING1, " ");
}
@Test
public void tokenUnit_str_long1() {
- tokenizeAndTestExact("'''aaa'''", StringType.LONG_STRING1, "aaa") ;
+ tokenizeAndTestExact("'''aaa'''", StringType.LONG_STRING1, "aaa");
}
@Test
public void tokenUnit_str_long2() {
- tokenizeAndTestExact("\"\"\"aaa\"\"\"", StringType.LONG_STRING2,
"aaa") ;
+ tokenizeAndTestExact("\"\"\"aaa\"\"\"", StringType.LONG_STRING2,
"aaa");
}
@Test
public void tokenUnit_str_long3() {
- tokenizeAndTestExact("''''1234'''", StringType.LONG_STRING1, "'1234") ;
+ tokenizeAndTestExact("''''1234'''", StringType.LONG_STRING1, "'1234");
}
@Test
public void tokenUnit_str_long4() {
- tokenizeAndTestExact("'''''1234'''", StringType.LONG_STRING1,
"''1234") ;
+ tokenizeAndTestExact("'''''1234'''", StringType.LONG_STRING1,
"''1234");
}
@Test
public void tokenUnit_str_long5() {
- tokenizeAndTestExact("'''\\'''1234'''", StringType.LONG_STRING1,
"'''1234") ;
+ tokenizeAndTestExact("'''\\'''1234'''", StringType.LONG_STRING1,
"'''1234");
}
@Test
public void tokenUnit_str_long6() {
- tokenizeAndTestExact("\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"1234") ;
+ tokenizeAndTestExact("\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"1234");
}
@Test
public void tokenUnit_str_long7() {
- tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"\"1234") ;
+ tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"\"1234");
}
@Test
public void tokenUnit_str_long8() {
- tokenizeAndTestExact("''''''", StringType.LONG_STRING1, "") ;
+ tokenizeAndTestExact("''''''", StringType.LONG_STRING1, "");
}
@Test
public void tokenUnit_str_long9() {
- tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"",
StringType.LONG_STRING2, "'''''''''''''''''") ;
+ tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"",
StringType.LONG_STRING2, "'''''''''''''''''");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long10() {
- tokenFirst("\"\"\"abcdef") ;
+ tokenFirst("\"\"\"abcdef");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long11() {
- tokenFirst("'''") ;
+ tokenFirst("'''");
}
@Test
public void tokenUnit_str_long12() {
- tokenizeAndTestExact("'''x'''@en", TokenType.LITERAL_LANG, "x", "en") ;
+ tokenizeAndTestExact("'''x'''@en", TokenType.LITERAL_LANG, "x", "en");
}
@Test
public void tokenUnit_bNode1() {
- tokenizeAndTestExact("_:abc", TokenType.BNODE, "abc") ;
+ tokenizeAndTestExact("_:abc", TokenType.BNODE, "abc");
}
@Test
public void tokenUnit_bNode2() {
- tokenizeAndTestExact("_:123 ", TokenType.BNODE, "123") ;
+ tokenizeAndTestExact("_:123 ", TokenType.BNODE, "123");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_bNode3() {
- Tokenizer tokenizer = tokenizer("_:") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
+ Tokenizer tokenizer = tokenizer("_:");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
}
@Test
public void tokenUnit_bNode4() {
- tokenizeAndTestExact("_:1-2-Z ", TokenType.BNODE, "1-2-Z") ;
+ tokenizeAndTestExact("_:1-2-Z ", TokenType.BNODE, "1-2-Z");
}
@Test
public void tokenUnit_bNode5() {
- Tokenizer tokenizer = tokenizeAndTestFirst("_:x. ",
TokenType.BNODE, "x") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("_:x. ",
TokenType.BNODE, "x");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void tokenUnit_bNode6() {
- Tokenizer tokenizer = tokenizeAndTestFirst("_:x:a. ",
TokenType.BNODE, "x") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "a") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("_:x:a. ",
TokenType.BNODE, "x");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "a");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
}
-// @Test
-// public void tokenUnit_cntrl1() {
-// tokenizeAndTestExact("*S", TokenType.CNTRL, "S");
-// }
-//
-// @Test
-// public void tokenUnit_cntr2() {
-// tokenizeAndTestExact("*SXYZ", TokenType.CNTRL, "SXYZ");
-// }
-//
-// @Test
-// public void tokenUnit_cntrl3() {
-// Tokenizer tokenizer = tokenizer("*S<x>");
-// assertTrue(tokenizer.hasNext());
-// Token token = tokenizer.next();
-// assertNotNull(token);
-// assertEquals(TokenType.CNTRL, token.getType());
-// assertEquals('S', token.getCntrlCode());
-// assertNull(token.getImage());
-// assertNull(token.getImage2());
-//
-// assertTrue(tokenizer.hasNext());
-// Token token2 = tokenizer.next();
-// assertNotNull(token2);
-// assertEquals(TokenType.IRI, token2.getType());
-// assertEquals("x", token2.getImage());
-// assertNull(token2.getImage2());
-// assertFalse(tokenizer.hasNext());
-// }
-
@Test
public void tokenUnit_syntax1() {
- tokenizeAndTestExact(".", TokenType.DOT, null, null) ;
+ tokenizeAndTestExact(".", TokenType.DOT, null, null);
}
@Test
public void tokenUnit_syntax2() {
- Tokenizer tokenizer = tokenizer(".;,") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- testNextToken(tokenizer, TokenType.SEMICOLON) ;
- testNextToken(tokenizer, TokenType.COMMA) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer(".;,");
+ testNextToken(tokenizer, TokenType.DOT);
+ testNextToken(tokenizer, TokenType.SEMICOLON);
+ testNextToken(tokenizer, TokenType.COMMA);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void tokenUnit_pname1() {
- tokenizeAndTestExact("a:b.c", TokenType.PREFIXED_NAME, "a", "b.c") ;
+ tokenizeAndTestExact("a:b.c", TokenType.PREFIXED_NAME, "a", "b.c");
}
@Test
public void tokenUnit_pname2() {
- Tokenizer tokenizer = tokenizeAndTestFirst("a:b.",
TokenType.PREFIXED_NAME, "a", "b") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertEquals(TokenType.DOT, token.getType()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("a:b.",
TokenType.PREFIXED_NAME, "a", "b");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertEquals(TokenType.DOT, token.getType());
}
@Test
public void tokenUnit_pname3() {
- tokenizeAndTestExact("a:b123", TokenType.PREFIXED_NAME, "a", "b123") ;
+ tokenizeAndTestExact("a:b123", TokenType.PREFIXED_NAME, "a", "b123");
}
@Test
public void tokenUnit_pname4() {
- tokenizeAndTestExact("a:", TokenType.PREFIXED_NAME, "a", "") ;
+ tokenizeAndTestExact("a:", TokenType.PREFIXED_NAME, "a", "");
}
@Test
public void tokenUnit_pname5() {
- tokenizeAndTestExact(":", TokenType.PREFIXED_NAME, "", "") ;
+ tokenizeAndTestExact(":", TokenType.PREFIXED_NAME, "", "");
}
@Test
public void tokenUnit_pname6() {
- tokenizeAndTestExact(":a", TokenType.PREFIXED_NAME, "", "a") ;
+ tokenizeAndTestExact(":a", TokenType.PREFIXED_NAME, "", "a");
}
@Test
public void tokenUnit_pname7() {
- tokenizeAndTestExact(":123", TokenType.PREFIXED_NAME, "", "123") ;
+ tokenizeAndTestExact(":123", TokenType.PREFIXED_NAME, "", "123");
}
@Test
public void tokenUnit_pname8() {
- tokenizeAndTestExact("a123:456", TokenType.PREFIXED_NAME, "a123",
"456") ;
+ tokenizeAndTestExact("a123:456", TokenType.PREFIXED_NAME, "a123",
"456");
}
@Test
public void tokenUnit_pname9() {
- Tokenizer tokenizer = tokenizeAndTestFirst("a123:-456",
TokenType.PREFIXED_NAME, "a123", "") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertEquals(TokenType.INTEGER, token.getType()) ;
- assertEquals("-456", token.getImage()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("a123:-456",
TokenType.PREFIXED_NAME, "a123", "");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertEquals(TokenType.INTEGER, token.getType());
+ assertEquals("-456", token.getImage());
}
@Test
public void tokenUnit_pname10() {
- tokenizeAndTestExact("a:a.b", TokenType.PREFIXED_NAME, "a", "a.b") ;
+ tokenizeAndTestExact("a:a.b", TokenType.PREFIXED_NAME, "a", "a.b");
}
@Test
public void tokenUnit_pname11() {
- tokenizeAndTestExact("a:0.b", TokenType.PREFIXED_NAME, "a", "0.b") ;
+ tokenizeAndTestExact("a:0.b", TokenType.PREFIXED_NAME, "a", "0.b");
}
@Test
public void tokenUnit_pname12() {
- tokenizeAndTestFirst("a:0. b", TokenType.PREFIXED_NAME, "a", "0") ;
+ tokenizeAndTestFirst("a:0. b", TokenType.PREFIXED_NAME, "a", "0");
}
@Test
public void tokenUnit_pname13() {
// x00e9 é
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xyzé", TokenType.PREFIXED_NAME, "a",
"xyz\u00e9") ;
+ tokenizeAndTestExact("a:xyzé", TokenType.PREFIXED_NAME, "a",
"xyz\u00e9");
}
@Test
public void tokenUnit_pname14() {
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xyze\u0301", TokenType.PREFIXED_NAME, "a",
"xyze\u0301") ;
+ tokenizeAndTestExact("a:xyze\u0301", TokenType.PREFIXED_NAME, "a",
"xyze\u0301");
}
@Test
public void tokenUnit_pname15() {
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xe\u0301y", TokenType.PREFIXED_NAME, "a",
"xe\u0301y") ;
+ tokenizeAndTestExact("a:xe\u0301y", TokenType.PREFIXED_NAME, "a",
"xe\u0301y");
}
@Test
public void tokenUnit_pname16() {
- tokenizeAndTestExact("a:b\\#c", TokenType.PREFIXED_NAME, "a", "b#c") ;
+ tokenizeAndTestExact("a:b\\#c", TokenType.PREFIXED_NAME, "a", "b#c");
}
@Test
public void tokenUnit_pname17() {
- tokenizeAndTestExact("a:b\\/c", TokenType.PREFIXED_NAME, "a", "b/c") ;
+ tokenizeAndTestExact("a:b\\/c", TokenType.PREFIXED_NAME, "a", "b/c");
}
@Test
public void tokenUnit_pname18() {
- tokenizeAndTestExact("a:b:c", TokenType.PREFIXED_NAME, "a", "b:c") ;
+ tokenizeAndTestExact("a:b:c", TokenType.PREFIXED_NAME, "a", "b:c");
}
@Test
public void tokenUnit_pname19() {
- tokenizeAndTestExact("a:b%AAc", TokenType.PREFIXED_NAME, "a", "b%AAc")
;
+ tokenizeAndTestExact("a:b%AAc", TokenType.PREFIXED_NAME, "a", "b%AAc");
}
@Test
public void tokenUnit_pname20() {
- Tokenizer tokenizer = tokenizeAndTestFirst("123:", TokenType.INTEGER,
"123") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "") ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("123:", TokenType.INTEGER,
"123");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "");
}
// Generic: parse first token from ...
- // tokenTest(str, TokenType, TokenImage) ;
+ // tokenTest(str, TokenType, TokenImage);
@Test
- public void tokenUnit_num1() {
- tokenizeAndTestExact("123", TokenType.INTEGER, "123") ;
+ public void tokenUnit_num01() {
+ tokenizeAndTestExact("123", TokenType.INTEGER, "123");
}
@Test
- public void tokenUnit_num2() {
+ public void tokenUnit_num02() {
// This is a change in Turtle (and SPARQL 1.1)
- tokenizeAndTestFirst("123.", TokenType.INTEGER, "123") ;
+ tokenizeAndTestFirst("123.", TokenType.INTEGER, "123");
}
@Test
- public void tokenUnit_num3() {
- tokenizeAndTestExact("+123.456", TokenType.DECIMAL, "+123.456") ;
+ public void tokenUnit_num03() {
+ tokenizeAndTestExact("+123.456", TokenType.DECIMAL, "+123.456");
}
@Test
- public void tokenUnit_num4() {
- tokenizeAndTestExact("-1", TokenType.INTEGER, "-1") ;
+ public void tokenUnit_num04() {
+ tokenizeAndTestExact("-1", TokenType.INTEGER, "-1");
}
@Test
- public void tokenUnit_num5() {
- tokenizeAndTestExact("-1e0", TokenType.DOUBLE, "-1e0") ;
+ public void tokenUnit_num05() {
+ tokenizeAndTestExact("-1e0", TokenType.DOUBLE, "-1e0");
}
@Test
- public void tokenUnit_num6() {
- tokenizeAndTestExact("1e+1", TokenType.DOUBLE, "1e+1") ;
+ public void tokenUnit_num06() {
+ tokenizeAndTestExact("1e+1", TokenType.DOUBLE, "1e+1");
}
@Test
- public void tokenUnit_num7() {
- tokenizeAndTestExact("1.3e+1", TokenType.DOUBLE, "1.3e+1") ;
+ public void tokenUnit_num07() {
+ tokenizeAndTestExact("1.3e+1", TokenType.DOUBLE, "1.3e+1");
}
@Test
- public void tokenUnit_num8() {
- tokenizeAndTestFirst("1.3.4", TokenType.DECIMAL, "1.3") ;
+ public void tokenUnit_num08() {
+ tokenizeAndTestFirst("1.3.4", TokenType.DECIMAL, "1.3");
}
@Test
- public void tokenUnit_num9() {
- tokenizeAndTestFirst("1.3e67.7", TokenType.DOUBLE, "1.3e67") ;
+ public void tokenUnit_num09() {
+ tokenizeAndTestFirst("1.3e67.7", TokenType.DOUBLE, "1.3e67");
}
@Test
public void tokenUnit_num10() {
- tokenizeAndTestExact(".1", TokenType.DECIMAL, ".1") ;
+ tokenizeAndTestExact(".1", TokenType.DECIMAL, ".1");
}
@Test
public void tokenUnit_num11() {
- tokenizeAndTestExact(".1e0", TokenType.DOUBLE, ".1e0") ;
+ tokenizeAndTestExact(".1e0", TokenType.DOUBLE, ".1e0");
Review Comment:
:+1: came looking for a number starting with `dot` and `e`
##########
jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java:
##########
@@ -439,37 +458,42 @@ private Token parseToken() {
*/
- // TODO extract readNumberNoSign
-
- int signCh = 0;
-
if ( ch == CH_PLUS || ch == CH_MINUS ) {
reader.readChar();
int ch2 = reader.peekChar();
-
- if ( !range(ch2, '0', '9') ) {
- // ch was end of symbol.
- // reader.readChar();
+ if ( !range(ch2, '0', '9') && ch2 != CH_DOT ) {
+ // Not a number.
if ( ch == CH_PLUS )
token.setType(TokenType.PLUS);
else
token.setType(TokenType.MINUS);
return token;
}
-
- // Already got a + or - ...
- // readNumberNoSign
- // Because next, old code processes signs.
- reader.pushbackChar(ch);
- signCh = ch;
- // Drop to next "if"
+ // ch2 not consumed.
+ boolean charactersConsumed = readNumber(ch, false);
+ if ( ! charactersConsumed ) {
+ if ( ch == CH_PLUS )
+ token.setType(TokenType.PLUS);
+ else
+ token.setType(TokenType.MINUS);
+ }
+ return token;
}
- if ( ch == CH_PLUS || ch == CH_MINUS || range(ch, '0', '9') ) {
- // readNumberNoSign
- readNumber();
- if ( Checking )
- checkNumber(token.getImage(), token.getImage2());
+ if ( range(ch, '0', '9') ) {
Review Comment:
So no need to check the plus and minus here as they were handled above... but
do we need to handle the `CH_DOT` here too? I'm guessing the previous `if`
handles `-0`, `+1.2`, etc., and this one handles `0`, `1.`?
##########
jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java:
##########
@@ -337,18 +326,23 @@ private Token parseToken() {
switch(ch)
{
- // DOT can start a decimal. Check for digit.
+ // DOT can start a decimal.
case CH_DOT:
reader.readChar();
ch = reader.peekChar();
if ( range(ch, '0', '9') ) {
- // Not a DOT after all.
+ // DOT DIGIT - it's a number.
+ // Reload the DOT.
reader.pushbackChar(CH_DOT);
- readNumber();
- if ( Checking )
- checkNumber(token.getImage(), token.getImage2());
- return token;
+ boolean charactersConsumed = readNumber(CH_ZERO, false);
Review Comment:
:+1: had a look at `readNumber`'s code, and I think it will now handle
doubles with a dot, even if they contain an exponent too (which I believe is
valid according to [the
grammar](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#tu-grammar-production-DECIMAL),
although I haven't tested that)

##########
jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizerText.java:
##########
@@ -25,911 +25,938 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayInputStream ;
+import java.io.ByteArrayInputStream;
import java.io.Reader;
import org.apache.jena.atlas.io.IO;
-import org.apache.jena.atlas.io.PeekReader ;
-import org.apache.jena.atlas.lib.StrUtils ;
-import org.apache.jena.riot.RiotException ;
-import org.apache.jena.riot.RiotParseException ;
+import org.apache.jena.atlas.io.PeekReader;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.riot.RiotException;
+import org.apache.jena.riot.RiotParseException;
import org.apache.jena.riot.system.ErrorHandlerFactory.ErrorHandlerRecorder;
-import org.apache.jena.sparql.ARQConstants ;
-import org.junit.Test ;
+import org.apache.jena.sparql.ARQConstants;
+import org.junit.Test;
-public class TestTokenizer {
+public class TestTokenizerText {
private static Tokenizer tokenizer(String string) {
- return tokenizer(string, false) ;
+ return tokenizer(string, false);
}
private static Tokenizer tokenizer(String string, boolean lineMode) {
- PeekReader r = PeekReader.readString(string) ;
+ PeekReader r = PeekReader.readString(string);
Tokenizer tokenizer = TokenizerText.create()
.errorHandler(errorHandlerExceptions())
.source(r)
.lineMode(lineMode)
.build();
- return tokenizer ;
+ return tokenizer;
}
private static void tokenFirst(String string) {
- Tokenizer tokenizer = tokenizer(string) ;
- assertTrue(tokenizer.hasNext()) ;
- assertNotNull(tokenizer.next()) ;
+ Tokenizer tokenizer = tokenizer(string);
+ assertTrue(tokenizer.hasNext());
+ assertNotNull(tokenizer.next());
// Maybe more.
- // assertFalse(tokenizer.hasNext()) ;
+ // assertFalse(tokenizer.hasNext());
}
private static Token tokenFor(String string) {
- Tokenizer tokenizer = tokenizer(string) ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertFalse(tokenizer.hasNext()) ;
- return token ;
+ Tokenizer tokenizer = tokenizer(string);
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertFalse(tokenizer.hasNext());
+ return token;
}
private static Token tokenizeAndTestExact(String input, TokenType
tokenType, String tokenImage) {
- return tokenizeAndTestExact(input, tokenType, tokenImage, null) ;
+ return tokenizeAndTestExact(input, tokenType, tokenImage, null);
}
private static Token tokenizeAndTestExact(String input, StringType
stringType, String tokenImage) {
- Token token = tokenizeAndTestExact(input, TokenType.STRING,
tokenImage, null) ;
+ Token token = tokenizeAndTestExact(input, TokenType.STRING,
tokenImage, null);
assertEquals(stringType, token.getStringType());
return token;
}
private static Token tokenizeAndTestExact(String input, TokenType
tokenType, String tokenImage1, String tokenImage2) {
- Tokenizer tokenizer = tokenizer(input) ;
- Token token = testNextToken(tokenizer, tokenType, tokenImage1,
tokenImage2) ;
- assertFalse("Excess tokens", tokenizer.hasNext()) ;
- return token ;
+ Tokenizer tokenizer = tokenizer(input);
+ Token token = testNextToken(tokenizer, tokenType, tokenImage1,
tokenImage2);
+ assertFalse("Excess tokens", tokenizer.hasNext());
+ return token;
}
private static Token tokenizeAndTestExact(String input, TokenType
tokenType, String tokenImage1,
String tokenImage2, Token
subToken1, Token subToken2) {
- Token token = tokenFor(input) ;
- assertEquals(tokenType, token.getType()) ;
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- assertEquals(subToken1, token.getSubToken1()) ;
- assertEquals(subToken2, token.getSubToken2()) ;
- return token ;
+ Token token = tokenFor(input);
+ assertEquals(tokenType, token.getType());
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ assertEquals(subToken1, token.getSubToken1());
+ assertEquals(subToken2, token.getSubToken2());
+ return token;
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType
tokenType) {
- return tokenizeAndTestFirst(input, tokenType, null, null) ;
+ return tokenizeAndTestFirst(input, tokenType, null, null);
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType
tokenType, String tokenImage) {
- return tokenizeAndTestFirst(input, tokenType, tokenImage, null) ;
+ return tokenizeAndTestFirst(input, tokenType, tokenImage, null);
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType
tokenType, String tokenImage1, String tokenImage2) {
- Tokenizer tokenizer = tokenizer(input) ;
- testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2) ;
- return tokenizer ;
+ Tokenizer tokenizer = tokenizer(input);
+ testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2);
+ return tokenizer;
}
private static Token testNextToken(Tokenizer tokenizer, TokenType
tokenType) {
- return testNextToken(tokenizer, tokenType, null, null) ;
+ return testNextToken(tokenizer, tokenType, null, null);
}
private static Token testNextToken(Tokenizer tokenizer, TokenType
tokenType, String tokenImage1) {
- return testNextToken(tokenizer, tokenType, tokenImage1, null) ;
+ return testNextToken(tokenizer, tokenType, tokenImage1, null);
}
private static Token testNextToken(Tokenizer tokenizer, TokenType
tokenType, String tokenImage1, String tokenImage2) {
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
- assertEquals(tokenType, token.getType()) ;
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
+ assertEquals(tokenType, token.getType());
if ( tokenImage1 != null )
- assertEquals(tokenImage1, token.getImage()) ;
+ assertEquals(tokenImage1, token.getImage());
if ( tokenImage2 != null )
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- return token ;
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ return token;
}
private static Token tokenizeAndTest(String input, TokenType tokenType,
String tokenImage1, String tokenImage2, Token subToken1, Token subToken2) {
- Token token = tokenFor(input) ;
- assertNotNull(token) ;
- assertEquals(tokenType, token.getType()) ;
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- assertEquals(subToken1, token.getSubToken1()) ;
- assertEquals(subToken2, token.getSubToken2()) ;
- return token ;
+ Token token = tokenFor(input);
+ assertNotNull(token);
+ assertEquals(tokenType, token.getType());
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ assertEquals(subToken1, token.getSubToken1());
+ assertEquals(subToken2, token.getSubToken2());
+ return token;
}
@Test
public void tokenUnit_iri1() {
- tokenizeAndTestExact("<x>", TokenType.IRI, "x") ;
+ tokenizeAndTestExact("<x>", TokenType.IRI, "x");
}
@Test
public void tokenUnit_iri2() {
- tokenizeAndTestExact(" <> ", TokenType.IRI, "") ;
+ tokenizeAndTestExact(" <> ", TokenType.IRI, "");
}
@Test
// (expected=RiotParseException.class) We test the message.
public void tokenUnit_iri3() {
try {
// That's one \
- tokenFirst("<abc\\>def>") ;
+ tokenFirst("<abc\\>def>");
} catch (RiotParseException ex) {
- String x = ex.getMessage() ;
- assertTrue(x.contains("Illegal")) ;
+ String x = ex.getMessage();
+ assertTrue(x.contains("Illegal"));
}
}
@Test
public void tokenUnit_iri4() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<abc\\u0041def> 123", TokenType.IRI,
"abcAdef") ;
+ tokenizeAndTestFirst("<abc\\u0041def> 123", TokenType.IRI,
"abcAdef");
}
@Test
public void tokenUnit_iri5() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<\\u0041def> 123", TokenType.IRI, "Adef") ;
+ tokenizeAndTestFirst("<\\u0041def> 123", TokenType.IRI, "Adef");
}
@Test
public void tokenUnit_iri6() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<abc\\u0041> 123", TokenType.IRI, "abcA") ;
+ tokenizeAndTestFirst("<abc\\u0041> 123", TokenType.IRI, "abcA");
}
// Bad IRIs
@Test(expected=RiotException.class)
public void tokenUnit_iri10() {
- tokenFirst("<abc def>") ;
+ tokenFirst("<abc def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri11() {
- tokenFirst("<abc<def>") ;
+ tokenFirst("<abc<def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri12() {
- tokenFirst("<abc{def>") ;
+ tokenFirst("<abc{def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri13() {
- tokenFirst("<abc}def>") ;
+ tokenFirst("<abc}def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri14() {
- tokenFirst("<abc|def>") ;
+ tokenFirst("<abc|def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri15() {
- tokenFirst("<abc^def>") ;
+ tokenFirst("<abc^def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri16() {
- tokenFirst("<abc`def>") ;
+ tokenFirst("<abc`def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri17() {
- tokenFirst("<abc\tdef>") ; // Java escae - real tab
+ tokenFirst("<abc\tdef>"); // Java escape - real tab
}
@Test(expected=RiotException.class)
public void tokenUnit_iri18() {
- tokenFirst("<abc\u0007def>") ; // Java escape - codepoint 7
+ tokenFirst("<abc\u0007def>"); // Java escape - codepoint 7
}
@Test(expected=RiotException.class)
public void tokenUnit_iri19() {
- tokenFirst("<abc\\>") ;
+ tokenFirst("<abc\\>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri20() {
- tokenFirst("<abc\\def>") ;
+ tokenFirst("<abc\\def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri21() {
// \\\\ is a double \\ in the data.
// RDF 1.1 - \\ is not legal in a IRIREF
- tokenFirst("<abc\\\\def>") ;
+ tokenFirst("<abc\\\\def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri22() {
- tokenFirst("<abc\\u00ZZdef>") ;
+ tokenFirst("<abc\\u00ZZdef>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri23() {
- tokenFirst("<abc\\uZZ20def>") ;
+ tokenFirst("<abc\\uZZ20def>");
}
@Test
public void tokenUnit_str1() {
- tokenizeAndTestExact(" 'abc' ", StringType.STRING1, "abc") ;
+ tokenizeAndTestExact(" 'abc' ", StringType.STRING1, "abc");
}
@Test
public void tokenUnit_str2() {
- tokenizeAndTestExact(" '' ", StringType.STRING1, "") ;
+ tokenizeAndTestExact(" '' ", StringType.STRING1, "");
}
@Test
public void tokenUnit_str3() {
- tokenizeAndTestExact("'\\u0020'", StringType.STRING1, " ") ;
+ tokenizeAndTestExact("'\\u0020'", StringType.STRING1, " ");
}
@Test
public void tokenUnit_str4() {
- tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", StringType.STRING1,
"a'\"\n\t\r\f") ;
+ tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", StringType.STRING1,
"a'\"\n\t\r\f");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str5() {
// This is a raw newline. \n is a Java string escape.
- tokenizeAndTestExact("'\n'", StringType.STRING1, "\n") ;
+ tokenizeAndTestExact("'\n'", StringType.STRING1, "\n");
}
@Test
public void tokenUnit_str6() {
- tokenizeAndTestExact(" \"abc\" ", StringType.STRING2, "abc") ;
+ tokenizeAndTestExact(" \"abc\" ", StringType.STRING2, "abc");
}
@Test
public void tokenUnit_str7() {
- tokenizeAndTestExact("\"\"", StringType.STRING2, "") ;
+ tokenizeAndTestExact("\"\"", StringType.STRING2, "");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str8() {
- Tokenizer tokenizer = tokenizer("\"") ;
- assertTrue(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("\"");
+ assertTrue(tokenizer.hasNext());
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str9() {
- tokenFirst("'abc") ;
+ tokenFirst("'abc");
}
@Test
public void tokenUnit_str10() {
- tokenizeAndTestExact("'\\'abc'", StringType.STRING1, "'abc") ;
+ tokenizeAndTestExact("'\\'abc'", StringType.STRING1, "'abc");
}
@Test
public void tokenUnit_str11() {
- tokenizeAndTestExact("'\\U00000020'", StringType.STRING1, " ") ;
+ tokenizeAndTestExact("'\\U00000020'", StringType.STRING1, " ");
}
@Test
public void tokenUnit_str_long1() {
- tokenizeAndTestExact("'''aaa'''", StringType.LONG_STRING1, "aaa") ;
+ tokenizeAndTestExact("'''aaa'''", StringType.LONG_STRING1, "aaa");
}
@Test
public void tokenUnit_str_long2() {
- tokenizeAndTestExact("\"\"\"aaa\"\"\"", StringType.LONG_STRING2,
"aaa") ;
+ tokenizeAndTestExact("\"\"\"aaa\"\"\"", StringType.LONG_STRING2,
"aaa");
}
@Test
public void tokenUnit_str_long3() {
- tokenizeAndTestExact("''''1234'''", StringType.LONG_STRING1, "'1234") ;
+ tokenizeAndTestExact("''''1234'''", StringType.LONG_STRING1, "'1234");
}
@Test
public void tokenUnit_str_long4() {
- tokenizeAndTestExact("'''''1234'''", StringType.LONG_STRING1,
"''1234") ;
+ tokenizeAndTestExact("'''''1234'''", StringType.LONG_STRING1,
"''1234");
}
@Test
public void tokenUnit_str_long5() {
- tokenizeAndTestExact("'''\\'''1234'''", StringType.LONG_STRING1,
"'''1234") ;
+ tokenizeAndTestExact("'''\\'''1234'''", StringType.LONG_STRING1,
"'''1234");
}
@Test
public void tokenUnit_str_long6() {
- tokenizeAndTestExact("\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"1234") ;
+ tokenizeAndTestExact("\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"1234");
}
@Test
public void tokenUnit_str_long7() {
- tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"\"1234") ;
+ tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"\"1234");
}
@Test
public void tokenUnit_str_long8() {
- tokenizeAndTestExact("''''''", StringType.LONG_STRING1, "") ;
+ tokenizeAndTestExact("''''''", StringType.LONG_STRING1, "");
}
@Test
public void tokenUnit_str_long9() {
- tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"",
StringType.LONG_STRING2, "'''''''''''''''''") ;
+ tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"",
StringType.LONG_STRING2, "'''''''''''''''''");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long10() {
- tokenFirst("\"\"\"abcdef") ;
+ tokenFirst("\"\"\"abcdef");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long11() {
- tokenFirst("'''") ;
+ tokenFirst("'''");
}
@Test
public void tokenUnit_str_long12() {
- tokenizeAndTestExact("'''x'''@en", TokenType.LITERAL_LANG, "x", "en") ;
+ tokenizeAndTestExact("'''x'''@en", TokenType.LITERAL_LANG, "x", "en");
}
@Test
public void tokenUnit_bNode1() {
- tokenizeAndTestExact("_:abc", TokenType.BNODE, "abc") ;
+ tokenizeAndTestExact("_:abc", TokenType.BNODE, "abc");
}
@Test
public void tokenUnit_bNode2() {
- tokenizeAndTestExact("_:123 ", TokenType.BNODE, "123") ;
+ tokenizeAndTestExact("_:123 ", TokenType.BNODE, "123");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_bNode3() {
- Tokenizer tokenizer = tokenizer("_:") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
+ Tokenizer tokenizer = tokenizer("_:");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
}
@Test
public void tokenUnit_bNode4() {
- tokenizeAndTestExact("_:1-2-Z ", TokenType.BNODE, "1-2-Z") ;
+ tokenizeAndTestExact("_:1-2-Z ", TokenType.BNODE, "1-2-Z");
}
@Test
public void tokenUnit_bNode5() {
- Tokenizer tokenizer = tokenizeAndTestFirst("_:x. ",
TokenType.BNODE, "x") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("_:x. ",
TokenType.BNODE, "x");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void tokenUnit_bNode6() {
- Tokenizer tokenizer = tokenizeAndTestFirst("_:x:a. ",
TokenType.BNODE, "x") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "a") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("_:x:a. ",
TokenType.BNODE, "x");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "a");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
}
-// @Test
-// public void tokenUnit_cntrl1() {
-// tokenizeAndTestExact("*S", TokenType.CNTRL, "S");
-// }
-//
-// @Test
-// public void tokenUnit_cntr2() {
-// tokenizeAndTestExact("*SXYZ", TokenType.CNTRL, "SXYZ");
-// }
-//
-// @Test
-// public void tokenUnit_cntrl3() {
-// Tokenizer tokenizer = tokenizer("*S<x>");
-// assertTrue(tokenizer.hasNext());
-// Token token = tokenizer.next();
-// assertNotNull(token);
-// assertEquals(TokenType.CNTRL, token.getType());
-// assertEquals('S', token.getCntrlCode());
-// assertNull(token.getImage());
-// assertNull(token.getImage2());
-//
-// assertTrue(tokenizer.hasNext());
-// Token token2 = tokenizer.next();
-// assertNotNull(token2);
-// assertEquals(TokenType.IRI, token2.getType());
-// assertEquals("x", token2.getImage());
-// assertNull(token2.getImage2());
-// assertFalse(tokenizer.hasNext());
-// }
-
@Test
public void tokenUnit_syntax1() {
- tokenizeAndTestExact(".", TokenType.DOT, null, null) ;
+ tokenizeAndTestExact(".", TokenType.DOT, null, null);
}
@Test
public void tokenUnit_syntax2() {
- Tokenizer tokenizer = tokenizer(".;,") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- testNextToken(tokenizer, TokenType.SEMICOLON) ;
- testNextToken(tokenizer, TokenType.COMMA) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer(".;,");
+ testNextToken(tokenizer, TokenType.DOT);
+ testNextToken(tokenizer, TokenType.SEMICOLON);
+ testNextToken(tokenizer, TokenType.COMMA);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void tokenUnit_pname1() {
- tokenizeAndTestExact("a:b.c", TokenType.PREFIXED_NAME, "a", "b.c") ;
+ tokenizeAndTestExact("a:b.c", TokenType.PREFIXED_NAME, "a", "b.c");
}
@Test
public void tokenUnit_pname2() {
- Tokenizer tokenizer = tokenizeAndTestFirst("a:b.",
TokenType.PREFIXED_NAME, "a", "b") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertEquals(TokenType.DOT, token.getType()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("a:b.",
TokenType.PREFIXED_NAME, "a", "b");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertEquals(TokenType.DOT, token.getType());
}
@Test
public void tokenUnit_pname3() {
- tokenizeAndTestExact("a:b123", TokenType.PREFIXED_NAME, "a", "b123") ;
+ tokenizeAndTestExact("a:b123", TokenType.PREFIXED_NAME, "a", "b123");
}
@Test
public void tokenUnit_pname4() {
- tokenizeAndTestExact("a:", TokenType.PREFIXED_NAME, "a", "") ;
+ tokenizeAndTestExact("a:", TokenType.PREFIXED_NAME, "a", "");
}
@Test
public void tokenUnit_pname5() {
- tokenizeAndTestExact(":", TokenType.PREFIXED_NAME, "", "") ;
+ tokenizeAndTestExact(":", TokenType.PREFIXED_NAME, "", "");
}
@Test
public void tokenUnit_pname6() {
- tokenizeAndTestExact(":a", TokenType.PREFIXED_NAME, "", "a") ;
+ tokenizeAndTestExact(":a", TokenType.PREFIXED_NAME, "", "a");
}
@Test
public void tokenUnit_pname7() {
- tokenizeAndTestExact(":123", TokenType.PREFIXED_NAME, "", "123") ;
+ tokenizeAndTestExact(":123", TokenType.PREFIXED_NAME, "", "123");
}
@Test
public void tokenUnit_pname8() {
- tokenizeAndTestExact("a123:456", TokenType.PREFIXED_NAME, "a123",
"456") ;
+ tokenizeAndTestExact("a123:456", TokenType.PREFIXED_NAME, "a123",
"456");
}
@Test
public void tokenUnit_pname9() {
- Tokenizer tokenizer = tokenizeAndTestFirst("a123:-456",
TokenType.PREFIXED_NAME, "a123", "") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertEquals(TokenType.INTEGER, token.getType()) ;
- assertEquals("-456", token.getImage()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("a123:-456",
TokenType.PREFIXED_NAME, "a123", "");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertEquals(TokenType.INTEGER, token.getType());
+ assertEquals("-456", token.getImage());
}
@Test
public void tokenUnit_pname10() {
- tokenizeAndTestExact("a:a.b", TokenType.PREFIXED_NAME, "a", "a.b") ;
+ tokenizeAndTestExact("a:a.b", TokenType.PREFIXED_NAME, "a", "a.b");
}
@Test
public void tokenUnit_pname11() {
- tokenizeAndTestExact("a:0.b", TokenType.PREFIXED_NAME, "a", "0.b") ;
+ tokenizeAndTestExact("a:0.b", TokenType.PREFIXED_NAME, "a", "0.b");
}
@Test
public void tokenUnit_pname12() {
- tokenizeAndTestFirst("a:0. b", TokenType.PREFIXED_NAME, "a", "0") ;
+ tokenizeAndTestFirst("a:0. b", TokenType.PREFIXED_NAME, "a", "0");
}
@Test
public void tokenUnit_pname13() {
// x00e9 é
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xyzé", TokenType.PREFIXED_NAME, "a",
"xyz\u00e9") ;
+ tokenizeAndTestExact("a:xyzé", TokenType.PREFIXED_NAME, "a",
"xyz\u00e9");
}
@Test
public void tokenUnit_pname14() {
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xyze\u0301", TokenType.PREFIXED_NAME, "a",
"xyze\u0301") ;
+ tokenizeAndTestExact("a:xyze\u0301", TokenType.PREFIXED_NAME, "a",
"xyze\u0301");
}
@Test
public void tokenUnit_pname15() {
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xe\u0301y", TokenType.PREFIXED_NAME, "a",
"xe\u0301y") ;
+ tokenizeAndTestExact("a:xe\u0301y", TokenType.PREFIXED_NAME, "a",
"xe\u0301y");
}
@Test
public void tokenUnit_pname16() {
- tokenizeAndTestExact("a:b\\#c", TokenType.PREFIXED_NAME, "a", "b#c") ;
+ tokenizeAndTestExact("a:b\\#c", TokenType.PREFIXED_NAME, "a", "b#c");
}
@Test
public void tokenUnit_pname17() {
- tokenizeAndTestExact("a:b\\/c", TokenType.PREFIXED_NAME, "a", "b/c") ;
+ tokenizeAndTestExact("a:b\\/c", TokenType.PREFIXED_NAME, "a", "b/c");
}
@Test
public void tokenUnit_pname18() {
- tokenizeAndTestExact("a:b:c", TokenType.PREFIXED_NAME, "a", "b:c") ;
+ tokenizeAndTestExact("a:b:c", TokenType.PREFIXED_NAME, "a", "b:c");
}
@Test
public void tokenUnit_pname19() {
- tokenizeAndTestExact("a:b%AAc", TokenType.PREFIXED_NAME, "a", "b%AAc")
;
+ tokenizeAndTestExact("a:b%AAc", TokenType.PREFIXED_NAME, "a", "b%AAc");
}
@Test
public void tokenUnit_pname20() {
- Tokenizer tokenizer = tokenizeAndTestFirst("123:", TokenType.INTEGER,
"123") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "") ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("123:", TokenType.INTEGER,
"123");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "");
}
// Generic: parse first token from ...
- // tokenTest(str, TokenType, TokenImage) ;
+ // tokenTest(str, TokenType, TokenImage);
@Test
- public void tokenUnit_num1() {
- tokenizeAndTestExact("123", TokenType.INTEGER, "123") ;
+ public void tokenUnit_num01() {
+ tokenizeAndTestExact("123", TokenType.INTEGER, "123");
}
@Test
- public void tokenUnit_num2() {
+ public void tokenUnit_num02() {
// This is a change in Turtle (and SPARQL 1.1)
- tokenizeAndTestFirst("123.", TokenType.INTEGER, "123") ;
+ tokenizeAndTestFirst("123.", TokenType.INTEGER, "123");
}
@Test
- public void tokenUnit_num3() {
- tokenizeAndTestExact("+123.456", TokenType.DECIMAL, "+123.456") ;
+ public void tokenUnit_num03() {
+ tokenizeAndTestExact("+123.456", TokenType.DECIMAL, "+123.456");
}
@Test
- public void tokenUnit_num4() {
- tokenizeAndTestExact("-1", TokenType.INTEGER, "-1") ;
+ public void tokenUnit_num04() {
+ tokenizeAndTestExact("-1", TokenType.INTEGER, "-1");
}
@Test
- public void tokenUnit_num5() {
- tokenizeAndTestExact("-1e0", TokenType.DOUBLE, "-1e0") ;
+ public void tokenUnit_num05() {
+ tokenizeAndTestExact("-1e0", TokenType.DOUBLE, "-1e0");
}
@Test
- public void tokenUnit_num6() {
- tokenizeAndTestExact("1e+1", TokenType.DOUBLE, "1e+1") ;
+ public void tokenUnit_num06() {
+ tokenizeAndTestExact("1e+1", TokenType.DOUBLE, "1e+1");
}
@Test
- public void tokenUnit_num7() {
- tokenizeAndTestExact("1.3e+1", TokenType.DOUBLE, "1.3e+1") ;
+ public void tokenUnit_num07() {
+ tokenizeAndTestExact("1.3e+1", TokenType.DOUBLE, "1.3e+1");
}
@Test
- public void tokenUnit_num8() {
- tokenizeAndTestFirst("1.3.4", TokenType.DECIMAL, "1.3") ;
+ public void tokenUnit_num08() {
+ tokenizeAndTestFirst("1.3.4", TokenType.DECIMAL, "1.3");
}
@Test
- public void tokenUnit_num9() {
- tokenizeAndTestFirst("1.3e67.7", TokenType.DOUBLE, "1.3e67") ;
+ public void tokenUnit_num09() {
+ tokenizeAndTestFirst("1.3e67.7", TokenType.DOUBLE, "1.3e67");
}
@Test
public void tokenUnit_num10() {
- tokenizeAndTestExact(".1", TokenType.DECIMAL, ".1") ;
+ tokenizeAndTestExact(".1", TokenType.DECIMAL, ".1");
}
@Test
public void tokenUnit_num11() {
- tokenizeAndTestExact(".1e0", TokenType.DOUBLE, ".1e0") ;
+ tokenizeAndTestExact(".1e0", TokenType.DOUBLE, ".1e0");
}
@Test
public void tokenUnit_num12() {
- // This is not a hex number.
+ tokenizeAndTestExact("+.1", TokenType.DECIMAL, "+.1");
+ }
- Tokenizer tokenizer = tokenizeAndTestFirst("000A .",
TokenType.INTEGER, "000") ;
- testNextToken(tokenizer, TokenType.KEYWORD, "A") ;
+ @Test
+ public void tokenUnit_num13() {
+ tokenizeAndTestExact("-.1", TokenType.DECIMAL, "-.1");
+ }
+
+ @Test
+ public void tokenUnit_num14() {
+ tokenizeAndTestExact("+.1e0", TokenType.DOUBLE, "+.1e0");
Review Comment:
:+1: do we have/need a test like this, but without the sign too?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]