This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push: new 48cd7f762a GH-2620: Striping error fix for RRX 48cd7f762a is described below commit 48cd7f762afe721c9ab14af3d70ea88818366ca1 Author: Andy Seaborne <a...@apache.org> AuthorDate: Tue Aug 6 20:37:39 2024 +0100 GH-2620: Striping error fix for RRX --- .../org/apache/jena/riot/lang/rdfxml/SysRRX.java | 1 - .../riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java | 103 +++++++++++++-------- .../rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java | 36 +++++-- .../rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java | 31 +++++-- .../jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java | 6 +- .../rdfxml/converted_legacy/TS_ConvertedARP1.java | 3 + .../converted_legacy/TestARP1_W3C_Pending.java | 4 +- .../rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java | 3 +- ...Xev.java => TestManifest_RDF11_RRX_StAXev.java} | 2 +- .../lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java | 2 +- .../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java | 27 +++--- .../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java | 61 ++++++++++-- jena-arq/testing/RIOT/rrx-files/README | 10 +- .../{error01.rdf => multiple_objects_lex_node.rdf} | 15 ++- .../{error01.rdf => multiple_objects_node_lex.rdf} | 15 ++- ...{error01.rdf => multiple_objects_node_node.rdf} | 15 ++- jena-arq/testing/RIOT/rrx-files/nested_object.rdf | 17 ++++ .../{error01.rdf => parseType-unknown.rdf} | 0 jena-arq/testing/RIOT/rrx/README | 4 + 19 files changed, 245 insertions(+), 110 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java index 0fc87e881f..17a6173b26 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java @@ -38,5 +38,4 @@ public class SysRRX { xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE); return xmlInputFactory; } - } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java index 27447c3524..e34ccc4329 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java @@ -28,6 +28,7 @@ import javax.xml.namespace.NamespaceContext; import javax.xml.namespace.QName; import org.apache.jena.atlas.io.IndentedWriter; +import org.apache.jena.atlas.lib.EscapeStr; import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.xsd.impl.XMLLiteralType; import org.apache.jena.graph.Node; @@ -235,13 +236,18 @@ class ParserRDFXML_SAX ObjectLex, // The node implied by rdf:parseType=Resource - ObjectParserTypeResource, + ObjectParseTypeResource, // The object is rdf:parseType=Literal. Collecting characters of a RDF XML Literal ObjectParseTypeLiteral, // The object is rdf:parseType=Collection (RDF List) - ObjectParseTypeCollection + ObjectParseTypeCollection, + + // The object is a nested element. + // Unlike NodeElement, there is only one ObjectNode inside one property. + // ObjectLex becomes ObjectNode if a startElement is found. + ObjectNode } /** Integer holder for rdf:li */ @@ -314,7 +320,7 @@ class ParserRDFXML_SAX // If this frame is ParserMode.ObjectResource , then it is an implicit frame // inserted for the implied node. Pop the stack again to balance the push of // the implicit node element. - if ( parserMode == ParserMode.ObjectParserTypeResource ) { + if ( parserMode == ParserMode.ObjectParseTypeResource ) { popParserFrame(); decIndent(); } @@ -485,6 +491,7 @@ class ParserRDFXML_SAX } trace.printf(") mode = %s\n", parserMode); } + incIndent(); Position position = position(); @@ -497,16 +504,20 @@ class ParserRDFXML_SAX switch (parserMode) { case ObjectLex -> { // While processing ObjectLex, we found a startElement. - // The "ObjectLex" decision needs updating. This is a ParserMode.NodeElement. + // The "ObjectLex" decision needs updating. This is a ParserMode.NodeElement. // This is not parseType=Resource. if ( !isWhitespace(accCharacters) ) throw RDFXMLparseError("XML content before nested element", position); accCharacters.setLength(0); - // Declare that the containing frame is expecting a node element mode. - // Leave in parserMode=ObjectLex - pushParserFrame(ParserMode.NodeElement); + // Declare that the containing frame is expecting a node element as the object. + // There can be only one object. + pushParserFrame(ParserMode.ObjectNode); processBaseAndLang(attributes, position); } + case ObjectNode -> { + // Already in ObjectNode so a second statrtElement is an error. + throw RDFXMLparseError("Start tag after inner node element (only one node element permitted): got "+qName, position); + } default -> { // For everything else. pushParserFrame(); @@ -528,7 +539,7 @@ class ParserRDFXML_SAX // The top element can be a single nodeElement. startNodeElement(namespaceURI, localName, qName, attributes, position); } - case NodeElement -> + case NodeElement, ObjectNode -> startNodeElement(namespaceURI, localName, qName, attributes, position); case PropertyElement -> startPropertyElement(namespaceURI, localName, qName, attributes, position); @@ -574,30 +585,33 @@ class ParserRDFXML_SAX return; } endXMLLiteral(position); - if ( ReaderRDFXML_SAX.TRACE ) - trace.printf("**** End XML Literal[%s]: elementDepth=%d / xmlLiteralStartDepth=%s\n", qName, elementDepth, xmlLiteralStartDepth); // Keep going to finish the end tag. } switch (parserMode) { - case NodeElement -> - endNodeElement(position); + case NodeElement, ObjectNode -> + endNodeElement(position); case PropertyElement -> { if ( isEndNodeElement() ) // Possible next property but it's a node element so no property - // and it's end of node, with two "end property" tags seen in a row. + // and it is end of node, with two "end property" tags seen in a row. + // This occurs for + // <rdf:Description> and no properties *maybe some attribute properties. + // <Class></Class> endNodeElement(position); else endPropertyElement(position); } - case ObjectLex -> + case ObjectLex -> { endObjectLexical(position); - case ObjectParseTypeLiteral -> + } + case ObjectParseTypeLiteral -> { endObjectXMLLiteral(position); - case ObjectParseTypeCollection -> + } + case ObjectParseTypeCollection -> { endCollectionItem(position); - default -> - throw RDFXMLparseError("Inconsistent parserMode:" + parserMode, position); + } + default -> throw RDFXMLparseError("Inconsistent parserMode:" + parserMode, position); } popParserFrame(); @@ -783,7 +797,7 @@ class ParserRDFXML_SAX // Push a frame here as an implicit node frame because the subject is changing. // The companion "end frame" is handled in "popParserFrame" which // checks for parserMode=ImplicitNode - parserMode(ParserMode.ObjectParserTypeResource); + parserMode(ParserMode.ObjectParseTypeResource); pushParserFrame(); // ... expect a property element start or an end element. parserMode(ParserMode.PropertyElement); @@ -808,12 +822,12 @@ class ParserRDFXML_SAX return currentProperty == null; } - // private String xmlBaseStr(Attributes attributes, Position position) { - // String baseStr = attributes.getValue(xmlNS, xmlBaseLN); - // if ( baseStr == null ) - // return null; - // return IRIs.resolve(currentBase, baseStr); - // } +// private String xmlBaseStr(Attributes attributes, Position position) { +// String baseStr = attributes.getValue(xmlNS, xmlBaseLN); +// if ( baseStr == null ) +// return null; +// return IRIs.resolve(currentBase, baseStr); +// } // Start element encountered when expecting a ObjectCollection private void startCollectionItem(String namespaceURI, String localName, String qName, Attributes attributes, Position position) { @@ -958,8 +972,9 @@ class ParserRDFXML_SAX String qName = attributes.getQName(index); if ( namespace == null || namespace.isEmpty() ) { - if ( outputWarnings ) { - // In SAX, xmlns: is a qname, but namespace and local name are "". + // In SAX, xmlns: is qname, but namespace and local name are "". + //RDFXMLparseError("XML attribute '"+qName+"' used for RDF property attribute (no namespace)", position); + if ( outputWarnings ){ if ( ! localName.isEmpty() ) // Skip XML namespace declarations. RDFXMLparseWarning("XML attribute '"+qName+"' used for RDF property attribute - ignored", position); } @@ -1040,6 +1055,7 @@ class ParserRDFXML_SAX return ObjectParseType.Plain; try { String parseTypeName = parseTypeStr; + // Extensions - some names that appear in the wild switch(parseTypeName) { case "literal" -> { RDFXMLparseWarning("Encountered rdf:parseType='literal'. Treated as rdf:parseType='Literal'", position); @@ -1138,29 +1154,42 @@ class ParserRDFXML_SAX return; } // Allow whitespace only - case ObjectParserTypeResource, NodeElement, PropertyElement, ObjectParseTypeCollection -> { - if ( !isWhitespace(ch, start, length) ) - throw RDFXMLparseError("Non-whitespace text content between element tags: " - + nonWhitespaceForMsg(ch, start, length), position()); + case NodeElement, PropertyElement, ObjectParseTypeResource, ObjectParseTypeCollection, ObjectNode -> { + if ( !isWhitespace(ch, start, length) ) { + String text = nonWhitespaceMsg(ch, start, length); + throw RDFXMLparseError("Non-whitespace text content between element tags: '"+text+"'", position()); + } } case TOP -> { if ( !isWhitespace(ch, start, length) ) { - throw RDFXMLparseError("Non-whitespace text content outside element tags: " - + nonWhitespaceForMsg(ch, start, length), position()); + String text = nonWhitespaceMsg(ch, start, length); + throw RDFXMLparseError("Non-whitespace text content outside element tags: '"+text+"'", position()); } } } } /** The string for the first non-whitespace index. */ - private static String nonWhitespaceForMsg(char[] ch, int start, int length) { + private static String nonWhitespaceMsg(char[] ch, int start, int length) { + final int MaxLen = 10; // Short - this is for error messages + // Find the start of non-whitespace. + // Slice, truncate if necessary. + // Make safe. for ( int i = start ; i < start + length ; i++ ) { if ( !Character.isWhitespace(ch[i]) ) { - int len = Math.min(20, start - i); - return new String(ch, i, len); + // Slight overshoot + int remaindingLength = length - (i-start); + int len = Math.min(MaxLen, remaindingLength); + String x = new String(ch, i, len); + if ( remaindingLength > MaxLen ) + x = x+"..."; + // Escape characters, especially newlines and backspaces. + x = EscapeStr.stringEsc(x); + x = x.stripTrailing(); + return x; } } - throw new RDFXMLParseException("Failed to find any non-whitespace characters"); + throw new RDFXMLParseException("Internal error: Failed to find any non-whitespace characters"); } @Override diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java index e4a0a3a80e..cbbaac3b34 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java @@ -31,6 +31,7 @@ import javax.xml.stream.events.*; import org.apache.commons.lang3.StringUtils; import org.apache.jena.atlas.io.IndentedWriter; +import org.apache.jena.atlas.lib.EscapeStr; import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.xsd.impl.XMLLiteralType; import org.apache.jena.graph.Node; @@ -509,6 +510,8 @@ class ParserRDFXML_StAX_EV { if ( namespace == null || namespace.isEmpty() ) { // SAX passes xmlns as attributes with namespace and local name of "". The qname is "xmlns:"/"xmlns" // StAX, does not pass namespaces. + + //RDFXMLparseError("XML attribute '"+localName+"' used for RDF property attribute (no namespace)", event); if ( outputWarnings ) RDFXMLparseWarning("XML attribute '"+localName+"' used for RDF property attribute - ignored", event); return false; @@ -692,10 +695,9 @@ class ParserRDFXML_StAX_EV { event = nextEventAny(); } if ( event.isStartElement() ) { - // DRY! // Striped - inner node element. if ( ! isWhitespace(sBuff) ) { - String msg = nonWhitespaceForMsg(sBuff.toString()); + String msg = nonWhitespaceMsg(sBuff.toString()); throw RDFXMLparseError("Content before node element. '"+msg+"'", event); } event = processNestedNodeElement(event, subject, property, emitter); @@ -1196,8 +1198,11 @@ class ParserRDFXML_StAX_EV { } case CHARACTERS, CDATA -> { Characters chars = ev.asCharacters(); - if ( ! isWhitespace(ev) ) - throw RDFXMLparseError("Read "+str(ev)+" when expecting a start or end element.", ev); + if ( ! isWhitespace(ev) ) { + String str = ev.asCharacters().getData(); + String text = nonWhitespaceMsg(str); + throw RDFXMLparseError("Expecting a start or end element. Got characters '"+text+"'", ev); + } } case COMMENT, DTD -> { } // Skip //case SPACE -> @@ -1488,7 +1493,7 @@ class ParserRDFXML_StAX_EV { private void noContentAllowed(XMLEvent event) { if ( event.isCharacters() ) { String content = event.asCharacters().getData(); - content = nonWhitespaceForMsg(content); + content = nonWhitespaceMsg(content); throw RDFXMLparseError("Expected XML start tag or end tag. Found text content (possible striping error): \""+content+"\"", event); } } @@ -1628,12 +1633,23 @@ class ParserRDFXML_StAX_EV { }; } - /** The string for the first non-whitespace index. */ - private static String nonWhitespaceForMsg(String string) { - for ( int i = 0 ; i < string.length() ; i++ ) { + /** The string for the first non-whitespace */ + private static String nonWhitespaceMsg(String string) { + final int MaxLen = 10; // Short - this is for error messages + // Find the start of non-whitespace. + // Slice, truncate if necessary. + // Make safe. + int length = string.length(); + for ( int i = 0 ; i < length ; i++ ) { if ( !Character.isWhitespace(string.charAt(i)) ) { - int index = Math.min(20, string.length()-i); - return string.substring(index); + int len = Math.min(MaxLen, length - i); + String x = string.substring(i, i+len); + if ( length > MaxLen ) + x = x+"..."; + // Escape characters, especially newlines and backspaces. + x = EscapeStr.stringEsc(x); + x = x.stripTrailing(); + return x; } } throw new RDFXMLParseException("Failed to find any non-whitespace characters"); diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java index 17a1aebafe..32b1024056 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java @@ -35,6 +35,7 @@ import javax.xml.stream.events.XMLEvent; import org.apache.commons.lang3.StringUtils; import org.apache.jena.atlas.io.IndentedWriter; +import org.apache.jena.atlas.lib.EscapeStr; import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.xsd.impl.XMLLiteralType; import org.apache.jena.graph.Node; @@ -485,6 +486,7 @@ class ParserRDFXML_StAX_SR { if ( namespace == null || namespace.isEmpty() ) { // SAX passes xmlns as attributes with namespace and local name of "". The qname is "xmlns:"/"xmlns" // StAX, does not pass namespaces. + //RDFXMLparseError("XML attribute '"+qName.getLocalPart()+"' used for RDF property attribute (no namespace)", event); if ( outputWarnings ) RDFXMLparseWarning("XML attribute '"+qName.getLocalPart()+"' used for RDF property attribute - ignored"); return false; @@ -666,7 +668,7 @@ class ParserRDFXML_StAX_SR { } if ( lookingAt(event, START_ELEMENT) ) { if ( ! isWhitespace(accCharacters) ) { - String msg = nonWhitespaceForMsg(accCharacters.toString()); + String msg = nonWhitespaceMsg(accCharacters.toString()); throw RDFXMLparseError("Content before node element. '"+msg+"'"); } event = processNestedNodeElement(event, subject, property, emitter); @@ -1163,8 +1165,10 @@ class ParserRDFXML_StAX_SR { } case CHARACTERS, CDATA -> { String chars = xmlSource.getText(); - if ( ! isWhitespace(chars) ) - throw RDFXMLparseError("Read "+nonWhitespaceForMsg(chars)+" when expecting a start or end element."); + if ( ! isWhitespace(chars) ) { + String text = nonWhitespaceMsg(chars); + throw RDFXMLparseError("Expecting a start or end element. Got characters '"+text+"'"); + } // Skip break; } @@ -1472,7 +1476,7 @@ class ParserRDFXML_StAX_SR { private void noContentAllowed(XMLEvent event) { if ( event.isCharacters() ) { String content = event.asCharacters().getData(); - content = nonWhitespaceForMsg(content); + content = nonWhitespaceMsg(content); throw RDFXMLparseError("Expected XML start tag or end tag. Found text content (possible striping error): \""+content+"\""); } } @@ -1622,11 +1626,22 @@ class ParserRDFXML_StAX_SR { } /** The string for the first non-whitespace index. */ - private static String nonWhitespaceForMsg(String string) { - for ( int i = 0 ; i < string.length() ; i++ ) { + private static String nonWhitespaceMsg(String string) { + final int MaxLen = 10; // Short - this is for error messages + // Find the start of non-whitespace. + // Slice, truncate if necessary. + // Make safe. + int length = string.length(); + for ( int i = 0 ; i < length ; i++ ) { if ( !Character.isWhitespace(string.charAt(i)) ) { - int index = Math.min(20, string.length()-i); - return string.substring(index); + int len = Math.min(MaxLen, length - i); + String x = string.substring(i, i+len); + if ( length > MaxLen ) + x = x+"..."; + // Escape characters, especially newlines and backspaces. + x = EscapeStr.stringEsc(x); + x = x.stripTrailing(); + return x; } } throw new RDFXMLParseException("Failed to find any non-whitespace characters"); diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java index 9c268d5abd..230105e600 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java @@ -26,10 +26,10 @@ import org.junit.runners.Suite; @RunWith(Suite.class) @Suite.SuiteClasses( { - // Local file and rdf11-xml - detailed checking. + // Local tests, extensions and error reports. TS_RRX.class, - // Manifest-driven rdf11-xml - all parsers + // Manifest-driven RDF 1.1 rdf-xml test suite - all parsers Scripts_RRX_RDFXML.class, // jena-core legacy test (RDF 1.0) @@ -43,7 +43,7 @@ import org.junit.runners.Suite; * converted to run as RIOT tests. * <p> * {@linkplain TS_RRX} runs local RRX tests by comparing the different RRX parsers - * to ARP1. tese test check for the same number of warnig as well. + * to ARP1. These test check for the same number of warning as well. * The {@code TestRDFXML_RRX_*} are running on extra local files. The * TestRDFXML_W3C_* are running on the RDF 1.0 test suite that ARP1 has used. * <p> diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java index 8379d47d78..e479ff7414 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java @@ -32,5 +32,8 @@ import org.junit.runners.Suite.SuiteClasses; /** * The ARP (final) tests from jena-core, so related to RDF 1.0 test suite, * then converted to run as RIOT tests. + * + * The files used are in " * Files in "testing/RIOT/rdf11-xml/" + * which have been updated for RDF 1.1 if necessary. */ public class TS_ConvertedARP1 {} diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java index 305d11409f..b5cd53e162 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java @@ -24,7 +24,9 @@ import org.junit.Test; /** * The ARP test suite run on a local legacy copy of the RDF 1.0 WG test suite - * (updated for RDF 1.1). Tests marked pending. + * (updated for RDF 1.1). + * + * Tests marked pending. */ public class TestARP1_W3C_Pending { @Test diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java index 15bcfca775..afe97d071b 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java @@ -24,6 +24,7 @@ import org.junit.runners.Suite.SuiteClasses; /** * Run all the parsers on the rdf-test CG RDF/XML manifest files (RDF 1.1). + * Files in "testing/RIOT/rdf11-xml/". */ @RunWith(Suite.class) @SuiteClasses( { @@ -32,7 +33,7 @@ import org.junit.runners.Suite.SuiteClasses; TestManifest_RDF11_ARP1.class, TestManifest_RDF11_RRX_SAX.class, TestManifest_RDF11_RRX_StAXsr.class, - TestManifest_RDF_RRX_StAXev.class + TestManifest_RDF11_RRX_StAXev.class }) public class Scripts_RRX_RDFXML {} diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java similarity index 97% rename from jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java rename to jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java index 859c92bbb8..fb904943f6 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java @@ -36,7 +36,7 @@ import org.junit.runner.RunWith; "testing/RIOT/rdf11-xml/manifest.ttl" }) -public class TestManifest_RDF_RRX_StAXev { +public class TestManifest_RDF11_RRX_StAXev { static ReaderRIOTFactory systemReaderfactory; @BeforeClass diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java index b25bb22968..02c3162121 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java @@ -44,6 +44,6 @@ public abstract class AbstractTestRDFXML_RRX { } @Test public void test() { - RunTestRDFXML.runTest(testLabel, factory, implName, filename); + RunTestRDFXML.runTestCompareARP(testLabel, factory, implName, filename); } } diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java index 09aaea80ec..da6648a05b 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java @@ -72,10 +72,12 @@ public class RunTestRDFXML { } /** - * Manifest-like in that the test files in a specific order. + * Manifest-like in that the test files are run in a specific order. * The local files cover all the features of RDF/XML parsing * but not in great depth. * These tests more easily highlight problems and the grouping helps. + * + * Check the files on disk agree with the built-in order list. */ static List<String> localTestFiles() { Path LOCAL_DIR = Path.of("testing/RIOT/rrx/"); @@ -184,7 +186,6 @@ public class RunTestRDFXML { ); for ( String fn : testfiles ) { - if ( ! found.contains(fn) ) output.printf("Not found in file area: %s\n", fn); } @@ -216,17 +217,6 @@ public class RunTestRDFXML { return x; } - static void runTest(String label, ReaderRIOTFactory factory, String implLabel, String filename) { - try { - runTestCompareARP(label, factory, implLabel, filename); - } catch(Throwable ex) { - throw new RuntimeException(filename, ex) { - @Override - public Throwable fillInStackTrace() { return this; } - }; - } - } - static class ErrorHandlerCollector implements ErrorHandler { List<String> warnings = new ArrayList<>(); List<String> errors = new ArrayList<>(); @@ -295,9 +285,9 @@ public class RunTestRDFXML { String testFullLabel = format("-- Test : %-4s : %s", testLabel, filename); Graph expectedGraph; - // -- "Reference" implementation ErrorHandlerCollector errorHandlerReference = new ErrorHandlerCollector(); try { + // Reference expectation expectedGraph = parseFile(referenceFactory, errorHandlerReference, filename); } catch (RiotException ex) { // Exception expected. Run as "failure test" @@ -310,6 +300,15 @@ public class RunTestRDFXML { } + /** + * Run a test, single parse of using the given reader factory. + */ + public static void runTestPlain(String label, ReaderRIOTFactory testSubjectFactory, String implLabel, String filename) { + String testLabel = format("-- Test : %-4s : %s", implLabel, filename); + ErrorHandlerCollector errorHandlerReference = new ErrorHandlerCollector(); + parseFile(testSubjectFactory, errorHandlerReference, filename); + } + /** * Run a test, expecting a graph as the result. */ diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java index 00eac3a12b..8c16eec860 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java @@ -39,10 +39,20 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; -/** Cases where the RRX parsers differ from ARP */ +/** + * Additional tests for RRX: + * <ul> + * <li>errors and warnings not in the W3C manifest files</li> + * <li>additional reports</li> + * <li>extensions toRDF/XML</li> + * </ul> + */ @RunWith(Parameterized.class) public class TestRRX { + + private static String DIR = "testing/RIOT/rrx-files/"; + @Parameters(name = "{index}: {0} {1}") public static Iterable<Object[]> data() { List<Object[]> x = new ArrayList<>(); @@ -60,17 +70,38 @@ public class TestRRX { this.lang = lang; } - @Test public void error01() { - errorTest("error01.rdf"); + // Test2 for more than one object in RDF/XML striping. + @Test public void error_multiple_objects_lex_node() { + errorTest("multiple_objects_lex_node.rdf"); + } + + @Test public void error_multiple_objects_node_lex() { + errorTest("multiple_objects_node_lex.rdf"); + } + + @Test public void error_multiple_objects_node_node() { + errorTest("multiple_objects_node_node.rdf"); + } + + // Check that the "one object" parse state does not impact deeper structures. + @Test public void nested_object() { + goodTest("nested_object.rdf"); } - @Test public void warn_literal() { + // rdf:parserType= + @Test public void error_parseType_unknown() { + // This is only a warning in ARP. + errorTest("parseType-unknown.rdf", false); + } + + @Test public void warn_parseType_extension_1() { // Now valid. parseType="literal" -> parseType="Literal" // because ARP behaved that way. // Warning issued. warningTest("warn01.rdf", 1); } + // CIM @Test public void cim_statements01() { // parseType="Statements" // because ARP behaved that way. @@ -79,6 +110,7 @@ public class TestRRX { warningTest("cim_statements01.rdf", 2); } + // misc @Test public void noBase01() { // Call with no base; no base needed. noBase("file-no-base.rdf"); @@ -92,7 +124,7 @@ public class TestRRX { private void noBase(String filename) { ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); - String fn = "testing/RIOT/rrx-files/"+filename; + String fn = DIR+filename; ErrorHandlerCollector errorHandler = new ErrorHandlerCollector(); ParserProfile parserProfile = RiotLib.createParserProfile(RiotLib.factoryRDF(), errorHandler, true); ReaderRIOT reader = factory.create(lang, parserProfile); @@ -105,16 +137,29 @@ public class TestRRX { } } + private void goodTest(String filename) { + ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); + String fn = DIR+filename; + RunTestRDFXML.runTestPlain(filename, factory, label, fn); + RunTestRDFXML.runTestCompareARP(fn, factory, label, fn); + } + private void warningTest(String filename, int warnings) { ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); - String fn = "testing/RIOT/rrx-files/"+filename; + String fn = DIR+filename; RunTestRDFXML.runTestExpectWarning(filename, factory, label, warnings, fn); + RunTestRDFXML.runTestCompareARP(fn, factory, label, fn); } private void errorTest(String filename) { + errorTest(filename, true); + } + + private void errorTest(String filename, boolean compare) { ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); - String fn = "testing/RIOT/rrx-files/"+filename; + String fn = DIR+filename; RunTestRDFXML.runTestExpectFailure(filename, factory, label, fn); + if ( compare ) + RunTestRDFXML.runTestCompareARP(fn, factory, label, fn); } - } diff --git a/jena-arq/testing/RIOT/rrx-files/README b/jena-arq/testing/RIOT/rrx-files/README index 79eefb4d90..2f50a97add 100644 --- a/jena-arq/testing/RIOT/rrx-files/README +++ b/jena-arq/testing/RIOT/rrx-files/README @@ -1,5 +1,13 @@ -# Extensions to RDF/XML +## Files for specific tests of RRX. + +This directory contained files used for specific tests of RRX to suppliment the +basic positive tests (in + +- errors and extensions. +See TestRRX.java + +# Extensions to RDF/XML CIM - uses rdf:parseType="Statements" for rdf:parseType="Literal" https://github.com/apache/jena/issues/2473 diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf b/jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf similarity index 51% copy from jena-arq/testing/RIOT/rrx-files/error01.rdf copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf index 1ac33893fb..7e39b6b036 100644 --- a/jena-arq/testing/RIOT/rrx-files/error01.rdf +++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf @@ -1,14 +1,13 @@ <?xml version='1.0'?> <!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> -<rdf:RDF +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:ex="http://example/" - > - - <!-- rdf:parseType not recognized --> - <rdf:Description rdf:about="http://host/subject"> - <ex:property rdf:parseType="unknown"/> + xmlns:ex="http://example/"> + <rdf:Description rdf:about="http://example/s"> + <ex:property> + TEXT + <ex:object2/> + </ex:property> </rdf:Description> - </rdf:RDF> diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf similarity index 51% copy from jena-arq/testing/RIOT/rrx-files/error01.rdf copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf index 1ac33893fb..4336b6eb4b 100644 --- a/jena-arq/testing/RIOT/rrx-files/error01.rdf +++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf @@ -1,14 +1,13 @@ <?xml version='1.0'?> <!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> -<rdf:RDF +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:ex="http://example/" - > - - <!-- rdf:parseType not recognized --> - <rdf:Description rdf:about="http://host/subject"> - <ex:property rdf:parseType="unknown"/> + xmlns:ex="http://example/"> + <rdf:Description rdf:about="http://example/s"> + <ex:property> + <ex:object1/> + TEXT + </ex:property> </rdf:Description> - </rdf:RDF> diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf similarity index 51% copy from jena-arq/testing/RIOT/rrx-files/error01.rdf copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf index 1ac33893fb..63dc5d5785 100644 --- a/jena-arq/testing/RIOT/rrx-files/error01.rdf +++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf @@ -1,14 +1,13 @@ <?xml version='1.0'?> <!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> -<rdf:RDF +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:ex="http://example/" - > - - <!-- rdf:parseType not recognized --> - <rdf:Description rdf:about="http://host/subject"> - <ex:property rdf:parseType="unknown"/> + xmlns:ex="http://example/"> + <rdf:Description rdf:about="http://example/s"> + <ex:property> + <ex:object1/> + <ex:object2/> + </ex:property> </rdf:Description> - </rdf:RDF> diff --git a/jena-arq/testing/RIOT/rrx-files/nested_object.rdf b/jena-arq/testing/RIOT/rrx-files/nested_object.rdf new file mode 100644 index 0000000000..bde480de20 --- /dev/null +++ b/jena-arq/testing/RIOT/rrx-files/nested_object.rdf @@ -0,0 +1,17 @@ +<?xml version='1.0'?> +<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> + +<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:ex="http://example/"> + <rdf:Description rdf:about="http://example/s"> + <ex:property1> + <rdf:Description rdf:about="http://example/o1"> + <ex:property2>NESTED1</ex:property2> + <ex:property3>NESTED2</ex:property3> + <!-- white space --> + + </rdf:Description> + </ex:property1> + </rdf:Description> +</rdf:RDF> diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf b/jena-arq/testing/RIOT/rrx-files/parseType-unknown.rdf similarity index 100% rename from jena-arq/testing/RIOT/rrx-files/error01.rdf rename to jena-arq/testing/RIOT/rrx-files/parseType-unknown.rdf diff --git a/jena-arq/testing/RIOT/rrx/README b/jena-arq/testing/RIOT/rrx/README new file mode 100644 index 0000000000..399d3bf1ca --- /dev/null +++ b/jena-arq/testing/RIOT/rrx/README @@ -0,0 +1,4 @@ +== Basic positive tests for RRX + +The tests in this directory cover RDF/XML features. +See RunTestRDFXML.java.