This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/main by this push:
     new 48cd7f762a GH-2620: Striping error fix for RRX
48cd7f762a is described below

commit 48cd7f762afe721c9ab14af3d70ea88818366ca1
Author: Andy Seaborne <a...@apache.org>
AuthorDate: Tue Aug 6 20:37:39 2024 +0100

    GH-2620: Striping error fix for RRX
---
 .../org/apache/jena/riot/lang/rdfxml/SysRRX.java   |   1 -
 .../riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java     | 103 +++++++++++++--------
 .../rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java   |  36 +++++--
 .../rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java   |  31 +++++--
 .../jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java      |   6 +-
 .../rdfxml/converted_legacy/TS_ConvertedARP1.java  |   3 +
 .../converted_legacy/TestARP1_W3C_Pending.java     |   4 +-
 .../rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java  |   3 +-
 ...Xev.java => TestManifest_RDF11_RRX_StAXev.java} |   2 +-
 .../lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java    |   2 +-
 .../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java   |  27 +++---
 .../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java  |  61 ++++++++++--
 jena-arq/testing/RIOT/rrx-files/README             |  10 +-
 .../{error01.rdf => multiple_objects_lex_node.rdf} |  15 ++-
 .../{error01.rdf => multiple_objects_node_lex.rdf} |  15 ++-
 ...{error01.rdf => multiple_objects_node_node.rdf} |  15 ++-
 jena-arq/testing/RIOT/rrx-files/nested_object.rdf  |  17 ++++
 .../{error01.rdf => parseType-unknown.rdf}         |   0
 jena-arq/testing/RIOT/rrx/README                   |   4 +
 19 files changed, 245 insertions(+), 110 deletions(-)

diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java
index 0fc87e881f..17a6173b26 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java
@@ -38,5 +38,4 @@ public class SysRRX {
         xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, 
Boolean.FALSE);
         return xmlInputFactory;
     }
-
 }
diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
index 27447c3524..e34ccc4329 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
@@ -28,6 +28,7 @@ import javax.xml.namespace.NamespaceContext;
 import javax.xml.namespace.QName;
 
 import org.apache.jena.atlas.io.IndentedWriter;
+import org.apache.jena.atlas.lib.EscapeStr;
 import org.apache.jena.datatypes.RDFDatatype;
 import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
 import org.apache.jena.graph.Node;
@@ -235,13 +236,18 @@ class ParserRDFXML_SAX
         ObjectLex,
 
         // The node implied by rdf:parseType=Resource
-        ObjectParserTypeResource,
+        ObjectParseTypeResource,
 
         // The object is rdf:parseType=Literal. Collecting characters of a RDF 
XML Literal
         ObjectParseTypeLiteral,
 
         // The object is rdf:parseType=Collection (RDF List)
-        ObjectParseTypeCollection
+        ObjectParseTypeCollection,
+
+        // The object is a nested element.
+        // Unlike NodeElement, there is only one ObjectNode inside one 
property.
+        // ObjectLex becomes ObjectNode if a startElement is found.
+        ObjectNode
     }
 
     /** Integer holder for rdf:li */
@@ -314,7 +320,7 @@ class ParserRDFXML_SAX
         // If this frame is ParserMode.ObjectResource , then it is an implicit 
frame
         // inserted for the implied node. Pop the stack again to balance the 
push of
         // the implicit node element.
-        if ( parserMode == ParserMode.ObjectParserTypeResource ) {
+        if ( parserMode == ParserMode.ObjectParseTypeResource ) {
             popParserFrame();
             decIndent();
         }
@@ -485,6 +491,7 @@ class ParserRDFXML_SAX
             }
             trace.printf(") mode = %s\n", parserMode);
         }
+
         incIndent();
         Position position = position();
 
@@ -497,16 +504,20 @@ class ParserRDFXML_SAX
         switch (parserMode) {
             case ObjectLex -> {
                 // While processing ObjectLex, we found a startElement.
-                // The "ObjectLex" decision needs updating. This is a  
ParserMode.NodeElement.
+                // The "ObjectLex" decision needs updating. This is a 
ParserMode.NodeElement.
                 // This is not parseType=Resource.
                 if ( !isWhitespace(accCharacters) )
                     throw RDFXMLparseError("XML content before nested 
element", position);
                 accCharacters.setLength(0);
-                // Declare that the containing frame is expecting a node 
element mode.
-                // Leave in parserMode=ObjectLex
-                pushParserFrame(ParserMode.NodeElement);
+                // Declare that the containing frame is expecting a node 
element as the object.
+                // There can be only one object.
+                pushParserFrame(ParserMode.ObjectNode);
                 processBaseAndLang(attributes, position);
             }
+            case ObjectNode -> {
+                // Already in ObjectNode so a second startElement is an error.
+                throw RDFXMLparseError("Start tag after inner node element 
(only one node element permitted): got "+qName, position);
+            }
             default -> {
                 // For everything else.
                 pushParserFrame();
@@ -528,7 +539,7 @@ class ParserRDFXML_SAX
                 // The top element can be a single nodeElement.
                 startNodeElement(namespaceURI, localName, qName, attributes, 
position);
             }
-            case NodeElement ->
+            case NodeElement, ObjectNode ->
                 startNodeElement(namespaceURI, localName, qName, attributes, 
position);
             case PropertyElement ->
                 startPropertyElement(namespaceURI, localName, qName, 
attributes, position);
@@ -574,30 +585,33 @@ class ParserRDFXML_SAX
                 return;
             }
             endXMLLiteral(position);
-            if ( ReaderRDFXML_SAX.TRACE )
-                trace.printf("**** End XML Literal[%s]: elementDepth=%d / 
xmlLiteralStartDepth=%s\n", qName, elementDepth, xmlLiteralStartDepth);
             // Keep going to finish the end tag.
         }
 
         switch (parserMode) {
-            case NodeElement ->
-                    endNodeElement(position);
+            case NodeElement, ObjectNode ->
+                endNodeElement(position);
             case PropertyElement -> {
                 if ( isEndNodeElement() )
                     // Possible next property but it's a node element so no 
property
-                    // and it's end of node, with two "end property" tags seen 
in a row.
+                    // and it is end of node, with two "end property" tags 
seen in a row.
+                    // This occurs for
+                    //   <rdf:Description> with no property elements (maybe some 
attribute properties).
+                    //   <Class></Class>
                     endNodeElement(position);
                 else
                     endPropertyElement(position);
             }
-            case ObjectLex ->
+            case ObjectLex -> {
                 endObjectLexical(position);
-            case ObjectParseTypeLiteral ->
+            }
+            case ObjectParseTypeLiteral -> {
                 endObjectXMLLiteral(position);
-            case ObjectParseTypeCollection ->
+            }
+            case ObjectParseTypeCollection -> {
                 endCollectionItem(position);
-            default ->
-                throw RDFXMLparseError("Inconsistent parserMode:" + 
parserMode, position);
+            }
+            default -> throw RDFXMLparseError("Inconsistent parserMode:" + 
parserMode, position);
         }
 
         popParserFrame();
@@ -783,7 +797,7 @@ class ParserRDFXML_SAX
                 // Push a frame here as an implicit node frame because the 
subject is changing.
                 // The companion "end frame" is handled in "popParserFrame" 
which
                 // checks for parserMode=ImplicitNode
-                parserMode(ParserMode.ObjectParserTypeResource);
+                parserMode(ParserMode.ObjectParseTypeResource);
                 pushParserFrame();
                 // ... expect a property element start or an end element.
                 parserMode(ParserMode.PropertyElement);
@@ -808,12 +822,12 @@ class ParserRDFXML_SAX
         return currentProperty == null;
     }
 
-    //    private String xmlBaseStr(Attributes attributes, Position position) {
-    //        String baseStr = attributes.getValue(xmlNS, xmlBaseLN);
-    //        if ( baseStr == null )
-    //            return null;
-    //        return IRIs.resolve(currentBase, baseStr);
-    //    }
+//    private String xmlBaseStr(Attributes attributes, Position position) {
+//        String baseStr = attributes.getValue(xmlNS, xmlBaseLN);
+//        if ( baseStr == null )
+//            return null;
+//        return IRIs.resolve(currentBase, baseStr);
+//    }
 
         // Start element encountered when expecting a ObjectCollection
         private void startCollectionItem(String namespaceURI, String 
localName, String qName, Attributes attributes, Position position) {
@@ -958,8 +972,9 @@ class ParserRDFXML_SAX
         String qName = attributes.getQName(index);
 
         if ( namespace == null || namespace.isEmpty() ) {
-            if ( outputWarnings ) {
-                // In SAX, xmlns: is a qname, but namespace and local name are 
"".
+            // In SAX, xmlns: is qname, but namespace and local name are "".
+            //RDFXMLparseError("XML attribute '"+qName+"' used for RDF 
property attribute (no namespace)", position);
+            if ( outputWarnings ){
                 if ( ! localName.isEmpty() )    // Skip XML namespace 
declarations.
                     RDFXMLparseWarning("XML attribute '"+qName+"' used for RDF 
property attribute - ignored", position);
             }
@@ -1040,6 +1055,7 @@ class ParserRDFXML_SAX
             return ObjectParseType.Plain;
         try {
             String parseTypeName = parseTypeStr;
+            // Extensions - some names that appear in the wild
             switch(parseTypeName) {
                 case "literal" -> {
                     RDFXMLparseWarning("Encountered rdf:parseType='literal'. 
Treated as rdf:parseType='Literal'", position);
@@ -1138,29 +1154,42 @@ class ParserRDFXML_SAX
                 return;
             }
             // Allow whitespace only
-            case ObjectParserTypeResource, NodeElement, PropertyElement, 
ObjectParseTypeCollection -> {
-                if ( !isWhitespace(ch, start, length) )
-                    throw RDFXMLparseError("Non-whitespace text content 
between element tags: "
-                                                             + 
nonWhitespaceForMsg(ch, start, length), position());
+            case NodeElement, PropertyElement, ObjectParseTypeResource, 
ObjectParseTypeCollection, ObjectNode -> {
+                if ( !isWhitespace(ch, start, length) ) {
+                    String text = nonWhitespaceMsg(ch, start, length);
+                    throw RDFXMLparseError("Non-whitespace text content 
between element tags: '"+text+"'", position());
+                }
             }
             case TOP -> {
                 if ( !isWhitespace(ch, start, length) ) {
-                    throw RDFXMLparseError("Non-whitespace text content 
outside element tags: "
-                                                             + 
nonWhitespaceForMsg(ch, start, length), position());
+                    String text = nonWhitespaceMsg(ch, start, length);
+                    throw RDFXMLparseError("Non-whitespace text content 
outside element tags: '"+text+"'", position());
                 }
             }
         }
     }
 
     /** The string for the first non-whitespace index. */
-    private static String nonWhitespaceForMsg(char[] ch, int start, int 
length) {
+    private static String nonWhitespaceMsg(char[] ch, int start, int length) {
+        final int MaxLen = 10; // Short - this is for error messages
+        // Find the start of non-whitespace.
+        // Slice, truncate if necessary.
+        // Make safe.
         for ( int i = start ; i < start + length ; i++ ) {
             if ( !Character.isWhitespace(ch[i]) ) {
-                int len = Math.min(20, start - i);
-                return new String(ch, i, len);
+                // Slight overshoot
+                int remaindingLength = length - (i-start);
+                int len = Math.min(MaxLen, remaindingLength);
+                String x = new String(ch, i, len);
+                if ( remaindingLength > MaxLen )
+                    x = x+"...";
+                // Escape characters, especially newlines and backspaces.
+                x = EscapeStr.stringEsc(x);
+                x = x.stripTrailing();
+                return x;
             }
         }
-        throw new RDFXMLParseException("Failed to find any non-whitespace 
characters");
+        throw new RDFXMLParseException("Internal error: Failed to find any 
non-whitespace characters");
     }
 
     @Override
diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
index e4a0a3a80e..cbbaac3b34 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
@@ -31,6 +31,7 @@ import javax.xml.stream.events.*;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.jena.atlas.io.IndentedWriter;
+import org.apache.jena.atlas.lib.EscapeStr;
 import org.apache.jena.datatypes.RDFDatatype;
 import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
 import org.apache.jena.graph.Node;
@@ -509,6 +510,8 @@ class ParserRDFXML_StAX_EV {
         if ( namespace == null || namespace.isEmpty() ) {
             // SAX passes xmlns as attributes with namespace and local name of 
"". The qname is "xmlns:"/"xmlns"
             // StAX, does not pass namespaces.
+
+            //RDFXMLparseError("XML attribute '"+localName+"' used for RDF 
property attribute (no namespace)", event);
             if ( outputWarnings )
                 RDFXMLparseWarning("XML attribute '"+localName+"' used for RDF 
property attribute - ignored", event);
             return false;
@@ -692,10 +695,9 @@ class ParserRDFXML_StAX_EV {
                 event = nextEventAny();
             }
             if ( event.isStartElement() ) {
-                // DRY!
                 // Striped - inner node element.
                 if ( ! isWhitespace(sBuff) ) {
-                    String msg = nonWhitespaceForMsg(sBuff.toString());
+                    String msg = nonWhitespaceMsg(sBuff.toString());
                     throw RDFXMLparseError("Content before node element. 
'"+msg+"'", event);
                 }
                 event = processNestedNodeElement(event, subject, property, 
emitter);
@@ -1196,8 +1198,11 @@ class ParserRDFXML_StAX_EV {
                     }
                     case CHARACTERS, CDATA -> {
                         Characters chars = ev.asCharacters();
-                        if ( ! isWhitespace(ev) )
-                            throw RDFXMLparseError("Read "+str(ev)+" when 
expecting a start or end element.", ev);
+                        if ( ! isWhitespace(ev) ) {
+                            String str = ev.asCharacters().getData();
+                            String text = nonWhitespaceMsg(str);
+                            throw RDFXMLparseError("Expecting a start or end 
element. Got characters '"+text+"'", ev);
+                        }
                     }
                     case COMMENT, DTD -> { } // Skip
                     //case SPACE ->
@@ -1488,7 +1493,7 @@ class ParserRDFXML_StAX_EV {
     private void noContentAllowed(XMLEvent event) {
         if ( event.isCharacters() ) {
             String content = event.asCharacters().getData();
-            content = nonWhitespaceForMsg(content);
+            content = nonWhitespaceMsg(content);
             throw RDFXMLparseError("Expected XML start tag or end tag. Found 
text content (possible striping error): \""+content+"\"", event);
         }
     }
@@ -1628,12 +1633,23 @@ class ParserRDFXML_StAX_EV {
         };
     }
 
-    /** The string for the first non-whitespace index. */
-    private static String nonWhitespaceForMsg(String string) {
-        for ( int i = 0 ; i < string.length() ; i++ ) {
+    /** A short, escaped excerpt of the string, starting at the first non-whitespace character (for error messages). */
+    private static String nonWhitespaceMsg(String string) {
+        final int MaxLen = 10; // Short - this is for error messages
+        // Find the start of non-whitespace.
+        // Slice, truncate if necessary.
+        // Make safe.
+        int length = string.length();
+        for ( int i = 0 ; i < length ; i++ ) {
             if ( !Character.isWhitespace(string.charAt(i)) ) {
-                int index = Math.min(20, string.length()-i);
-                return string.substring(index);
+                int len = Math.min(MaxLen, length - i);
+                String x = string.substring(i, i+len);
+                if ( length > MaxLen )
+                    x = x+"...";
+                // Escape characters, especially newlines and backspaces.
+                x = EscapeStr.stringEsc(x);
+                x = x.stripTrailing();
+                return x;
             }
         }
         throw new RDFXMLParseException("Failed to find any non-whitespace 
characters");
diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
index 17a1aebafe..32b1024056 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
@@ -35,6 +35,7 @@ import javax.xml.stream.events.XMLEvent;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.jena.atlas.io.IndentedWriter;
+import org.apache.jena.atlas.lib.EscapeStr;
 import org.apache.jena.datatypes.RDFDatatype;
 import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
 import org.apache.jena.graph.Node;
@@ -485,6 +486,7 @@ class ParserRDFXML_StAX_SR {
         if ( namespace == null || namespace.isEmpty() ) {
             // SAX passes xmlns as attributes with namespace and local name of 
"". The qname is "xmlns:"/"xmlns"
             // StAX, does not pass namespaces.
+            //RDFXMLparseError("XML attribute '"+qName.getLocalPart()+"' used 
for RDF property attribute (no namespace)", event);
             if ( outputWarnings )
                 RDFXMLparseWarning("XML attribute '"+qName.getLocalPart()+"' 
used for RDF property attribute - ignored");
             return false;
@@ -666,7 +668,7 @@ class ParserRDFXML_StAX_SR {
             }
             if ( lookingAt(event, START_ELEMENT) ) {
                 if ( ! isWhitespace(accCharacters) ) {
-                    String msg = nonWhitespaceForMsg(accCharacters.toString());
+                    String msg = nonWhitespaceMsg(accCharacters.toString());
                     throw RDFXMLparseError("Content before node element. 
'"+msg+"'");
                 }
                 event = processNestedNodeElement(event, subject, property, 
emitter);
@@ -1163,8 +1165,10 @@ class ParserRDFXML_StAX_SR {
                     }
                     case CHARACTERS, CDATA -> {
                         String chars = xmlSource.getText();
-                        if ( ! isWhitespace(chars) )
-                            throw RDFXMLparseError("Read 
"+nonWhitespaceForMsg(chars)+" when expecting a start or end element.");
+                        if ( ! isWhitespace(chars) ) {
+                            String text = nonWhitespaceMsg(chars);
+                            throw RDFXMLparseError("Expecting a start or end 
element. Got characters '"+text+"'");
+                        }
                         // Skip
                         break;
                     }
@@ -1472,7 +1476,7 @@ class ParserRDFXML_StAX_SR {
     private void noContentAllowed(XMLEvent event) {
         if ( event.isCharacters() ) {
             String content = event.asCharacters().getData();
-            content = nonWhitespaceForMsg(content);
+            content = nonWhitespaceMsg(content);
             throw RDFXMLparseError("Expected XML start tag or end tag. Found 
text content (possible striping error): \""+content+"\"");
         }
     }
@@ -1622,11 +1626,22 @@ class ParserRDFXML_StAX_SR {
     }
 
     /** The string for the first non-whitespace index. */
-    private static String nonWhitespaceForMsg(String string) {
-        for ( int i = 0 ; i < string.length() ; i++ ) {
+    private static String nonWhitespaceMsg(String string) {
+        final int MaxLen = 10; // Short - this is for error messages
+        // Find the start of non-whitespace.
+        // Slice, truncate if necessary.
+        // Make safe.
+        int length = string.length();
+        for ( int i = 0 ; i < length ; i++ ) {
             if ( !Character.isWhitespace(string.charAt(i)) ) {
-                int index = Math.min(20, string.length()-i);
-                return string.substring(index);
+                int len = Math.min(MaxLen, length - i);
+                String x = string.substring(i, i+len);
+                if ( length > MaxLen )
+                    x = x+"...";
+                // Escape characters, especially newlines and backspaces.
+                x = EscapeStr.stringEsc(x);
+                x = x.stripTrailing();
+                return x;
             }
         }
         throw new RDFXMLParseException("Failed to find any non-whitespace 
characters");
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java
index 9c268d5abd..230105e600 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java
@@ -26,10 +26,10 @@ import org.junit.runners.Suite;
 
 @RunWith(Suite.class)
 @Suite.SuiteClasses( {
-    // Local file and rdf11-xml - detailed checking.
+    // Local tests, extensions and error reports.
     TS_RRX.class,
 
-    // Manifest-driven rdf11-xml - all parsers
+    // Manifest-driven RDF 1.1 rdf-xml test suite - all parsers
     Scripts_RRX_RDFXML.class,
 
     // jena-core legacy test (RDF 1.0)
@@ -43,7 +43,7 @@ import org.junit.runners.Suite;
  * converted to run as RIOT tests.
  * <p>
  * {@linkplain TS_RRX} runs local RRX tests by comparing the different RRX 
parsers
- * to ARP1. tese test check for the same number of warnig as well.
+ * to ARP1. These tests check for the same number of warnings as well.
  * The {@code TestRDFXML_RRX_*} are running on extra local files. The
  * TestRDFXML_W3C_* are running on the RDF 1.0 test suite that ARP1 has used.
  * <p>
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
index 8379d47d78..e479ff7414 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
@@ -32,5 +32,8 @@ import org.junit.runners.Suite.SuiteClasses;
 /**
  * The ARP (final) tests from jena-core, so related to RDF 1.0 test suite,
  * then converted to run as RIOT tests.
+ *
+ * The files used are in "testing/RIOT/rdf11-xml/"
+ * which have been updated for RDF 1.1 if necessary.
  */
 public class TS_ConvertedARP1 {}
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
index 305d11409f..b5cd53e162 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
@@ -24,7 +24,9 @@ import org.junit.Test;
 
 /**
  * The ARP test suite run on a local legacy copy of the RDF 1.0 WG test suite
- * (updated for RDF 1.1). Tests marked pending.
+ * (updated for RDF 1.1).
+ *
+ * Tests marked pending.
  */
 public class TestARP1_W3C_Pending {
     @Test
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
index 15bcfca775..afe97d071b 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
@@ -24,6 +24,7 @@ import org.junit.runners.Suite.SuiteClasses;
 
 /**
  * Run all the parsers on the rdf-test CG RDF/XML manifest files (RDF 1.1).
+ * Files in "testing/RIOT/rdf11-xml/".
  */
 @RunWith(Suite.class)
 @SuiteClasses( {
@@ -32,7 +33,7 @@ import org.junit.runners.Suite.SuiteClasses;
     TestManifest_RDF11_ARP1.class,
     TestManifest_RDF11_RRX_SAX.class,
     TestManifest_RDF11_RRX_StAXsr.class,
-    TestManifest_RDF_RRX_StAXev.class
+    TestManifest_RDF11_RRX_StAXev.class
 })
 
 public class Scripts_RRX_RDFXML {}
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java
 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java
similarity index 97%
rename from 
jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java
rename to 
jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java
index 859c92bbb8..fb904943f6 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java
@@ -36,7 +36,7 @@ import org.junit.runner.RunWith;
     "testing/RIOT/rdf11-xml/manifest.ttl"
 })
 
-public class TestManifest_RDF_RRX_StAXev {
+public class TestManifest_RDF11_RRX_StAXev {
 
     static ReaderRIOTFactory systemReaderfactory;
     @BeforeClass
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
index b25bb22968..02c3162121 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
@@ -44,6 +44,6 @@ public abstract class AbstractTestRDFXML_RRX {
     }
 
     @Test public void test() {
-        RunTestRDFXML.runTest(testLabel, factory, implName, filename);
+        RunTestRDFXML.runTestCompareARP(testLabel, factory, implName, 
filename);
     }
 }
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
index 09aaea80ec..da6648a05b 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
@@ -72,10 +72,12 @@ public class RunTestRDFXML {
     }
 
     /**
-     * Manifest-like in that the test files in a specific order.
+     * Manifest-like in that the test files are run in a specific order.
      * The local files cover all the features of RDF/XML parsing
      * but not in great depth.
      * These tests more easily highlight problems and the grouping helps.
+     *
+     * Check the files on disk agree with the built-in order list.
      */
     static List<String> localTestFiles() {
         Path LOCAL_DIR = Path.of("testing/RIOT/rrx/");
@@ -184,7 +186,6 @@ public class RunTestRDFXML {
                  );
 
         for ( String fn : testfiles ) {
-
             if ( ! found.contains(fn) )
                 output.printf("Not found in file area: %s\n", fn);
         }
@@ -216,17 +217,6 @@ public class RunTestRDFXML {
         return x;
     }
 
-    static void runTest(String label, ReaderRIOTFactory factory, String 
implLabel, String filename) {
-        try {
-            runTestCompareARP(label, factory, implLabel, filename);
-        } catch(Throwable ex) {
-            throw new RuntimeException(filename, ex) {
-                @Override
-                public Throwable fillInStackTrace() { return this; }
-            };
-        }
-    }
-
     static class ErrorHandlerCollector implements ErrorHandler {
         List<String> warnings = new ArrayList<>();
         List<String> errors = new ArrayList<>();
@@ -295,9 +285,9 @@ public class RunTestRDFXML {
         String testFullLabel = format("-- Test : %-4s : %s", testLabel, 
filename);
 
         Graph expectedGraph;
-        // -- "Reference" implementation
         ErrorHandlerCollector errorHandlerReference = new 
ErrorHandlerCollector();
         try {
+            // Reference expectation
             expectedGraph = parseFile(referenceFactory, errorHandlerReference, 
filename);
         } catch (RiotException ex) {
             // Exception expected. Run as "failure test"
@@ -310,6 +300,15 @@ public class RunTestRDFXML {
     }
 
 
+    /**
+     * Run a test: a single parse using the given reader factory.
+     */
+    public static void runTestPlain(String label, ReaderRIOTFactory 
testSubjectFactory, String implLabel, String filename) {
+        String testLabel = format("-- Test : %-4s : %s", implLabel, filename);
+        ErrorHandlerCollector errorHandlerReference = new 
ErrorHandlerCollector();
+        parseFile(testSubjectFactory, errorHandlerReference, filename);
+    }
+
     /**
      * Run a test, expecting a graph as the result.
      */
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
index 00eac3a12b..8c16eec860 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
@@ -39,10 +39,20 @@ import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.junit.runners.Parameterized.Parameters;
 
-/** Cases where the RRX parsers differ from ARP */
+/**
+ * Additional tests for RRX:
+ * <ul>
+ * <li>errors and warnings not in the W3C manifest files</li>
+ * <li>additional reports</li>
+ * <li>extensions to RDF/XML</li>
+ * </ul>
+ */
 
 @RunWith(Parameterized.class)
 public class TestRRX {
+
+    private static String DIR = "testing/RIOT/rrx-files/";
+
     @Parameters(name = "{index}: {0} {1}")
     public static Iterable<Object[]> data() {
         List<Object[]> x = new ArrayList<>();
@@ -60,17 +70,38 @@ public class TestRRX {
         this.lang = lang;
     }
 
-    @Test public void error01() {
-        errorTest("error01.rdf");
+    // Tests for more than one object in RDF/XML striping.
+    @Test public void error_multiple_objects_lex_node() {
+        errorTest("multiple_objects_lex_node.rdf");
+    }
+
+    @Test public void error_multiple_objects_node_lex() {
+        errorTest("multiple_objects_node_lex.rdf");
+    }
+
+    @Test public void error_multiple_objects_node_node() {
+        errorTest("multiple_objects_node_node.rdf");
+    }
+
+    // Check that the "one object" parse state does not impact deeper 
structures.
+    @Test public void nested_object() {
+        goodTest("nested_object.rdf");
     }
 
-    @Test public void warn_literal() {
+    // rdf:parseType=
+    @Test public void error_parseType_unknown() {
+        // This is only a warning in ARP.
+        errorTest("parseType-unknown.rdf", false);
+    }
+
+    @Test public void warn_parseType_extension_1() {
         // Now valid. parseType="literal" -> parseType="Literal"
         // because ARP behaved that way.
         // Warning issued.
         warningTest("warn01.rdf", 1);
     }
 
+    // CIM
     @Test public void cim_statements01() {
         // parseType="Statements"
         // because ARP behaved that way.
@@ -79,6 +110,7 @@ public class TestRRX {
         warningTest("cim_statements01.rdf", 2);
     }
 
+    // misc
     @Test public void noBase01() {
         // Call with no base; no base needed.
         noBase("file-no-base.rdf");
@@ -92,7 +124,7 @@ public class TestRRX {
 
     private void noBase(String filename) {
         ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
-        String fn = "testing/RIOT/rrx-files/"+filename;
+        String fn = DIR+filename;
         ErrorHandlerCollector errorHandler = new ErrorHandlerCollector();
         ParserProfile parserProfile = 
RiotLib.createParserProfile(RiotLib.factoryRDF(), errorHandler, true);
         ReaderRIOT reader = factory.create(lang, parserProfile);
@@ -105,16 +137,29 @@ public class TestRRX {
         }
     }
 
+    private void goodTest(String filename) {
+        ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
+        String fn = DIR+filename;
+        RunTestRDFXML.runTestPlain(filename, factory, label, fn);
+        RunTestRDFXML.runTestCompareARP(fn, factory, label, fn);
+    }
+
     private void warningTest(String filename, int warnings) {
         ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
-        String fn = "testing/RIOT/rrx-files/"+filename;
+        String fn = DIR+filename;
         RunTestRDFXML.runTestExpectWarning(filename, factory, label, warnings, 
fn);
+        RunTestRDFXML.runTestCompareARP(fn, factory, label, fn);
     }
 
     private void errorTest(String filename) {
+        errorTest(filename, true);
+    }
+
+    private void errorTest(String filename, boolean compare) {
         ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
-        String fn = "testing/RIOT/rrx-files/"+filename;
+        String fn = DIR+filename;
         RunTestRDFXML.runTestExpectFailure(filename, factory, label, fn);
+        if ( compare )
+            RunTestRDFXML.runTestCompareARP(fn, factory, label, fn);
     }
-
 }
diff --git a/jena-arq/testing/RIOT/rrx-files/README 
b/jena-arq/testing/RIOT/rrx-files/README
index 79eefb4d90..2f50a97add 100644
--- a/jena-arq/testing/RIOT/rrx-files/README
+++ b/jena-arq/testing/RIOT/rrx-files/README
@@ -1,5 +1,13 @@
-# Extensions to RDF/XML
+## Files for specific tests of RRX.
+
+This directory contains files used for specific tests of RRX to supplement the
+basic positive tests (in testing/RIOT/rrx)
+
+- errors and extensions.
 
+See TestRRX.java
+
+# Extensions to RDF/XML
 
 CIM - uses rdf:parseType="Statements" for rdf:parseType="Literal"
   https://github.com/apache/jena/issues/2473
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf 
b/jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf
similarity index 51%
copy from jena-arq/testing/RIOT/rrx-files/error01.rdf
copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf
index 1ac33893fb..7e39b6b036 100644
--- a/jena-arq/testing/RIOT/rrx-files/error01.rdf
+++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf
@@ -1,14 +1,13 @@
 <?xml version='1.0'?>
 <!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 
-->
 
-<rdf:RDF
+<rdf:RDF 
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
-    xmlns:ex="http://example/";
-    >
-
-  <!-- rdf:parseType not recognized -->
-  <rdf:Description rdf:about="http://host/subject";>
-    <ex:property rdf:parseType="unknown"/>
+    xmlns:ex="http://example/";>
+  <rdf:Description rdf:about="http://example/s";>
+    <ex:property>
+      TEXT
+      <ex:object2/>
+    </ex:property>
   </rdf:Description>
-
 </rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf 
b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf
similarity index 51%
copy from jena-arq/testing/RIOT/rrx-files/error01.rdf
copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf
index 1ac33893fb..4336b6eb4b 100644
--- a/jena-arq/testing/RIOT/rrx-files/error01.rdf
+++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf
@@ -1,14 +1,13 @@
 <?xml version='1.0'?>
 <!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 
-->
 
-<rdf:RDF
+<rdf:RDF 
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
-    xmlns:ex="http://example/";
-    >
-
-  <!-- rdf:parseType not recognized -->
-  <rdf:Description rdf:about="http://host/subject";>
-    <ex:property rdf:parseType="unknown"/>
+    xmlns:ex="http://example/";>
+  <rdf:Description rdf:about="http://example/s";>
+    <ex:property>
+      <ex:object1/>
+      TEXT
+    </ex:property>
   </rdf:Description>
-
 </rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf 
b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf
similarity index 51%
copy from jena-arq/testing/RIOT/rrx-files/error01.rdf
copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf
index 1ac33893fb..63dc5d5785 100644
--- a/jena-arq/testing/RIOT/rrx-files/error01.rdf
+++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf
@@ -1,14 +1,13 @@
 <?xml version='1.0'?>
 <!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 
-->
 
-<rdf:RDF
+<rdf:RDF 
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
-    xmlns:ex="http://example/";
-    >
-
-  <!-- rdf:parseType not recognized -->
-  <rdf:Description rdf:about="http://host/subject";>
-    <ex:property rdf:parseType="unknown"/>
+    xmlns:ex="http://example/";>
+  <rdf:Description rdf:about="http://example/s";>
+     <ex:property>
+       <ex:object1/>
+       <ex:object2/>
+     </ex:property>
   </rdf:Description>
-
 </rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/nested_object.rdf 
b/jena-arq/testing/RIOT/rrx-files/nested_object.rdf
new file mode 100644
index 0000000000..bde480de20
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/nested_object.rdf
@@ -0,0 +1,17 @@
+<?xml version='1.0'?>
+<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 
-->
+
+<rdf:RDF 
+    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+    xmlns:ex="http://example/";>
+  <rdf:Description rdf:about="http://example/s";>
+    <ex:property1>
+      <rdf:Description rdf:about="http://example/o1";>
+        <ex:property2>NESTED1</ex:property2>
+        <ex:property3>NESTED2</ex:property3>
+        <!-- white space -->
+
+      </rdf:Description>
+    </ex:property1>
+  </rdf:Description>
+</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf 
b/jena-arq/testing/RIOT/rrx-files/parseType-unknown.rdf
similarity index 100%
rename from jena-arq/testing/RIOT/rrx-files/error01.rdf
rename to jena-arq/testing/RIOT/rrx-files/parseType-unknown.rdf
diff --git a/jena-arq/testing/RIOT/rrx/README b/jena-arq/testing/RIOT/rrx/README
new file mode 100644
index 0000000000..399d3bf1ca
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx/README
@@ -0,0 +1,4 @@
+== Basic positive tests for RRX
+
+The tests in this directory cover RDF/XML features.
+See RunTestRDFXML.java.


Reply via email to