Author: tilman
Date: Mon Dec 15 09:31:04 2025
New Revision: 1930584

Log:
PDFBOX-6123: support Seq / Bag mixup in lenient mode + test

Modified:
   pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
   pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Modified: 
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java   
Mon Dec 15 09:30:59 2025        (r1930583)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java   
Mon Dec 15 09:31:04 2025        (r1930584)
@@ -523,7 +523,7 @@ public class DomXmpParser
                     + whatFound
                     + " [prefix=" + prefix + "; name=" + name + "]");
         }
-        if (!bagOrSeq.getLocalName().equals(type.card().name()))
+        if (strictParsing && 
!bagOrSeq.getLocalName().equals(type.card().name()))
         {
             // not the good array type
             throw new XmpParsingException(ErrorType.Format, "Invalid array 
type, expecting " + type.card()

Modified: 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java   
    Mon Dec 15 09:30:59 2025        (r1930583)
+++ 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java   
    Mon Dec 15 09:31:04 2025        (r1930584)
@@ -28,6 +28,7 @@ import java.util.Calendar;
 import java.util.List;
 
 import org.apache.xmpbox.XMPMetadata;
+import org.apache.xmpbox.schema.DublinCoreSchema;
 import org.apache.xmpbox.schema.PDFAIdentificationSchema;
 import org.apache.xmpbox.schema.PhotoshopSchema;
 import org.apache.xmpbox.schema.XMPMediaManagementSchema;
@@ -517,4 +518,41 @@ class DomXmpParserTest
         String dataValue = 
xmp.getSchema("http://ns.example.org/default/1.0/").getUnqualifiedTextPropertyValue("Data");
         assertEquals("Example", dataValue);
     }
+
+    /**
+     * Test that a Seq / Bag mixup gets detected in strict mode and gets read 
in lenient mode.
+     * @throws XmpParsingException 
+     */
+    @Test
+    void testLenientBagSeqMixup() throws XmpParsingException
+    {
+        String s = "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
+                    "<?adobe-xap-filters esc=\"CRLF\"?>\n" +
+                    "<x:xmpmeta xmlns:x='adobe:ns:meta/'>\n" +
+                    "  <rdf:RDF 
xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>\n" +
+                    "          <rdf:Description 
xmlns:dc='http://purl.org/dc/elements/1.1/'\n" +
+                    "                           
dc:format='application/pdf'>\n" +
+                    "                  <dc:subject>\n" +
+                    "                          <rdf:Seq>\n" +
+                    "                                  <rdf:li>Important 
subject</rdf:li>\n" +
+                    "                                  <rdf:li>Unimportant 
subject</rdf:li>\n" +
+                    "                          </rdf:Seq>\n" +
+                    "                  </dc:subject>\n" +
+                    "          </rdf:Description>\n" +
+                    "  </rdf:RDF>\n" +
+                    "</x:xmpmeta>\n" +
+                    "<?xpacket end='w'?>";
+        XmpParsingException ex = assertThrows(
+                XmpParsingException.class,
+                () -> new 
DomXmpParser().parse(s.getBytes(StandardCharsets.UTF_8)));
+        assertEquals("Invalid array type, expecting Bag and found Seq 
[prefix=dc; name=subject]", ex.getMessage());
+        DomXmpParser xmpParser = new DomXmpParser();
+        xmpParser.setStrictParsing(false);
+        XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
+        DublinCoreSchema dublinCoreSchema = xmp.getDublinCoreSchema();
+        List<String> subjects = dublinCoreSchema.getSubjects();
+        assertEquals(2, subjects.size());
+        assertEquals("Important subject", subjects.get(0));
+        assertEquals("Unimportant subject", subjects.get(1));
+    }
 }

Reply via email to