Author: tilman
Date: Wed Dec 17 20:31:17 2025
New Revision: 1930681

Log:
PDFBOX-6125: skip empty property if wrong type when in lenient mode + test

Modified:
   
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
   
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Modified: 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    Wed Dec 17 19:07:11 2025        (r1930680)
+++ 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    Wed Dec 17 20:31:17 2025        (r1930681)
@@ -542,18 +542,26 @@ public class DomXmpParser
         if (bagOrSeq == null)
         {
             // not an array
-            String whatFound = "nothing";
             Node firstChild = property.getFirstChild();
-            if (firstChild != null)
+            if (!strictParsing)
             {
-                whatFound = firstChild instanceof Text ? "Text" : 
firstChild.getClass().getName();
+                if (firstChild == null)
+                {
+                    // PDFBOX-6125: ignore
+                    return;
+                }
+                if (firstChild instanceof Text)
+                {
+                    // PDFBOX-6125: Default to text in lenient mode
+                    // Improvement idea in the future: create an array and add 
the text item.
+                    manageSimpleType(xmp, property, Types.Text, container);
+                    return;
+                }
             }
-            if (!strictParsing && firstChild instanceof Text)
+            String whatFound = "nothing";
+            if (firstChild != null)
             {
-                // Default to text in lenient mode
-                // Improvement idea in the future: create an array and add the 
text item.
-                manageSimpleType(xmp, property, Types.Text, container);
-                return;
+                whatFound = firstChild instanceof Text ? "Text" : 
firstChild.getClass().getName();
             }
             throw new XmpParsingException(ErrorType.Format, "Invalid array 
definition, expecting " + type.card()
                     + " and found "

Modified: 
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- 
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
        Wed Dec 17 19:07:11 2025        (r1930680)
+++ 
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
        Wed Dec 17 20:31:17 2025        (r1930681)
@@ -1050,4 +1050,49 @@ public class DomXmpParserTest
         XMPSchema uaSchema2  = 
xmp2.getSchema("http://www.aiim.org/pdfua/ns/id/");
         assertEquals((Integer) 1, 
uaSchema2.getIntegerPropertyValueAsSimple("part"));
     }
+
+    /**
+     * Test empty property where a Seq is expected. The property is 
skipped in lenient mode.
+     *
+     * @throws XmpParsingException
+     * @throws TransformerException
+     * @throws BadFieldValueException 
+     */
+    @Test
+    public void testBadProp() throws XmpParsingException, 
TransformerException, BadFieldValueException, UnsupportedEncodingException
+    {
+        String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" 
standalone=\"no\"?>\n" +
+"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' bytes='1506'?><rdf:RDF 
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" 
xmlns:iX=\"http://ns.adobe.com/iX/1.0/\">\n" +
+"    <rdf:Description xmlns=\"http://purl.org/dc/elements/1.1/\" 
xmlns:dc=\"http://purl.org/dc/elements/1.1/\" about=\"\">\n" +
+"        <dc:creator/>\n" +
+"        <dc:coverage>Cover</dc:coverage>\n" +
+"    </rdf:Description>\n" +
+"</rdf:RDF><?xpacket end='r'?>";
+        try
+        {
+            new DomXmpParser().parse(s.getBytes("utf-8"));
+            fail("XmpParsingException expected");
+        }
+        catch (XmpParsingException ex)
+        {
+            assertEquals("Invalid array definition, expecting Seq and found 
nothing [prefix=dc; name=creator]", ex.getMessage());
+        }
+        DomXmpParser xmpParser2 = new DomXmpParser();
+        xmpParser2.setStrictParsing(false);
+        XMPMetadata xmp2 = xmpParser2.parse(s.getBytes("utf-8"));
+        DublinCoreSchema dublinCoreSchema2 = xmp2.getDublinCoreSchema();
+        assertNull(dublinCoreSchema2.getCreators());
+        assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.CREATOR));
+        assertEquals("Cover", dublinCoreSchema2.getCoverage());
+        XmpSerializer serializer = new XmpSerializer();
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        serializer.serialize(xmp2, baos, true);
+        DomXmpParser xmpParser3 = new DomXmpParser();
+        xmpParser3.setStrictParsing(false);
+        XMPMetadata xmp3 = xmpParser3.parse(baos.toByteArray());
+        DublinCoreSchema dublinCoreSchema3 = xmp3.getDublinCoreSchema();
+        assertNull(dublinCoreSchema3.getCreators());
+        assertNull(dublinCoreSchema3.getProperty(DublinCoreSchema.CREATOR));
+        assertEquals("Cover", dublinCoreSchema3.getCoverage());
+    }
 }

Reply via email to