Author: tilman
Date: Wed Dec 17 19:07:05 2025
New Revision: 1930679

Log:
PDFBOX-6125: LangAlt also not simple; skip empty attributes; add more tests

Modified:
   pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
   pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Modified: 
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java   
Wed Dec 17 19:07:00 2025        (r1930678)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java   
Wed Dec 17 19:07:05 2025        (r1930679)
@@ -366,7 +366,7 @@ public class DomXmpParser
                     type = TypeMapping.createPropertyType(Types.Text, 
Cardinality.Simple);
                 }
             }
-            else if (!type.type().isSimple() || type.card().isArray())
+            else if (!type.type().isSimple() || type.card().isArray() || 
type.type() == Types.LangAlt)
             {
                 if (strictParsing)
                 {
@@ -376,7 +376,12 @@ public class DomXmpParser
                 }
                 else
                 {
-                    // PDFBOX-6125: Default to text
+                    // PDFBOX-6125: Default to text or skip
+                    if (attr.getValue() == null || attr.getValue().isEmpty())
+                    {
+                        schema.removeAttribute(attr.getLocalName());
+                        return;
+                    }
                     type = TypeMapping.createPropertyType(Types.Text, 
Cardinality.Simple);
                 }
             }
@@ -1159,7 +1164,7 @@ public class DomXmpParser
                             type = TypeMapping.createPropertyType(Types.Text, 
Cardinality.Simple);
                         }
                     }
-                    else if (!type.type().isSimple() || type.card().isArray())
+                    else if (!type.type().isSimple() || type.card().isArray() 
|| type.type() == Types.LangAlt)
                     {
                         if (strictParsing)
                         {
@@ -1169,7 +1174,11 @@ public class DomXmpParser
                         }
                         else
                         {
-                            // PDFBOX-6125: Default to text
+                            // PDFBOX-6125: Default to text or skip
+                            if (attr.getValue() == null || 
attr.getValue().isEmpty())
+                            {
+                                continue;
+                            }
                             type = TypeMapping.createPropertyType(Types.Text, 
Cardinality.Simple);
                         }
                     }

Modified: 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java   
    Wed Dec 17 19:07:00 2025        (r1930678)
+++ 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java   
    Wed Dec 17 19:07:05 2025        (r1930679)
@@ -51,6 +51,7 @@ import org.apache.xmpbox.type.TextType;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import org.junit.jupiter.api.Test;
 
@@ -818,6 +819,80 @@ class DomXmpParserTest
         assertEquals("[creator=TextType:Creator]", 
dublinCoreSchema.getProperty(DublinCoreSchema.CREATOR).toString());
     }
 
+    /**
+     * Test empty attribute where an array is expected. Strict parsing rejects the attribute;
+     * in lenient mode the attribute is skipped, and it must still be absent after the metadata
+     * has been serialized and parsed again.
+     *
+     * @throws XmpParsingException if the lenient parse or the re-parse fails
+     * @throws TransformerException if the test fails unexpectedly
+     * @throws BadFieldValueException if the test fails unexpectedly
+     */
+    @Test
+    void testBadAttr4() throws XmpParsingException, TransformerException, BadFieldValueException
+    {
+        String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
+"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' bytes='1206'?><rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >\n" +
+"    <rdf:Description xmlns=\"http://purl.org/dc/elements/1.1/\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" about=\"\" dc:creator=\"\">\n" +
+"        <dc:coverage>Coverage</dc:coverage>\n" +
+"    </rdf:Description>\n" +
+"</rdf:RDF><?xpacket end='r'?>";
+        // strict mode: the empty dc:creator attribute is rejected
+        final DomXmpParser xmpParser1 = new DomXmpParser();
+        XmpParsingException ex = assertThrows(XmpParsingException.class,
+                () -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
+        assertEquals("The type 'Text' in 'dc:creator=' is a structured or array type, but attributes are simple types", ex.getMessage());
+        // lenient mode: the empty attribute is dropped, the rest is kept
+        DomXmpParser xmpParser2 = new DomXmpParser();
+        xmpParser2.setStrictParsing(false);
+        XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
+        DublinCoreSchema dublinCoreSchema2 = xmp2.getDublinCoreSchema();
+        assertEquals("Coverage", dublinCoreSchema2.getCoverage());
+        assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.CREATOR));
+        // round trip: serialize the lenient result and parse it again
+        XmpSerializer serializer = new XmpSerializer();
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        serializer.serialize(xmp2, baos, true);
+        DomXmpParser xmpParser3 = new DomXmpParser();
+        xmpParser3.setStrictParsing(false);
+        XMPMetadata xmp3 = xmpParser3.parse(baos.toByteArray());
+        DublinCoreSchema dublinCoreSchema3 = xmp3.getDublinCoreSchema();
+        assertEquals("Coverage", dublinCoreSchema3.getCoverage());
+        // check the re-parsed schema (not the first lenient result again)
+        assertNull(dublinCoreSchema3.getProperty(DublinCoreSchema.CREATOR));
+    }
+
+
+    /**
+     * Test empty attribute where a LangAlt is expected. Strict parsing rejects the attribute;
+     * in lenient mode the attribute is skipped, and it must still be absent after the metadata
+     * has been serialized and parsed again.
+     *
+     * @throws XmpParsingException if the lenient parse or the re-parse fails
+     * @throws TransformerException if the test fails unexpectedly
+     * @throws BadFieldValueException if the test fails unexpectedly
+     */
+    @Test
+    void testBadAttr5() throws XmpParsingException, TransformerException, BadFieldValueException
+    {
+        String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
+"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' bytes='987'?><rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:iX=\"http://ns.adobe.com/iX/1.0/\">\n" +
+"    <rdf:Description xmlns=\"http://purl.org/dc/elements/1.1/\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" about=\"\" dc:title=\"\" dc:coverage=\"COVER\"/>\n" +
+"</rdf:RDF><?xpacket end='r'?>";
+        // strict mode: the empty dc:title attribute is rejected
+        final DomXmpParser xmpParser1 = new DomXmpParser();
+        XmpParsingException ex = assertThrows(XmpParsingException.class,
+                () -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
+        assertEquals("The type 'LangAlt' in 'dc:title=' is a structured or array type, but attributes are simple types", ex.getMessage());
+        // lenient mode: the empty attribute is dropped, dc:coverage is kept
+        DomXmpParser xmpParser2 = new DomXmpParser();
+        xmpParser2.setStrictParsing(false);
+        XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
+        DublinCoreSchema dublinCoreSchema2 = xmp2.getDublinCoreSchema();
+        assertNull(dublinCoreSchema2.getTitle());
+        assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.TITLE));
+        assertEquals("COVER", dublinCoreSchema2.getCoverage());
+        // round trip: serialize the lenient result and parse it again
+        XmpSerializer serializer = new XmpSerializer();
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        serializer.serialize(xmp2, baos, true);
+        DomXmpParser xmpParser3 = new DomXmpParser();
+        xmpParser3.setStrictParsing(false);
+        XMPMetadata xmp3 = xmpParser3.parse(baos.toByteArray());
+        DublinCoreSchema dublinCoreSchema3 = xmp3.getDublinCoreSchema();
+        assertNull(dublinCoreSchema3.getTitle());
+        assertNull(dublinCoreSchema3.getProperty(DublinCoreSchema.TITLE));
+        assertEquals("COVER", dublinCoreSchema3.getCoverage());
+    }
+
+
     @Test
     void testBadSchema() throws XmpParsingException
     {

Reply via email to