Author: tilman
Date: Wed Dec 17 19:07:00 2025
New Revision: 1930678
Log:
PDFBOX-6125: LangAlt also not simple; skip empty attributes; add more tests
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Wed Dec 17 17:55:55 2025 (r1930677)
+++
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Wed Dec 17 19:07:00 2025 (r1930678)
@@ -335,7 +335,7 @@ public class DomXmpParser
type = TypeMapping.createPropertyType(Types.Text,
Cardinality.Simple);
}
}
- else if (!type.type().isSimple() || type.card().isArray())
+ else if (!type.type().isSimple() || type.card().isArray() ||
type.type() == Types.LangAlt)
{
if (strictParsing)
{
@@ -345,7 +345,12 @@ public class DomXmpParser
}
else
{
- // PDFBOX-6125: Default to text
+ // PDFBOX-6125: Default to text or skip
+ if (attr.getValue() == null || attr.getValue().isEmpty())
+ {
+ schema.removeAttribute(attr.getLocalName());
+ return;
+ }
type = TypeMapping.createPropertyType(Types.Text,
Cardinality.Simple);
}
}
@@ -1134,7 +1139,7 @@ public class DomXmpParser
type = TypeMapping.createPropertyType(Types.Text,
Cardinality.Simple);
}
}
- else if (!type.type().isSimple() || type.card().isArray())
+ else if (!type.type().isSimple() || type.card().isArray()
|| type.type() == Types.LangAlt)
{
if (strictParsing)
{
@@ -1144,7 +1149,11 @@ public class DomXmpParser
}
else
{
- // PDFBOX-6125: Default to text
+ // PDFBOX-6125: Default to text or skip
+ if (attr.getValue() == null ||
attr.getValue().isEmpty())
+ {
+ continue;
+ }
type = TypeMapping.createPropertyType(Types.Text,
Cardinality.Simple);
}
}
Modified:
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Wed Dec 17 17:55:55 2025 (r1930677)
+++
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Wed Dec 17 19:07:00 2025 (r1930678)
@@ -48,6 +48,7 @@ import org.apache.xmpbox.type.ResourceEv
import org.apache.xmpbox.type.ResourceRefType;
import org.apache.xmpbox.type.TextType;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.fail;
import org.junit.Test;
@@ -866,6 +867,90 @@ public class DomXmpParserTest
assertEquals("[creator=TextType:Creator]",
dublinCoreSchema.getProperty(DublinCoreSchema.CREATOR).toString());
}
+ /**
+ * Test empty attribute where an array is expected. The attribute is skipped in lenient mode.
+ * <p>
+ * A parser with default settings is expected to fail with an XmpParsingException for the
+ * empty dc:creator attribute. A parser with strict parsing turned off is expected to keep
+ * dc:coverage and to report no dc:creator property; both expectations are checked again on
+ * the metadata obtained by serializing the lenient result and parsing it once more.
+ *
+ * @throws XmpParsingException if the lenient parse or the re-parse of the serialized data fails
+ * @throws TransformerException declared by the test; presumably raised by serialization
+ * @throws BadFieldValueException declared by the test; the raising call is not visible here
+ * @throws UnsupportedEncodingException if the "utf-8" charset name is not supported
+ */
+ @Test
+ public void testBadAttr4() throws XmpParsingException,
TransformerException, BadFieldValueException, UnsupportedEncodingException
+ {
+ String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"
standalone=\"no\"?>\n" +
+"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' bytes='1206'?><rdf:RDF
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >\n" +
+" <rdf:Description xmlns=\"http://purl.org/dc/elements/1.1/\"
xmlns:dc=\"http://purl.org/dc/elements/1.1/\" about=\"\" dc:creator=\"\">\n" +
+" <dc:coverage>Coverage</dc:coverage>\n" +
+" </rdf:Description>\n" +
+"</rdf:RDF><?xpacket end='r'?>";
+ try
+ {
+ new DomXmpParser().parse(s.getBytes("utf-8"));
+ fail("XmpParsingException expected");
+ }
+ catch (XmpParsingException ex)
+ {
+ assertEquals("The type 'Text' in 'dc:creator=' is a structured or
array type, but attributes are simple types", ex.getMessage());
+ }
+ DomXmpParser xmpParser2 = new DomXmpParser();
+ xmpParser2.setStrictParsing(false);
+ XMPMetadata xmp2 = xmpParser2.parse(s.getBytes("utf-8"));
+ DublinCoreSchema dublinCoreSchema2 = xmp2.getDublinCoreSchema();
+ assertEquals("Coverage", dublinCoreSchema2.getCoverage());
+ assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.CREATOR));
+ XmpSerializer serializer = new XmpSerializer();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ serializer.serialize(xmp2, baos, true);
+ DomXmpParser xmpParser3 = new DomXmpParser();
+ xmpParser3.setStrictParsing(false);
+ XMPMetadata xmp3 = xmpParser3.parse(baos.toByteArray());
+ DublinCoreSchema dublinCoreSchema3 = xmp3.getDublinCoreSchema();
+ assertEquals("Coverage", dublinCoreSchema3.getCoverage());
+ assertNull(dublinCoreSchema3.getProperty(DublinCoreSchema.CREATOR));
+ }
+
+ /**
+ * Test empty attribute where a LangAlt is expected. The attribute is skipped in lenient mode.
+ * <p>
+ * A parser with default settings is expected to fail with an XmpParsingException for the
+ * empty dc:title attribute. A parser with strict parsing turned off is expected to report
+ * neither a title nor a dc:title property while keeping the dc:coverage attribute value;
+ * the same expectations are checked again on the metadata obtained by serializing the
+ * lenient result and parsing it once more.
+ *
+ * @throws XmpParsingException if the lenient parse or the re-parse of the serialized data fails
+ * @throws TransformerException declared by the test; presumably raised by serialization
+ * @throws BadFieldValueException declared by the test; the raising call is not visible here
+ * @throws UnsupportedEncodingException if the "utf-8" charset name is not supported
+ */
+ @Test
+ public void testBadAttr5() throws XmpParsingException,
TransformerException, BadFieldValueException, UnsupportedEncodingException
+ {
+ String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"
standalone=\"no\"?>\n" +
+"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' bytes='987'?><rdf:RDF
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"
xmlns:iX=\"http://ns.adobe.com/iX/1.0/\">\n" +
+" <rdf:Description xmlns=\"http://purl.org/dc/elements/1.1/\"
xmlns:dc=\"http://purl.org/dc/elements/1.1/\" about=\"\" dc:title=\"\"
dc:coverage=\"COVER\"/>\n" +
+"</rdf:RDF><?xpacket end='r'?>";
+ try
+ {
+ new DomXmpParser().parse(s.getBytes("utf-8"));
+ fail("XmpParsingException expected");
+ }
+ catch (XmpParsingException ex)
+ {
+ assertEquals("The type 'LangAlt' in 'dc:title=' is a structured or
array type, but attributes are simple types", ex.getMessage());
+ }
+ DomXmpParser xmpParser2 = new DomXmpParser();
+ xmpParser2.setStrictParsing(false);
+ XMPMetadata xmp2 = xmpParser2.parse(s.getBytes("utf-8"));
+ DublinCoreSchema dublinCoreSchema2 = xmp2.getDublinCoreSchema();
+ assertNull(dublinCoreSchema2.getTitle());
+ assertNull(dublinCoreSchema2.getProperty(DublinCoreSchema.TITLE));
+ assertEquals("COVER", dublinCoreSchema2.getCoverage());
+ XmpSerializer serializer = new XmpSerializer();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ serializer.serialize(xmp2, baos, true);
+ DomXmpParser xmpParser3 = new DomXmpParser();
+ xmpParser3.setStrictParsing(false);
+ XMPMetadata xmp3 = xmpParser3.parse(baos.toByteArray());
+ DublinCoreSchema dublinCoreSchema3 = xmp3.getDublinCoreSchema();
+ assertNull(dublinCoreSchema3.getTitle());
+ assertNull(dublinCoreSchema3.getProperty(DublinCoreSchema.TITLE));
+ assertEquals("COVER", dublinCoreSchema3.getCoverage());
+ }
+
@Test
public void testBadSchema() throws XmpParsingException,
UnsupportedEncodingException
{