Author: tilman
Date: Mon Dec 15 09:31:04 2025
New Revision: 1930584
Log:
PDFBOX-6123: support Seq / Bag mixup in lenient mode + test
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Mon Dec 15 09:30:59 2025 (r1930583)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Mon Dec 15 09:31:04 2025 (r1930584)
@@ -523,7 +523,7 @@ public class DomXmpParser
+ whatFound
+ " [prefix=" + prefix + "; name=" + name + "]");
}
- if (!bagOrSeq.getLocalName().equals(type.card().name()))
+ if (strictParsing &&
!bagOrSeq.getLocalName().equals(type.card().name()))
{
// not the good array type
throw new XmpParsingException(ErrorType.Format, "Invalid array
type, expecting " + type.card()
Modified:
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
---
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Mon Dec 15 09:30:59 2025 (r1930583)
+++
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Mon Dec 15 09:31:04 2025 (r1930584)
@@ -28,6 +28,7 @@ import java.util.Calendar;
import java.util.List;
import org.apache.xmpbox.XMPMetadata;
+import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.PhotoshopSchema;
import org.apache.xmpbox.schema.XMPMediaManagementSchema;
@@ -517,4 +518,41 @@ class DomXmpParserTest
String dataValue =
xmp.getSchema("http://ns.example.org/default/1.0/").getUnqualifiedTextPropertyValue("Data");
assertEquals("Example", dataValue);
}
+
+ /**
+ * Test that a Seq / Bag mixup gets detected in strict mode and gets read
in lenient mode.
+ * @throws XmpParsingException
+ */
+ @Test
+ void testLenientBagSeqMixup() throws XmpParsingException
+ {
+ String s = "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
+ "<?adobe-xap-filters esc=\"CRLF\"?>\n" +
+ "<x:xmpmeta xmlns:x='adobe:ns:meta/'>\n" +
+ " <rdf:RDF
xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>\n" +
+ " <rdf:Description
xmlns:dc='http://purl.org/dc/elements/1.1/'\n" +
+ "
dc:format='application/pdf'>\n" +
+ " <dc:subject>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li>Important
subject</rdf:li>\n" +
+ " <rdf:li>Unimportant
subject</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ " </dc:subject>\n" +
+ " </rdf:Description>\n" +
+ " </rdf:RDF>\n" +
+ "</x:xmpmeta>\n" +
+ "<?xpacket end='w'?>";
+ XmpParsingException ex = assertThrows(
+ XmpParsingException.class,
+ () -> new
DomXmpParser().parse(s.getBytes(StandardCharsets.UTF_8)));
+ assertEquals("Invalid array type, expecting Bag and found Seq
[prefix=dc; name=subject]", ex.getMessage());
+ DomXmpParser xmpParser = new DomXmpParser();
+ xmpParser.setStrictParsing(false);
+ XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
+ DublinCoreSchema dublinCoreSchema = xmp.getDublinCoreSchema();
+ List<String> subjects = dublinCoreSchema.getSubjects();
+ assertEquals(2, subjects.size());
+ assertEquals("Important subject", subjects.get(0));
+ assertEquals("Unimportant subject", subjects.get(1));
+ }
}