Author: tilman
Date: Sat Dec 20 10:41:30 2025
New Revision: 1930752
Log:
PDFBOX-6129: pass the parser's strict mode to PdfaExtensionHelper; when not strict,
tolerate a schema definition without pdfaSchema:property; add test
Modified:
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified:
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
---
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sat Dec 20 10:41:24 2025 (r1930751)
+++
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sat Dec 20 10:41:30 2025 (r1930752)
@@ -217,7 +217,7 @@ public class DomXmpParser
}
// find schema description
- PdfaExtensionHelper.populateSchemaMapping(xmp);
+ PdfaExtensionHelper.populateSchemaMapping(xmp, strictParsing);
// parse data description
for (Element description : descriptions)
Modified:
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
==============================================================================
---
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
Sat Dec 20 10:41:24 2025 (r1930751)
+++
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
Sat Dec 20 10:41:30 2025 (r1930752)
@@ -95,8 +95,20 @@ public final class PdfaExtensionHelper
}
}
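+ /**
+ * Populates the schema mapping of the given metadata with strict parsing enabled.
+ * @param meta the metadata whose schemas and type mapping are processed
+ * @throws XmpParsingException if a schema type definition is invalid, e.g. pdfaSchema:property is missing
+ * @deprecated use {@link
#populateSchemaMapping(org.apache.xmpbox.XMPMetadata, boolean)}
+ */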
+ @Deprecated
public static void populateSchemaMapping(XMPMetadata meta) throws
XmpParsingException
{
+ populateSchemaMapping(meta, true);
+ }
+
+ public static void populateSchemaMapping(XMPMetadata meta, boolean
strictParsing) throws XmpParsingException
+ {
List<XMPSchema> schems = meta.getAllSchemas();
TypeMapping tm = meta.getTypeMapping();
StructuredType stPdfaExt =
PDFAExtensionSchema.class.getAnnotation(StructuredType.class);
@@ -118,14 +130,14 @@ public final class PdfaExtensionHelper
{
if (af instanceof PDFASchemaType)
{
- populatePDFASchemaType(meta, (PDFASchemaType) af, tm);
+ populatePDFASchemaType(meta, (PDFASchemaType) af, tm,
strictParsing);
} // TODO unmanaged ?
}
}
}
}
- private static void populatePDFASchemaType(XMPMetadata meta,
PDFASchemaType st, TypeMapping tm)
+ private static void populatePDFASchemaType(XMPMetadata meta,
PDFASchemaType st, TypeMapping tm, boolean strictParsing)
throws XmpParsingException
{
String namespaceUri = st.getNamespaceURI();
@@ -155,6 +167,10 @@ public final class PdfaExtensionHelper
}
}
// populate properties
+ if (properties == null && !strictParsing)
+ {
+ return;
+ }
requireNonNull(properties, () -> "Missing pdfaSchema:property in type
definition");
for (AbstractField af2 : properties.getAllProperties())
{
Modified:
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
---
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sat Dec 20 10:41:24 2025 (r1930751)
+++
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sat Dec 20 10:41:30 2025 (r1930752)
@@ -50,6 +50,7 @@ import org.apache.xmpbox.type.ResourceRe
import org.apache.xmpbox.type.TextType;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -1174,4 +1175,91 @@ class DomXmpParserTest
assertEquals("created", firstHistoryEntry.getAction());
assertEquals("original PDF file", firstHistoryEntry.getParameters());
}
-}
+
+ @Test
+ void testLenientPdfaExtension() throws XmpParsingException
+ {
+ // The first rdf:li entry of the pdfaExtension:schemas bag is incomplete: it has no pdfaSchema:property.
+ final String s =
+ "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" +
+ "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
+ " x:xmptk=\"Adobe XMP Core 4.2.1-c043 52.372728,
2009/01/18-15:08:04\">\n" +
+ " <rdf:RDF
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+ " <rdf:Description rdf:about=\"\"\n" +
+ "
xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\">\n" +
+ "
<xmpMM:DocumentID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:DocumentID>\n"
+
+ "
<xmpMM:InstanceID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:InstanceID>\n"
+
+ " </rdf:Description>\n" +
+ " <rdf:Description rdf:about=\"\"\n" +
+ "
xmlns:pdfaExtension=\"http://www.aiim.org/pdfa/ns/extension/\"\n" +
+ "
xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\"\n" +
+ "
xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\">\n" +
+ " <pdfaExtension:schemas>\n" +
+ " <rdf:Bag>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaSchema:namespaceURI>http://ns.adobe.com/pdf/1.3/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>pdf</pdfaSchema:prefix>\n" +
+ "
<pdfaSchema:schema>Adobe PDF Schema</pdfaSchema:schema>\n" +
+ " </rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaSchema:namespaceURI>http://ns.adobe.com/xap/1.0/mm/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>xmpMM</pdfaSchema:prefix>\n" +
+ " <pdfaSchema:schema>XMP
Media Management Schema</pdfaSchema:schema>\n" +
+ "
<pdfaSchema:property>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>UUID based identifier for specific incarnation of a
document</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>InstanceID</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>URI</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ "
</pdfaSchema:property>\n" +
+ " </rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>\n" +
+ "
<pdfaSchema:schema>PDF/A ID Schema</pdfaSchema:schema>\n" +
+ "
<pdfaSchema:property>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>part</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Amendment of PDF/A
standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>amd</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Conformance level of PDF/A
standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>conformance</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ "
</pdfaSchema:property>\n" +
+ " </rdf:li>\n" +
+ " </rdf:Bag>\n" +
+ " </pdfaExtension:schemas>\n" +
+ " </rdf:Description>\n" +
+ " </rdf:RDF>\n" +
+ "</x:xmpmeta>\n" +
+ "<?xpacket end=\"w\"?>";
+ final DomXmpParser xmpParser1 = new DomXmpParser();
+ XmpParsingException ex = assertThrows(XmpParsingException.class,
+ () -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
+ assertEquals("Missing pdfaSchema:property in type definition",
ex.getMessage());
+ DomXmpParser xmpParser2 = new DomXmpParser();
+ assertTrue(xmpParser2.isStrictParsing());
+ xmpParser2.setStrictParsing(false);
+ assertFalse(xmpParser2.isStrictParsing());
+ XMPMetadata xmp2 =
xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
+ XMPMediaManagementSchema xmpMediaManagementSchema =
xmp2.getXMPMediaManagementSchema();
+ assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d",
xmpMediaManagementSchema.getInstanceID());
+ assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d",
xmpMediaManagementSchema.getDocumentID());
+ }
+}
\ No newline at end of file