Author: tilman
Date: Mon Dec 8 10:06:35 2025
New Revision: 1930354
Log:
PDFBOX-5292: allow xmp extension schemata to be defined and used within the
same description, by David Sommer; closes #132
Modified:
pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified: pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt
==============================================================================
--- pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt Mon Dec 8 09:23:08 2025 (r1930353)
+++ pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt Mon Dec 8 10:06:35 2025 (r1930354)
@@ -210,7 +210,7 @@ isartor-6-7-3-t01-fail-c.pdf=7.2
isartor-6-7-5-t01-fail-a.pdf=7.0.0
isartor-6-7-5-t02-fail-a.pdf=7.0.0
isartor-6-7-8-t01-fail-a.pdf=7.3
-isartor-6-7-8-t02-fail-a.pdf=7.3 // 7.4.2 Impossible car NSPrefix utilisé pour identifier pdfschemaext. Ici il est vu comme un schema inconnu
+isartor-6-7-8-t02-fail-a.pdf=7.4.1 // 7.4.2 Impossible car NSPrefix utilisé pour identifier pdfschemaext. Ici il est vu comme un schema inconnu; After PDFBOX-5292 still rejected, but for better reason.
isartor-6-7-8-t02-fail-b.pdf=7.4.1
isartor-6-7-8-t02-fail-c.pdf=7.1 //traité comme probleme de format
isartor-6-7-8-t02-fail-d.pdf=7.1.1 // Property missing so Property unknown (7.1.1)
Modified: pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Mon Dec 8 09:23:08 2025 (r1930353)
+++ pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Mon Dec 8 10:06:35 2025 (r1930354)
@@ -31,6 +31,7 @@ import java.util.List;
import java.util.Map;
import java.util.Deque;
import java.util.StringTokenizer;
+import java.util.stream.Collectors;
import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
@@ -180,25 +181,17 @@ public class DomXmpParser
Element rdfRdf = findDescriptionsParent(root);
nsFinder.push(rdfRdf); // PDFBOX-6099: push namespaces in rdf:RDF
List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
- List<Element> dataDescriptions = new ArrayList<>(descriptions.size());
for (Element description : descriptions)
{
- Element first = DomHelper.getFirstChildElement(description);
- if (first != null && "pdfaExtension".equals(first.getPrefix()))
- {
- PdfaExtensionHelper.validateNaming(xmp, description);
- parseDescriptionRoot(xmp, description);
- }
- else
- {
- dataDescriptions.add(description);
- }
+ PdfaExtensionHelper.validateNaming(xmp, description);
+ parseSchemaExtensions(xmp, description);
}
// find schema description
PdfaExtensionHelper.populateSchemaMapping(xmp);
+
// parse data description
- for (Element description : dataDescriptions)
+ for (Element description : descriptions)
{
parseDescriptionRoot(xmp, description);
}
@@ -208,6 +201,46 @@ public class DomXmpParser
return xmp;
}
+ private boolean isSchemaExtensionProperty(final Element element)
+ {
+ return element != null && "pdfaExtension".equals(element.getPrefix());
+ }
+
+ private void parseSchemaExtensions(final XMPMetadata xmp, final Element description) throws XmpParsingException
+ {
+ final TypeMapping tm = xmp.getTypeMapping();
+ nsFinder.push(description);
+ try
+ {
+ final List<Element> schemaExtensions = DomHelper.getElementChildren(description)
+ .stream()
+ .filter(this::isSchemaExtensionProperty)
+ .collect(Collectors.toList());
+ for (final Element schemaExtension : schemaExtensions)
+ {
+ final String namespace = schemaExtension.getNamespaceURI();
+ if (!tm.isDefinedSchema(schemaExtension.getNamespaceURI()))
+ {
+ throw new XmpParsingException(ErrorType.NoSchema,
+ "This namespace is not a schema or a structured type : " + namespace);
+ }
+ PropertyType type = checkPropertyDefinition(xmp, DomHelper.getQName(schemaExtension));
+ final XMPSchema schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, schemaExtension.getPrefix());
+ loadAttributes(schema, description);
+ ComplexPropertyContainer container = schema.getContainer();
+ createProperty(xmp, schemaExtension, type, container);
+ }
+ }
+ catch (XmpSchemaException e)
+ {
+ throw new XmpParsingException(ErrorType.Undefined, "Parsing failed", e);
+ }
+ finally
+ {
+ nsFinder.pop();
+ }
+ }
+
private void parseDescriptionRoot(XMPMetadata xmp, Element description)
throws XmpParsingException
{
nsFinder.push(description);
@@ -318,6 +351,10 @@ public class DomXmpParser
throw new XmpParsingException(ErrorType.NoSchema,
"This namespace is not a schema or a structured type : " + namespace);
}
+ if (isSchemaExtensionProperty(property))
+ {
+ continue;
+ }
XMPSchema schema = xmp.getSchema(namespace);
if (schema == null)
{
@@ -888,7 +925,7 @@ public class DomXmpParser
return;
}
- for (int i = 0; i < nl.getLength(); i++)
+ for (int i = 0; i < nl.getLength(); i++)
{
Node node = nl.item(i);
if (node instanceof Comment)
Modified: pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java Mon Dec 8 09:23:08 2025 (r1930353)
+++ pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java Mon Dec 8 10:06:35 2025 (r1930354)
@@ -28,12 +28,14 @@ import java.util.Calendar;
import java.util.List;
import org.apache.xmpbox.XMPMetadata;
+import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.PhotoshopSchema;
import org.apache.xmpbox.schema.XMPMediaManagementSchema;
import org.apache.xmpbox.schema.XMPSchema;
import org.apache.xmpbox.schema.XMPageTextSchema;
import org.apache.xmpbox.type.AbstractField;
import org.apache.xmpbox.type.ArrayProperty;
+import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.type.DefinedStructuredType;
import org.apache.xmpbox.type.DimensionsType;
import org.apache.xmpbox.type.PDFASchemaType;
@@ -407,4 +409,100 @@ class DomXmpParserTest
// xmpMediaManagementSchema.getDerivedFromProperty() doesn't work.
// However the PDFLib XMP validator considers this file to be invalid, so lets not bother more
}
+
+ /**
+ * PDFBOX-5292: Test whether inline extension schema is detected.
+ *
+ * @throws XmpParsingException
+ */
+ @Test
+ void testPDFBox5292() throws XmpParsingException, BadFieldValueException
+ {
+ String s = "<?xpacket begin=\"\"
id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" +
+ "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP
Core 5.6-c015 84.159810, 2016/09/10-02:41:30 \">\n" +
+ " <rdf:RDF
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+ " <rdf:Description rdf:about=\"\"\n" +
+ "
xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n" +
+ "
xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" +
+ "
xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n" +
+ "
xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\"\n" +
+ "
xmlns:pdfaExtension=\"http://www.aiim.org/pdfa/ns/extension/\"\n" +
+ "
xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\"\n" +
+ "
xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\"\n" +
+ "
xmlns:example=\"http://ns.example.org/default/1.0/\">\n" +
+ "
<xmp:CreateDate>2021-05-21T11:42:49+01:00</xmp:CreateDate>\n" +
+ "
<xmp:ModifyDate>2021-05-21T11:47:16+02:00</xmp:ModifyDate>\n" +
+ "
<xmp:MetadataDate>2021-05-21T11:47:16+02:00</xmp:MetadataDate>\n" +
+ " <dc:format>application/pdf</dc:format>\n" +
+ " <dc:title>\n" +
+ " <rdf:Alt>\n" +
+ " <rdf:li xml:lang=\"x-default\">Inline
XMP Extension PoC</rdf:li>\n" +
+ " </rdf:Alt>\n" +
+ " </dc:title>\n" +
+ " <dc:creator>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li>DSO</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ " </dc:creator>\n" +
+ " <dc:description>\n" +
+ " <rdf:Alt>\n" +
+ " <rdf:li xml:lang=\"x-default\">Inline
XMP Extension PoC</rdf:li>\n" +
+ " </rdf:Alt>\n" +
+ " </dc:description>\n" +
+ " <pdf:Keywords/>\n" +
+ " <pdfaid:part>2</pdfaid:part>\n" +
+ " <pdfaid:conformance>A</pdfaid:conformance>\n"
+
+ " <example:Data>Example</example:Data>\n" +
+ " <pdfaExtension:schemas>\n" +
+ " <rdf:Bag>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ " <pdfaSchema:schema>Simple
Schema</pdfaSchema:schema>\n" +
+ "
<pdfaSchema:namespaceURI>http://ns.example.org/default/1.0/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>example</pdfaSchema:prefix>\n" +
+ " <pdfaSchema:property>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:name>Data</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Example Data</pdfaProperty:description>\n" +
+ " </rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ " </pdfaSchema:property>\n" +
+ " </rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>\n" +
+ " <pdfaSchema:schema>PDF/A ID
Schema</pdfaSchema:schema>\n" +
+ " <pdfaSchema:property>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>part</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
+ " </rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Conformance level of PDF/A
standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>conformance</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+ " </rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ " </pdfaSchema:property>\n" +
+ " </rdf:li>\n" +
+ " </rdf:Bag>\n" +
+ " </pdfaExtension:schemas>\n" +
+ " </rdf:Description>\n" +
+ " </rdf:RDF>\n" +
+ "</x:xmpmeta>\n" +
+ "\n" +
+ "<?xpacket end=\"w\"?>";
+ DomXmpParser xmpParser = new DomXmpParser();
+ XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
+ PDFAIdentificationSchema pdfaIdSchema = xmp.getPDFAIdentificationSchema();
+ assertEquals(2, pdfaIdSchema.getPart());
+ String dataValue = xmp.getSchema("http://ns.example.org/default/1.0/").getUnqualifiedTextPropertyValue("Data");
+ assertEquals("Example", dataValue);
+ }
}