Author: tilman
Date: Mon Dec  8 10:06:35 2025
New Revision: 1930354

Log:
PDFBOX-5292: allow xmp extension schemata to be defined and used within the 
same description, by David Sommer; closes #132

Modified:
   pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt
   
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
   
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Modified: pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt
==============================================================================
--- pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt        
Mon Dec  8 09:23:08 2025        (r1930353)
+++ pdfbox/branches/3.0/preflight/src/test/resources/expected_errors.txt        
Mon Dec  8 10:06:35 2025        (r1930354)
@@ -210,7 +210,7 @@ isartor-6-7-3-t01-fail-c.pdf=7.2
 isartor-6-7-5-t01-fail-a.pdf=7.0.0
 isartor-6-7-5-t02-fail-a.pdf=7.0.0
 isartor-6-7-8-t01-fail-a.pdf=7.3
-isartor-6-7-8-t02-fail-a.pdf=7.3 // 7.4.2 Impossible car NSPrefix utilisé pour 
identifier pdfschemaext. Ici il est vu comme un schema inconnu
+isartor-6-7-8-t02-fail-a.pdf=7.4.1 // 7.4.2 Impossible car NSPrefix utilisé 
pour identifier pdfschemaext. Ici il est vu comme un schema inconnu; After 
PDFBOX-5292 still rejected, but for better reason.
 isartor-6-7-8-t02-fail-b.pdf=7.4.1 
 isartor-6-7-8-t02-fail-c.pdf=7.1 //traité comme probleme de format 
 isartor-6-7-8-t02-fail-d.pdf=7.1.1 // Property missing so Property unknown 
(7.1.1)

Modified: 
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- 
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    Mon Dec  8 09:23:08 2025        (r1930353)
+++ 
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    Mon Dec  8 10:06:35 2025        (r1930354)
@@ -31,6 +31,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Deque;
 import java.util.StringTokenizer;
+import java.util.stream.Collectors;
 
 import javax.xml.XMLConstants;
 import javax.xml.namespace.QName;
@@ -180,25 +181,17 @@ public class DomXmpParser
         Element rdfRdf = findDescriptionsParent(root);
         nsFinder.push(rdfRdf); // PDFBOX-6099: push namespaces in rdf:RDF
         List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
-        List<Element> dataDescriptions = new ArrayList<>(descriptions.size());
         for (Element description : descriptions)
         {
-            Element first = DomHelper.getFirstChildElement(description);
-            if (first != null && "pdfaExtension".equals(first.getPrefix()))
-            {
-                PdfaExtensionHelper.validateNaming(xmp, description);
-                parseDescriptionRoot(xmp, description);
-            }
-            else
-            {
-                dataDescriptions.add(description);
-            }
+            PdfaExtensionHelper.validateNaming(xmp, description);
+            parseSchemaExtensions(xmp, description);
         }
 
         // find schema description
         PdfaExtensionHelper.populateSchemaMapping(xmp);
+
         // parse data description
-        for (Element description : dataDescriptions)
+        for (Element description : descriptions)
         {
             parseDescriptionRoot(xmp, description);
         }
@@ -208,6 +201,46 @@ public class DomXmpParser
         return xmp;
     }
 
+    private boolean isSchemaExtensionProperty(final Element element)
+    {
+        return element != null && "pdfaExtension".equals(element.getPrefix());
+    }
+
+    private void parseSchemaExtensions(final XMPMetadata xmp, final Element 
description) throws XmpParsingException
+    {
+        final TypeMapping tm = xmp.getTypeMapping();
+        nsFinder.push(description);
+        try
+        {
+            final List<Element> schemaExtensions = 
DomHelper.getElementChildren(description)
+                    .stream()
+                    .filter(this::isSchemaExtensionProperty)
+                    .collect(Collectors.toList());
+            for (final Element schemaExtension : schemaExtensions)
+            {
+                final String namespace = schemaExtension.getNamespaceURI();
+                if (!tm.isDefinedSchema(schemaExtension.getNamespaceURI()))
+                {
+                    throw new XmpParsingException(ErrorType.NoSchema,
+                            "This namespace is not a schema or a structured 
type : " + namespace);
+                }
+                PropertyType type = checkPropertyDefinition(xmp, 
DomHelper.getQName(schemaExtension));
+                final XMPSchema schema = 
tm.getSchemaFactory(namespace).createXMPSchema(xmp, 
schemaExtension.getPrefix());
+                loadAttributes(schema, description);
+                ComplexPropertyContainer container = schema.getContainer();
+                createProperty(xmp, schemaExtension, type, container);
+            }
+        }
+        catch (XmpSchemaException e)
+        {
+            throw new XmpParsingException(ErrorType.Undefined, "Parsing 
failed", e);
+        }
+        finally
+        {
+            nsFinder.pop();
+        }
+    }
+
     private void parseDescriptionRoot(XMPMetadata xmp, Element description) 
throws XmpParsingException
     {
         nsFinder.push(description);
@@ -318,6 +351,10 @@ public class DomXmpParser
                 throw new XmpParsingException(ErrorType.NoSchema,
                         "This namespace is not a schema or a structured type : 
" + namespace);
             }
+            if (isSchemaExtensionProperty(property))
+            {
+                continue;
+            }
             XMPSchema schema = xmp.getSchema(namespace);
             if (schema == null)
             {
@@ -888,7 +925,7 @@ public class DomXmpParser
             return;
         }
         
-        for (int i = 0; i < nl.getLength(); i++) 
+        for (int i = 0; i < nl.getLength(); i++)
         {
             Node node = nl.item(i);
             if (node instanceof Comment)

Modified: 
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- 
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
        Mon Dec  8 09:23:08 2025        (r1930353)
+++ 
pdfbox/branches/3.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
        Mon Dec  8 10:06:35 2025        (r1930354)
@@ -28,12 +28,14 @@ import java.util.Calendar;
 import java.util.List;
 
 import org.apache.xmpbox.XMPMetadata;
+import org.apache.xmpbox.schema.PDFAIdentificationSchema;
 import org.apache.xmpbox.schema.PhotoshopSchema;
 import org.apache.xmpbox.schema.XMPMediaManagementSchema;
 import org.apache.xmpbox.schema.XMPSchema;
 import org.apache.xmpbox.schema.XMPageTextSchema;
 import org.apache.xmpbox.type.AbstractField;
 import org.apache.xmpbox.type.ArrayProperty;
+import org.apache.xmpbox.type.BadFieldValueException;
 import org.apache.xmpbox.type.DefinedStructuredType;
 import org.apache.xmpbox.type.DimensionsType;
 import org.apache.xmpbox.type.PDFASchemaType;
@@ -407,4 +409,100 @@ class DomXmpParserTest
         // xmpMediaManagementSchema.getDerivedFromProperty() doesn't work.
         // However the PDFLib XMP validator considers this file to be invalid, 
so lets not bother more
     }
+
+    /**
+     * PDFBOX-5292: Test whether inline extension schema is detected.
+     *
+     * @throws XmpParsingException 
+     */
+    @Test
+    void testPDFBox5292() throws XmpParsingException, BadFieldValueException
+    {
+        String s = "<?xpacket begin=\"\" 
id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" +
+                    "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP 
Core 5.6-c015 84.159810, 2016/09/10-02:41:30        \">\n" +
+                    "    <rdf:RDF 
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+                    "        <rdf:Description rdf:about=\"\"\n" +
+                    "                         
xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n" +
+                    "                         
xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" +
+                    "                         
xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n" +
+                    "                         
xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\"\n" +
+                    "                         
xmlns:pdfaExtension=\"http://www.aiim.org/pdfa/ns/extension/\"\n" +
+                    "                         
xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\"\n" +
+                    "                         
xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\"\n" +
+                    "                         
xmlns:example=\"http://ns.example.org/default/1.0/\">\n" +
+                    "            
<xmp:CreateDate>2021-05-21T11:42:49+01:00</xmp:CreateDate>\n" +
+                    "            
<xmp:ModifyDate>2021-05-21T11:47:16+02:00</xmp:ModifyDate>\n" +
+                    "            
<xmp:MetadataDate>2021-05-21T11:47:16+02:00</xmp:MetadataDate>\n" +
+                    "            <dc:format>application/pdf</dc:format>\n" +
+                    "            <dc:title>\n" +
+                    "                <rdf:Alt>\n" +
+                    "                    <rdf:li xml:lang=\"x-default\">Inline 
XMP Extension PoC</rdf:li>\n" +
+                    "                </rdf:Alt>\n" +
+                    "            </dc:title>\n" +
+                    "            <dc:creator>\n" +
+                    "                <rdf:Seq>\n" +
+                    "                    <rdf:li>DSO</rdf:li>\n" +
+                    "                </rdf:Seq>\n" +
+                    "            </dc:creator>\n" +
+                    "            <dc:description>\n" +
+                    "                <rdf:Alt>\n" +
+                    "                    <rdf:li xml:lang=\"x-default\">Inline 
XMP Extension PoC</rdf:li>\n" +
+                    "                </rdf:Alt>\n" +
+                    "            </dc:description>\n" +
+                    "            <pdf:Keywords/>\n" +
+                    "            <pdfaid:part>2</pdfaid:part>\n" +
+                    "            <pdfaid:conformance>A</pdfaid:conformance>\n" 
+
+                    "            <example:Data>Example</example:Data>\n" +
+                    "            <pdfaExtension:schemas>\n" +
+                    "                <rdf:Bag>\n" +
+                    "                    <rdf:li 
rdf:parseType=\"Resource\">\n" +
+                    "                        <pdfaSchema:schema>Simple 
Schema</pdfaSchema:schema>\n" +
+                    "                        
<pdfaSchema:namespaceURI>http://ns.example.org/default/1.0/</pdfaSchema:namespaceURI>\n"
 +
+                    "                        
<pdfaSchema:prefix>example</pdfaSchema:prefix>\n" +
+                    "                        <pdfaSchema:property>\n" +
+                    "                            <rdf:Seq>\n" +
+                    "                                <rdf:li 
rdf:parseType=\"Resource\">\n" +
+                    "                                    
<pdfaProperty:name>Data</pdfaProperty:name>\n" +
+                    "                                    
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+                    "                                    
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+                    "                                    
<pdfaProperty:description>Example Data</pdfaProperty:description>\n" +
+                    "                                </rdf:li>\n" +
+                    "                            </rdf:Seq>\n" +
+                    "                        </pdfaSchema:property>\n" +
+                    "                    </rdf:li>\n" +
+                    "                    <rdf:li 
rdf:parseType=\"Resource\">\n" +
+                    "                        
<pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>\n"
 +
+                    "                        
<pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>\n" +
+                    "                        <pdfaSchema:schema>PDF/A ID 
Schema</pdfaSchema:schema>\n" +
+                    "                        <pdfaSchema:property>\n" +
+                    "                            <rdf:Seq>\n" +
+                    "                                <rdf:li 
rdf:parseType=\"Resource\">\n" +
+                    "                                    
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+                    "                                    
<pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>\n" +
+                    "                                    
<pdfaProperty:name>part</pdfaProperty:name>\n" +
+                    "                                    
<pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
+                    "                                </rdf:li>\n" +
+                    "                                <rdf:li 
rdf:parseType=\"Resource\">\n" +
+                    "                                    
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+                    "                                    
<pdfaProperty:description>Conformance level of PDF/A 
standard</pdfaProperty:description>\n" +
+                    "                                    
<pdfaProperty:name>conformance</pdfaProperty:name>\n" +
+                    "                                    
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+                    "                                </rdf:li>\n" +
+                    "                            </rdf:Seq>\n" +
+                    "                        </pdfaSchema:property>\n" +
+                    "                    </rdf:li>\n" +
+                    "                </rdf:Bag>\n" +
+                    "            </pdfaExtension:schemas>\n" +
+                    "        </rdf:Description>\n" +
+                    "    </rdf:RDF>\n" +
+                    "</x:xmpmeta>\n" +
+                    "\n" +
+                    "<?xpacket end=\"w\"?>";
+        DomXmpParser xmpParser = new DomXmpParser();
+        XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
+        PDFAIdentificationSchema pdfaIdSchema = 
xmp.getPDFAIdentificationSchema();
+        assertEquals(2, pdfaIdSchema.getPart());
+        String dataValue = 
xmp.getSchema("http://ns.example.org/default/1.0/").getUnqualifiedTextPropertyValue("Data");
+        assertEquals("Example", dataValue);
+    }
 }

Reply via email to