Author: tilman
Date: Sat Dec 20 10:41:24 2025
New Revision: 1930751

Log:
PDFBOX-6129: pass strict mode to PDFA Helper; be lenient when missing property; 
add test

Modified:
   
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
   
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
   
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Modified: 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    Sat Dec 20 08:30:16 2025        (r1930750)
+++ 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    Sat Dec 20 10:41:24 2025        (r1930751)
@@ -202,7 +202,7 @@ public class DomXmpParser
         }
 
         // find schema description
-        PdfaExtensionHelper.populateSchemaMapping(xmp);
+        PdfaExtensionHelper.populateSchemaMapping(xmp, strictParsing);
 
         // parse data description
         for (Element description : descriptions)

Modified: 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
==============================================================================
--- 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
     Sat Dec 20 08:30:16 2025        (r1930750)
+++ 
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
     Sat Dec 20 10:41:24 2025        (r1930751)
@@ -94,8 +94,20 @@ public final class PdfaExtensionHelper
         }
     }
 
+    /**
+     * 
+     * @param meta
+     * @throws XmpParsingException
+     * @deprecated use {@link 
#populateSchemaMapping(org.apache.xmpbox.XMPMetadata, boolean)}
+     */
+    @Deprecated
     public static void populateSchemaMapping(XMPMetadata meta) throws 
XmpParsingException
     {
+        populateSchemaMapping(meta, true);
+    }
+
+    public static void populateSchemaMapping(XMPMetadata meta, boolean 
strictParsing) throws XmpParsingException
+    {
         List<XMPSchema> schems = meta.getAllSchemas();
         TypeMapping tm = meta.getTypeMapping();
         StructuredType stPdfaExt = 
PDFAExtensionSchema.class.getAnnotation(StructuredType.class);
@@ -117,14 +129,14 @@ public final class PdfaExtensionHelper
                 {
                     if (af instanceof PDFASchemaType)
                     {
-                        populatePDFASchemaType(meta, (PDFASchemaType) af, tm);
+                        populatePDFASchemaType(meta, (PDFASchemaType) af, tm, 
strictParsing);
                     } // TODO unmanaged ?
                 }
             }
         }
     }
 
-    private static void populatePDFASchemaType(XMPMetadata meta, 
PDFASchemaType st, TypeMapping tm)
+    private static void populatePDFASchemaType(XMPMetadata meta, 
PDFASchemaType st, TypeMapping tm, boolean strictParsing)
             throws XmpParsingException
     {
         String namespaceUri = st.getNamespaceURI();
@@ -158,6 +170,10 @@ public final class PdfaExtensionHelper
             }
         }
         // populate properties
+        if (properties == null && !strictParsing)
+        {
+            return;
+        }
         if (properties == null)
         {
             throw new XmpParsingException(ErrorType.RequiredProperty,

Modified: 
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- 
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
        Sat Dec 20 08:30:16 2025        (r1930750)
+++ 
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
        Sat Dec 20 10:41:24 2025        (r1930751)
@@ -48,6 +48,7 @@ import org.apache.xmpbox.type.ResourceEv
 import org.apache.xmpbox.type.ResourceRefType;
 import org.apache.xmpbox.type.TextType;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -1234,4 +1235,96 @@ public class DomXmpParserTest
         assertEquals("created", firstHistoryEntry.getAction());
         assertEquals("original PDF file", firstHistoryEntry.getParameters());
     }
-}
+
+    @Test
+    public void testLenientPdfaExtension() throws XmpParsingException, 
UnsupportedEncodingException
+    {
+        // First bag in pdfaExtension is incomplete.
+        final String s = 
+            "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" +
+            "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
+            "           x:xmptk=\"Adobe XMP Core 4.2.1-c043 52.372728, 
2009/01/18-15:08:04\">\n" +
+            "  <rdf:RDF 
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+            "          <rdf:Description rdf:about=\"\"\n" +
+            "                           
xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\">\n" +
+            "                  
<xmpMM:DocumentID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:DocumentID>\n"
 +
+            "                  
<xmpMM:InstanceID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:InstanceID>\n"
 +
+            "          </rdf:Description>\n" +
+            "          <rdf:Description rdf:about=\"\"\n" +
+            "                           
xmlns:pdfaExtension=\"http://www.aiim.org/pdfa/ns/extension/\"\n" +
+            "                           
xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\"\n" +
+            "                           
xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\">\n" +
+            "                  <pdfaExtension:schemas>\n" +
+            "                          <rdf:Bag>\n" +
+            "                                  <rdf:li 
rdf:parseType=\"Resource\">\n" +
+            "                                          
<pdfaSchema:namespaceURI>http://ns.adobe.com/pdf/1.3/</pdfaSchema:namespaceURI>\n"
 +
+            "                                          
<pdfaSchema:prefix>pdf</pdfaSchema:prefix>\n" +
+            "                                          
<pdfaSchema:schema>Adobe PDF Schema</pdfaSchema:schema>\n" +
+            "                                  </rdf:li>\n" +
+            "                                  <rdf:li 
rdf:parseType=\"Resource\">\n" +
+            "                                          
<pdfaSchema:namespaceURI>http://ns.adobe.com/xap/1.0/mm/</pdfaSchema:namespaceURI>\n"
 +
+            "                                          
<pdfaSchema:prefix>xmpMM</pdfaSchema:prefix>\n" +
+            "                                          <pdfaSchema:schema>XMP 
Media Management Schema</pdfaSchema:schema>\n" +
+            "                                          
<pdfaSchema:property>\n" +
+            "                                                  <rdf:Seq>\n" +
+            "                                                          <rdf:li 
rdf:parseType=\"Resource\">\n" +
+            "                                                                  
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+            "                                                                  
<pdfaProperty:description>UUID based identifier for specific incarnation of a 
document</pdfaProperty:description>\n" +
+            "                                                                  
<pdfaProperty:name>InstanceID</pdfaProperty:name>\n" +
+            "                                                                  
<pdfaProperty:valueType>URI</pdfaProperty:valueType>\n" +
+            "                                                          
</rdf:li>\n" +
+            "                                                  </rdf:Seq>\n" +
+            "                                          
</pdfaSchema:property>\n" +
+            "                                  </rdf:li>\n" +
+            "                                  <rdf:li 
rdf:parseType=\"Resource\">\n" +
+            "                                          
<pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>\n"
 +
+            "                                          
<pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>\n" +
+            "                                          
<pdfaSchema:schema>PDF/A ID Schema</pdfaSchema:schema>\n" +
+            "                                          
<pdfaSchema:property>\n" +
+            "                                                  <rdf:Seq>\n" +
+            "                                                          <rdf:li 
rdf:parseType=\"Resource\">\n" +
+            "                                                                  
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+            "                                                                  
<pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>\n" +
+            "                                                                  
<pdfaProperty:name>part</pdfaProperty:name>\n" +
+            "                                                                  
<pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
+            "                                                          
</rdf:li>\n" +
+            "                                                          <rdf:li 
rdf:parseType=\"Resource\">\n" +
+            "                                                                  
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+            "                                                                  
<pdfaProperty:description>Amendment of PDF/A 
standard</pdfaProperty:description>\n" +
+            "                                                                  
<pdfaProperty:name>amd</pdfaProperty:name>\n" +
+            "                                                                  
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+            "                                                          
</rdf:li>\n" +
+            "                                                          <rdf:li 
rdf:parseType=\"Resource\">\n" +
+            "                                                                  
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+            "                                                                  
<pdfaProperty:description>Conformance level of PDF/A 
standard</pdfaProperty:description>\n" +
+            "                                                                  
<pdfaProperty:name>conformance</pdfaProperty:name>\n" +
+            "                                                                  
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+            "                                                          
</rdf:li>\n" +
+            "                                                  </rdf:Seq>\n" +
+            "                                          
</pdfaSchema:property>\n" +
+            "                                  </rdf:li>\n" +
+            "                          </rdf:Bag>\n" +
+            "                  </pdfaExtension:schemas>\n" +
+            "          </rdf:Description>\n" +
+            "  </rdf:RDF>\n" +
+            "</x:xmpmeta>\n" +
+            "<?xpacket end=\"w\"?>";
+        try
+        {
+            new DomXmpParser().parse(s.getBytes("utf-8"));
+            fail("XmpParsingException expected");
+        }
+        catch (XmpParsingException ex)
+        {
+            assertEquals("Missing pdfaSchema:property in type definition", 
ex.getMessage());
+        }
+        DomXmpParser xmpParser2 = new DomXmpParser();
+        assertTrue(xmpParser2.isStrictParsing());
+        xmpParser2.setStrictParsing(false);
+        assertFalse(xmpParser2.isStrictParsing());
+        XMPMetadata xmp2 = xmpParser2.parse(s.getBytes("utf-8"));
+        XMPMediaManagementSchema xmpMediaManagementSchema = 
xmp2.getXMPMediaManagementSchema();
+        assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d", 
xmpMediaManagementSchema.getInstanceID());
+        assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d", 
xmpMediaManagementSchema.getDocumentID());
+    }
+}
\ No newline at end of file

Reply via email to