Author: tilman
Date: Wed Dec 17 14:27:34 2025
New Revision: 1930669

Log:
PDFBOX-6127: look for non standard namespaces in rdf:RDF

Modified:
   pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
   pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Modified: pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java   Wed Dec 17 12:30:00 2025        (r1930668)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java   Wed Dec 17 14:27:34 2025        (r1930669)
@@ -192,6 +192,24 @@ public class DomXmpParser
         // Now, parse the content of root
         Element rdfRdf = findDescriptionsParent(root);
         nsFinder.push(rdfRdf); // PDFBOX-6099: push namespaces in rdf:RDF
+
+        // PDFBOX-6127: look for non standard namespaces (similar to PDFBOX-2378)
+        if (!strictParsing)
+        {
+            NamedNodeMap nnm = rdfRdf.getAttributes();
+            if (nnm != null)
+            {
+                for (int i = 0; i < nnm.getLength(); i++)
+                {
+                    Attr attr = (Attr) nnm.item(i);
+                    if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
+                    {
+                        maybeAddNonStandardNamespace(xmp, attr);
+                    }
+                }
+            }
+        }
+
         List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
         for (Element description : descriptions)
         {
@@ -212,6 +230,23 @@ public class DomXmpParser
         return xmp;
     }
 
+    private void maybeAddNonStandardNamespace(XMPMetadata xmp, Attr attr)
+    {
+        // xmlns:prefix="namespace"
+        TypeMapping tm = xmp.getTypeMapping();
+        String namespace = attr.getValue();
+        if (!XmpConstants.RDF_NAMESPACE.equals(namespace) &&
+            !tm.isStructuredTypeNamespace(namespace) &&
+            xmp.getSchema(namespace) == null && tm.getSchemaFactory(namespace) == null)
+        {
+            // PDFBOX-5128 / PDFBOX-6127: Add the schema on the fly if it can't be found
+            // PDFBOX-5649: But only if the namespace isn't already known
+            // because this adds a namespace without property descriptions
+            // PDFBOX-6127: never rdf
+            tm.addNewNameSpace(namespace, attr.getLocalName());
+        }
+    }
+
     private boolean isSchemaExtensionProperty(final Element element)
     {
         return element != null && "pdfaExtension".equals(element.getPrefix());
@@ -279,14 +314,9 @@ public class DomXmpParser
                 }
                 else if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
                 {
-                    String namespace = attr.getValue();
-                    if (!strictParsing && !tm.isStructuredTypeNamespace(namespace) &&
-                        xmp.getSchema(namespace) == null && tm.getSchemaFactory(namespace) == null)
+                    if (!strictParsing)
                     {
-                        // PDFBOX-5128: Add the schema on the fly if it can't be found
-                        // PDFBOX-5649: But only if the namespace isn't already known
-                        // because this adds a namespace without property descriptions
-                        tm.addNewNameSpace(namespace, attr.getLocalName());
+                        maybeAddNonStandardNamespace(xmp, attr);
                     }
                 }
                 else

Modified: pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java   Wed Dec 17 12:30:00 2025        (r1930668)
+++ pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java   Wed Dec 17 14:27:34 2025        (r1930669)
@@ -914,4 +914,37 @@ class DomXmpParserTest
         XMPSchema uaSchema2  = xmp2.getSchema("http://www.aiim.org/pdfua/ns/id/");
         assertEquals(1, uaSchema2.getIntegerPropertyValueAsSimple("part"));
     }
+
+    @Test
+    void testNonStandardURIinRDF() throws XmpParsingException, TransformerException
+    {
+        String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
+"<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?><x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP Core 4.2.1-c041 52.342996, 2008/05/07-20:48:00        \">\n" +
+"    <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+"        <rdf:Description xmlns:pdfx=\"http://ns.adobe.com/pdfx/1.3/\" rdf:about=\"\">\n" +
+"            <pdfx:XPressPrivate>private</pdfx:XPressPrivate>\n" +
+"        </rdf:Description>\n" +
+"    </rdf:RDF>\n" +
+"</x:xmpmeta><?xpacket end=\"w\"?>";
+        final DomXmpParser xmpParser1 = new DomXmpParser();
+        XmpParsingException ex = assertThrows(XmpParsingException.class,
+                () -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
+        assertEquals("Cannot find a definition for the namespace http://ns.adobe.com/pdfx/1.3/, property: pdfx:XPressPrivate", ex.getMessage());
+        DomXmpParser xmpParser2 = new DomXmpParser();
+        xmpParser2.setStrictParsing(false);
+        XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
+        XMPSchema schema2 = xmp2.getSchema("http://ns.adobe.com/pdfx/1.3/");
+        assertEquals("[XPressPrivate=TextType:private]", schema2.getProperty("XPressPrivate").toString());
+        XmpSerializer serializer = new XmpSerializer();
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        serializer.serialize(xmp2, baos, true);
+        final DomXmpParser xmpParser3 = new DomXmpParser();
+        ex = assertThrows(XmpParsingException.class, () -> xmpParser3.parse(baos.toByteArray()));
+        assertEquals("Cannot find a definition for the namespace http://ns.adobe.com/pdfx/1.3/, property: pdfx:XPressPrivate", ex.getMessage());
+        DomXmpParser xmpParser4 = new DomXmpParser();
+        xmpParser4.setStrictParsing(false);
+        XMPMetadata xmp4 = xmpParser4.parse(baos.toByteArray());
+        XMPSchema schema4 = xmp4.getSchema("http://ns.adobe.com/pdfx/1.3/");
+        assertEquals("[XPressPrivate=TextType:private]", schema4.getProperty("XPressPrivate").toString());
+    }
 }

Reply via email to