Author: tilman
Date: Wed Dec 17 14:27:34 2025
New Revision: 1930669
Log:
PDFBOX-6127: look for non standard namespaces in rdf:RDF
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Wed Dec 17 12:30:00 2025 (r1930668)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Wed Dec 17 14:27:34 2025 (r1930669)
@@ -192,6 +192,24 @@ public class DomXmpParser
// Now, parse the content of root
Element rdfRdf = findDescriptionsParent(root);
nsFinder.push(rdfRdf); // PDFBOX-6099: push namespaces in rdf:RDF
+
+ // PDFBOX-6127: look for non standard namespaces (similar to PDFBOX-2378)
+ if (!strictParsing)
+ {
+ NamedNodeMap nnm = rdfRdf.getAttributes();
+ if (nnm != null)
+ {
+ for (int i = 0; i < nnm.getLength(); i++)
+ {
+ Attr attr = (Attr) nnm.item(i);
+ if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
+ {
+ maybeAddNonStandardNamespace(xmp, attr);
+ }
+ }
+ }
+ }
+
List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
for (Element description : descriptions)
{
@@ -212,6 +230,23 @@ public class DomXmpParser
return xmp;
}
+ private void maybeAddNonStandardNamespace(XMPMetadata xmp, Attr attr)
+ {
+ // xmlns:prefix="namespace"
+ TypeMapping tm = xmp.getTypeMapping();
+ String namespace = attr.getValue();
+ if (!XmpConstants.RDF_NAMESPACE.equals(namespace) &&
+ !tm.isStructuredTypeNamespace(namespace) &&
+ xmp.getSchema(namespace) == null && tm.getSchemaFactory(namespace) == null)
+ {
+ // PDFBOX-5128 / PDFBOX-6127: Add the schema on the fly if it can't be found
+ // PDFBOX-5649: But only if the namespace isn't already known
+ // because this adds a namespace without property descriptions
+ // PDFBOX-6127: never rdf
+ tm.addNewNameSpace(namespace, attr.getLocalName());
+ }
+ }
+
private boolean isSchemaExtensionProperty(final Element element)
{
return element != null && "pdfaExtension".equals(element.getPrefix());
@@ -279,14 +314,9 @@ public class DomXmpParser
}
else if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
{
- String namespace = attr.getValue();
- if (!strictParsing && !tm.isStructuredTypeNamespace(namespace) &&
- xmp.getSchema(namespace) == null && tm.getSchemaFactory(namespace) == null)
+ if (!strictParsing)
{
- // PDFBOX-5128: Add the schema on the fly if it can't be found
- // PDFBOX-5649: But only if the namespace isn't already known
- // because this adds a namespace without property descriptions
- tm.addNewNameSpace(namespace, attr.getLocalName());
+ maybeAddNonStandardNamespace(xmp, attr);
}
}
else
Modified:
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java Wed Dec 17 12:30:00 2025 (r1930668)
+++ pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java Wed Dec 17 14:27:34 2025 (r1930669)
@@ -914,4 +914,37 @@ class DomXmpParserTest
XMPSchema uaSchema2 = xmp2.getSchema("http://www.aiim.org/pdfua/ns/id/");
assertEquals(1, uaSchema2.getIntegerPropertyValueAsSimple("part"));
}
+
+ @Test
+ void testNonStandardURIinRDF() throws XmpParsingException, TransformerException
+ {
+ String s = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
+"<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?><x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP Core 4.2.1-c041 52.342996, 2008/05/07-20:48:00 \">\n" +
+" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+" <rdf:Description xmlns:pdfx=\"http://ns.adobe.com/pdfx/1.3/\" rdf:about=\"\">\n" +
+" <pdfx:XPressPrivate>private</pdfx:XPressPrivate>\n" +
+" </rdf:Description>\n" +
+" </rdf:RDF>\n" +
+"</x:xmpmeta><?xpacket end=\"w\"?>";
+ final DomXmpParser xmpParser1 = new DomXmpParser();
+ XmpParsingException ex = assertThrows(XmpParsingException.class,
+ () -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
+ assertEquals("Cannot find a definition for the namespace http://ns.adobe.com/pdfx/1.3/, property: pdfx:XPressPrivate", ex.getMessage());
+ DomXmpParser xmpParser2 = new DomXmpParser();
+ xmpParser2.setStrictParsing(false);
+ XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
+ XMPSchema schema2 = xmp2.getSchema("http://ns.adobe.com/pdfx/1.3/");
+ assertEquals("[XPressPrivate=TextType:private]", schema2.getProperty("XPressPrivate").toString());
+ XmpSerializer serializer = new XmpSerializer();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ serializer.serialize(xmp2, baos, true);
+ final DomXmpParser xmpParser3 = new DomXmpParser();
+ ex = assertThrows(XmpParsingException.class, () -> xmpParser3.parse(baos.toByteArray()));
+ assertEquals("Cannot find a definition for the namespace http://ns.adobe.com/pdfx/1.3/, property: pdfx:XPressPrivate", ex.getMessage());
+ DomXmpParser xmpParser4 = new DomXmpParser();
+ xmpParser4.setStrictParsing(false);
+ XMPMetadata xmp4 = xmpParser4.parse(baos.toByteArray());
+ XMPSchema schema4 = xmp4.getSchema("http://ns.adobe.com/pdfx/1.3/");
+ assertEquals("[XPressPrivate=TextType:private]", schema4.getProperty("XPressPrivate").toString());
+ }
}