Author: tilman
Date: Sat Apr  5 09:55:07 2025
New Revision: 1924801

URL: http://svn.apache.org/viewvc?rev=1924801&view=rev
Log:
PDFBOX-5982: support DP and MP operators

Added:
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPoint.java
   (with props)
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPointWithProperties.java
   (with props)
Modified:
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java?rev=1924801&r1=1924800&r2=1924801&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
 Sat Apr  5 09:55:07 2025
@@ -1182,4 +1182,15 @@ public abstract class PDFStreamEngine
     {
         return shouldProcessColorOperators;
     }
+
+    /**
+     * Handles MP and DP operators; this base implementation does nothing.
+     *
+     * @param tag indicates the role or significance of the marked-content point
+     * @param properties optional properties, may be null
+     */
+    public void markedContentPoint(COSName tag, COSDictionary properties)
+    {
+        // intentionally empty, meant to be overridden in subclasses
+    }
 }

Added: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPoint.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPoint.java?rev=1924801&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPoint.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPoint.java
 Sat Apr  5 09:55:07 2025
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.contentstream.operator.markedcontent;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.pdfbox.contentstream.PDFStreamEngine;
+import org.apache.pdfbox.contentstream.operator.MissingOperandException;
+import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
+import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSName;
+
+/**
+ * Processes the marked-content point operator by passing its tag, without properties, to the engine.
+ * @author Tilman Hausherr
+ */
+public class MarkedContentPoint extends OperatorProcessor
+{
+    public MarkedContentPoint(PDFStreamEngine context)
+    {
+        super(context);
+    }
+
+    @Override
+    public void process(Operator operator, List<COSBase> operands) throws 
IOException
+    {
+        if (operands.isEmpty())
+        {
+            throw new MissingOperandException(operator, operands);
+        }
+        COSBase base0 = operands.get(0);
+        if (!(base0 instanceof COSName))
+        {
+            return;
+        }
+        getContext().markedContentPoint((COSName) base0, null);
+    }
+
+    @Override
+    public String getName()
+    {
+        return OperatorName.MARKED_CONTENT_POINT;
+    }
+    
+}

Propchange: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPoint.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPointWithProperties.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPointWithProperties.java?rev=1924801&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPointWithProperties.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPointWithProperties.java
 Sat Apr  5 09:55:07 2025
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.contentstream.operator.markedcontent;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.pdfbox.contentstream.PDFStreamEngine;
+import org.apache.pdfbox.contentstream.operator.MissingOperandException;
+import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
+import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import 
org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
+
+/**
+ * Processes the marked-content point with properties operator; properties are inline or looked up by name.
+ * @author Tilman Hausherr
+ */
+public class MarkedContentPointWithProperties extends OperatorProcessor
+{
+    public MarkedContentPointWithProperties(PDFStreamEngine context)
+    {
+        super(context);
+    }
+
+    @Override
+    public void process(Operator operator, List<COSBase> operands) throws 
IOException
+    {
+        if (operands.size() < 2)
+        {
+            throw new MissingOperandException(operator, operands);
+        }
+        if (!(operands.get(0) instanceof COSName))
+        {
+            return;
+        }
+        PDFStreamEngine context = getContext();
+        COSName tag = (COSName) operands.get(0);
+        COSBase op1 = operands.get(1);
+        COSDictionary propDict = null;
+        if (op1 instanceof COSName)
+        {
+            PDPropertyList prop = 
context.getResources().getProperties((COSName) op1);
+            if (prop != null)
+            {
+                propDict = prop.getCOSObject();
+            }
+        }
+        else if (op1 instanceof COSDictionary)
+        {
+            propDict = (COSDictionary) op1;
+        }
+        if (propDict == null)
+        {
+            // wrong operand type or named property list not found in the resources: ignore
+            return;
+        }
+        context.markedContentPoint(tag, propDict);
+    }
+
+    @Override
+    public String getName()
+    {
+        return OperatorName.MARKED_CONTENT_POINT_WITH_PROPS;
+    }
+    
+}

Propchange: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/operator/markedcontent/MarkedContentPointWithProperties.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java?rev=1924801&r1=1924800&r2=1924801&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDAbstractContentStream.java
 Sat Apr  5 09:55:07 2025
@@ -1373,6 +1373,32 @@ abstract class PDAbstractContentStream i
     }
 
     /**
+     * Set a marked content point.
+     *
+     * @param tag the tag to be added to the content stream
+     * @throws IOException If the content stream could not be written
+     */
+    public void setMarkedContentPoint(COSName tag) throws IOException
+    {
+        writeOperand(tag);
+        writeOperator(OperatorName.MARKED_CONTENT_POINT);
+    }
+
+    /**
+     * Set a marked content point with a reference to an entry in the page 
resources' Properties dictionary.
+     *
+     * @param tag the tag to be added to the content stream
+     * @param propertyList property list to be added to the resources and referenced from the content stream
+     * @throws IOException If the content stream could not be written
+     */
+    public void setMarkedContentPointWithProperties(COSName tag, 
PDPropertyList propertyList) throws IOException
+    {
+        writeOperand(tag);
+        writeOperand(resources.add(propertyList));
+        writeOperator(OperatorName.MARKED_CONTENT_POINT_WITH_PROPS);
+    }
+
+    /**
      * Set an extended graphics state.
      * 
      * @param state The extended graphics state to be added to the content 
stream

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java?rev=1924801&r1=1924800&r2=1924801&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFMarkedContentExtractor.java
 Sat Apr  5 09:55:07 2025
@@ -31,6 +31,8 @@ import org.apache.pdfbox.contentstream.o
 import 
org.apache.pdfbox.contentstream.operator.markedcontent.BeginMarkedContentSequenceWithProperties;
 import org.apache.pdfbox.contentstream.operator.markedcontent.DrawObject;
 import 
org.apache.pdfbox.contentstream.operator.markedcontent.EndMarkedContentSequence;
+import 
org.apache.pdfbox.contentstream.operator.markedcontent.MarkedContentPoint;
+import 
org.apache.pdfbox.contentstream.operator.markedcontent.MarkedContentPointWithProperties;
 
 /**
  * This is an stream engine to extract the marked content of a pdf.
@@ -45,7 +47,7 @@ public class PDFMarkedContentExtractor e
     private final Map<String, List<TextPosition>> characterListMapping = new 
HashMap<>();
 
     /**
-     * Instantiate a new PDFTextStripper object.
+     * Instantiate a new PDFMarkedContentExtractor object.
      */
     public PDFMarkedContentExtractor()
     {
@@ -63,8 +65,8 @@ public class PDFMarkedContentExtractor e
         addOperator(new BeginMarkedContentSequence(this));
         addOperator(new EndMarkedContentSequence(this));
         addOperator(new DrawObject(this));
-        // todo: DP - Marked Content Point
-        // todo: MP - Marked Content Point with Properties
+        addOperator(new MarkedContentPoint(this));
+        addOperator(new MarkedContentPointWithProperties(this));
     }
 
     /**
@@ -129,6 +131,13 @@ public class PDFMarkedContentExtractor e
         }
     }
 
+    @Override
+    public void markedContentPoint(COSName tag, COSDictionary properties)
+    {
+        // Only delegates to the superclass for now. If you know anything useful that should 
happen, please tell us.
+        super.markedContentPoint(tag, properties);
+    }
+
     public void xobject(PDXObject xobject)
     {
         if (!this.currentMarkedContents.isEmpty())


Reply via email to