Author: tilman
Date: Tue Aug 27 03:36:35 2024
New Revision: 1920212

URL: http://svn.apache.org/viewvc?rev=1920212&view=rev
Log:
PDFBOX-5873: add more resources; don't save fonts twice

Modified:
    
pdfbox/branches/2.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java

Modified: 
pdfbox/branches/2.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/2.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java?rev=1920212&r1=1920211&r2=1920212&view=diff
==============================================================================
--- 
pdfbox/branches/2.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java
 (original)
+++ 
pdfbox/branches/2.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java
 Tue Aug 27 03:36:35 2024
@@ -19,11 +19,15 @@ package org.apache.pdfbox.examples.pdmod
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.pdfbox.cos.COSDictionary;
 
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageTree;
 import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.pdmodel.font.PDCIDFont;
@@ -34,6 +38,13 @@ import org.apache.pdfbox.pdmodel.font.PD
 import org.apache.pdfbox.pdmodel.font.PDType0Font;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
 import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
+import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
+import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
+import org.apache.pdfbox.pdmodel.graphics.state.PDSoftMask;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
 
 /**
  * This will extract all true type-fonts of a pdf.
@@ -44,6 +55,8 @@ public final class ExtractTTFFonts
     private int fontCounter = 1;
 
     @SuppressWarnings({"squid:S2068"})
+    private final Set<COSDictionary> fontSet = new HashSet<>();
+    private int currentPage;
     private static final String PASSWORD = "-password";
     private static final String PREFIX = "-prefix";
     private static final String ADDKEY = "-addkey";
@@ -124,11 +137,21 @@ public final class ExtractTTFFonts
                 try
                 {
                     document = PDDocument.load(new File(pdfFile), password);
-                    for (PDPage page : document.getPages())
+                    PDPageTree pageTree = document.getPages();
+                    for (PDPage page : pageTree)
                     {
-                        PDResources resources = page.getResources();
+                        currentPage = pageTree.indexOf(page) + 1;
                         // extract all fonts which are part of the page 
resources
-                        processResources(resources, prefix, addKey);
+                        processResources(page.getResources(), prefix, addKey);
+                        
+                        for (PDAnnotation ann : page.getAnnotations())
+                        {
+                            PDAppearanceStream nas = 
ann.getNormalAppearanceStream();
+                            if (nas != null)
+                            {
+                                processResources(nas.getResources(), prefix, 
addKey);
+                            }
+                        }
                     }
                 }
                 finally
@@ -152,6 +175,12 @@ public final class ExtractTTFFonts
         for (COSName key : resources.getFontNames())
         {
             PDFont font = resources.getFont(key);
+            System.out.println(font.getName() + " on page " + currentPage);
+            if (fontSet.contains(font.getCOSObject()))
+            {
+                continue;
+            }
+            fontSet.add(font.getCOSObject());
             // write the font
             if (font instanceof PDTrueTypeFont)
             {
@@ -191,11 +220,33 @@ public final class ExtractTTFFonts
             if (xobject instanceof PDFormXObject)
             {
                 PDFormXObject xObjectForm = (PDFormXObject) xobject;
-                PDResources formResources = xObjectForm.getResources();
-                processResources(formResources, prefix, addKey);
+                processResources(xObjectForm.getResources(), prefix, addKey);
+            }
+        }
+
+        for (COSName name : resources.getPatternNames())
+        {
+            PDAbstractPattern pattern = resources.getPattern(name);
+            if (pattern instanceof PDTilingPattern)
+            {
+                PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
+                processResources(tilingPattern.getResources(), prefix, addKey);
             }
         }
 
+        for (COSName name : resources.getExtGStateNames())
+        {
+            PDExtendedGraphicsState extGState = resources.getExtGState(name);
+            PDSoftMask softMask = extGState.getSoftMask();
+            if (softMask != null)
+            {
+                PDTransparencyGroup group = softMask.getGroup();
+                if (group != null)
+                {
+                    processResources(group.getResources(), prefix, addKey);
+                }
+            }
+        }
     }
 
     private void writeFont(PDFontDescriptor fd, String name) throws IOException


Reply via email to