Author: tilman
Date: Tue Aug 27 03:36:44 2024
New Revision: 1920214
URL: http://svn.apache.org/viewvc?rev=1920214&view=rev
Log:
PDFBOX-5873: add more resources; don't save fonts twice
Modified:
pdfbox/branches/3.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java
Modified:
pdfbox/branches/3.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/3.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java?rev=1920214&r1=1920213&r2=1920214&view=diff
==============================================================================
---
pdfbox/branches/3.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java
(original)
+++
pdfbox/branches/3.0/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractTTFFonts.java
Tue Aug 27 03:36:44 2024
@@ -21,12 +21,16 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.HashSet;
+import java.util.Set;
import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.font.PDCIDFont;
@@ -37,6 +41,13 @@ import org.apache.pdfbox.pdmodel.font.PD
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
+import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
+import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
+import org.apache.pdfbox.pdmodel.graphics.state.PDSoftMask;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
/**
* This will extract all true type-fonts of a pdf.
@@ -45,6 +56,8 @@ import org.apache.pdfbox.pdmodel.graphic
public final class ExtractTTFFonts
{
private int fontCounter = 1;
+ private final Set<COSDictionary> fontSet = new HashSet<>();
+ private int currentPage;
@SuppressWarnings({"squid:S2068"})
private static final String PASSWORD = "-password";
@@ -124,11 +137,21 @@ public final class ExtractTTFFonts
}
try (PDDocument document = Loader.loadPDF(new File(pdfFile),
password))
{
- for (PDPage page : document.getPages())
+ PDPageTree pageTree = document.getPages();
+ for (PDPage page : pageTree)
{
- PDResources resources = page.getResources();
+ currentPage = pageTree.indexOf(page) + 1;
// extract all fonts which are part of the page
resources
- processResources(resources, prefix, addKey);
+ processResources(page.getResources(), prefix, addKey);
+
+ for (PDAnnotation ann : page.getAnnotations())
+ {
+ PDAppearanceStream nas =
ann.getNormalAppearanceStream();
+ if (nas != null)
+ {
+ processResources(nas.getResources(), prefix,
addKey);
+ }
+ }
}
}
}
@@ -145,6 +168,12 @@ public final class ExtractTTFFonts
for (COSName key : resources.getFontNames())
{
PDFont font = resources.getFont(key);
+ System.out.println(font.getName() + " on page " + currentPage);
+ if (fontSet.contains(font.getCOSObject()))
+ {
+ continue;
+ }
+ fontSet.add(font.getCOSObject());
// write the font
if (font instanceof PDTrueTypeFont)
{
@@ -184,11 +213,33 @@ public final class ExtractTTFFonts
if (xobject instanceof PDFormXObject)
{
PDFormXObject xObjectForm = (PDFormXObject) xobject;
- PDResources formResources = xObjectForm.getResources();
- processResources(formResources, prefix, addKey);
+ processResources(xObjectForm.getResources(), prefix, addKey);
}
}
+ for (COSName name : resources.getPatternNames())
+ {
+ PDAbstractPattern pattern = resources.getPattern(name);
+ if (pattern instanceof PDTilingPattern)
+ {
+ PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
+ processResources(tilingPattern.getResources(), prefix, addKey);
+ }
+ }
+
+ for (COSName name : resources.getExtGStateNames())
+ {
+ PDExtendedGraphicsState extGState = resources.getExtGState(name);
+ PDSoftMask softMask = extGState.getSoftMask();
+ if (softMask != null)
+ {
+ PDTransparencyGroup group = softMask.getGroup();
+ if (group != null)
+ {
+ processResources(group.getResources(), prefix, addKey);
+ }
+ }
+ }
}
private void writeFont(PDFontDescriptor fd, String name) throws IOException