Author: tilman
Date: Mon Jul  1 10:36:07 2024
New Revision: 1918774

URL: http://svn.apache.org/viewvc?rev=1918774&view=rev
Log:
PDFBOX-5847: Improve performance of FileSystemFontProvider.scanFonts() by 
introducing an "only headers" mode for the font parsers where each table reads 
as little information as possible, by Mykola Bohdiuk

Added:
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java
   (with props)
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java
   (with props)
Modified:
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java
    
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
    
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java 
(original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java 
Mon Jul  1 10:36:07 2024
@@ -28,6 +28,7 @@ import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.fontbox.ttf.FontHeaders;
 import org.apache.pdfbox.io.RandomAccessRead;
 
 
@@ -106,13 +107,61 @@ public class CFFParser
     }
 
     /**
-     * Parse CFF font using a DataInput as input.
+     * Extract "Registry", "Ordering" and "Supplement" properties from the 
first CFF subfont.
      * 
-     * @param input the source to be parsed
-     * @return the parsed CFF fonts
+     * @param randomAccessRead the source to be parsed
+     * @param outHeaders where to put results
+     *
      * @throws IOException If there is an error reading from the stream
      */
-    private List<CFFFont> parse(DataInput input) throws IOException
+    public void parseFirstSubFontROS(RandomAccessRead randomAccessRead, 
FontHeaders outHeaders) throws IOException
+    {
+        // this method is a simplified and merged version of 
parse(RandomAccessRead) > parse(DataInput) > parseFont(...)
+
+        // start code from parse(RandomAccessRead)
+        randomAccessRead.seek(0);
+        DataInput input = new DataInputRandomAccessRead(randomAccessRead);
+
+        // start code from parse(DataInput)
+        input = skipHeader(input);
+        String[] nameIndex = readStringIndexData(input);
+        if (nameIndex.length == 0)
+        {
+            outHeaders.setError("Name index missing in CFF font");
+            return;
+        }
+        byte[][] topDictIndex = readIndexData(input);
+        if (topDictIndex.length == 0)
+        {
+            outHeaders.setError("Top DICT INDEX missing in CFF font");
+            return;
+        }
+
+        // 'stringIndex' is required by 'parseROS() > readString()'
+        stringIndex = readStringIndexData(input);
+
+        // start code from parseFont(...)
+        DataInputByteArray topDictInput = new 
DataInputByteArray(topDictIndex[0]);
+        DictData topDict = readDictData(topDictInput);
+
+        DictData.Entry syntheticBaseEntry = topDict.getEntry("SyntheticBase");
+        if (syntheticBaseEntry != null)
+        {
+            outHeaders.setError("Synthetic Fonts are not supported");
+            return;
+        }
+
+        CFFCIDFont cffCIDFont = parseROS(topDict);
+        if (cffCIDFont != null)
+        {
+            outHeaders.setOtfROS(
+                    cffCIDFont.getRegistry(),
+                    cffCIDFont.getOrdering(),
+                    cffCIDFont.getSupplement());
+        }
+    }
+
+    private DataInput skipHeader(DataInput input) throws IOException
     {
         String firstTag = readTagName(input);
         // try to determine which kind of font we have
@@ -132,6 +181,19 @@ public class CFFParser
 
         @SuppressWarnings("unused")
         Header header = readHeader(input);
+        return input;
+    }
+
+    /**
+     * Parse CFF font using a DataInput as input.
+     * 
+     * @param input the source to be parsed
+     * @return the parsed CFF fonts
+     * @throws IOException If there is an error reading from the stream
+     */
+    private List<CFFFont> parse(DataInput input) throws IOException
+    {
+        input = skipHeader(input);
         String[] nameIndex = readStringIndexData(input);
         if (nameIndex.length == 0)
         {
@@ -463,6 +525,28 @@ public class CFFParser
         }
     }
 
+    /**
+     * Extracts Registry, Ordering and Supplement from {@code topDict["ROS"]}.
+     */
+    private CFFCIDFont parseROS(DictData topDict) throws IOException
+    {
+        // determine if this is a Type 1-equivalent font or a CIDFont
+        DictData.Entry rosEntry = topDict.getEntry("ROS");
+        if (rosEntry != null)
+        {
+            if (rosEntry.size() < 3)
+            {
+                throw new IOException("ROS entry must have 3 elements");
+            }
+            CFFCIDFont cffCIDFont = new CFFCIDFont();
+            
cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue()));
+            
cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue()));
+            cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());
+            return cffCIDFont;
+        }
+        return null;
+    }
+
     private CFFFont parseFont(DataInput input, String name, byte[] 
topDictIndex) throws IOException
     {
         // top dict
@@ -476,21 +560,12 @@ public class CFFParser
             throw new IOException("Synthetic Fonts are not supported");
         }
 
-        // determine if this is a Type 1-equivalent font or a CIDFont
         CFFFont font;
-        boolean isCIDFont = topDict.getEntry("ROS") != null;
-        if (isCIDFont)
+        CFFCIDFont cffCIDFont = parseROS(topDict);
+        // determine if this is a Type 1-equivalent font or a CIDFont
+        boolean isCIDFont = cffCIDFont != null;
+        if (cffCIDFont != null)
         {
-            CFFCIDFont cffCIDFont = new CFFCIDFont();
-            DictData.Entry rosEntry = topDict.getEntry("ROS");
-            if (rosEntry == null || rosEntry.size() < 3)
-            {
-                throw new IOException("ROS entry must have 3 elements");
-            }
-            
cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue()));
-            
cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue()));
-            cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());
-
             font = cffCIDFont;
         }
         else

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java 
(original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java 
Mon Jul  1 10:36:07 2024
@@ -1,87 +1,110 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.fontbox.ttf;
-
-import java.io.IOException;
-import org.apache.fontbox.cff.CFFFont;
-import org.apache.fontbox.cff.CFFParser;
-
-/**
- * PostScript font program (compact font format).
- */
-public class CFFTable extends TTFTable
-{
-    /**
-     * A tag that identifies this table type.
-     */
-    public static final String TAG = "CFF ";
-
-    private CFFFont cffFont;
-
-    CFFTable()
-    {
-        super();
-    }
-
-    /**
-     * This will read the required data from the stream.
-     *
-     * @param ttf The font that is being read.
-     * @param data The stream to read the data from.
-     * @throws java.io.IOException If there is an error reading the data.
-     */
-    @Override
-    void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
-    {
-        byte[] bytes = data.read((int)getLength());
-
-        CFFParser parser = new CFFParser();
-        cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0);
-
-        initialized = true;
-    }
-
-    /**
-     * Returns the CFF font, which is a compact representation of a PostScript 
Type 1, or CIDFont
-     * 
-     * @return the associated CFF font
-     */
-    public CFFFont getFont()
-    {
-        return cffFont;
-    }
-    
-    /**
-     * Allows bytes to be re-read later by CFFParser.
-     */
-    private static class CFFBytesource implements CFFParser.ByteSource
-    {
-        private final TrueTypeFont ttf;
-        
-        CFFBytesource(TrueTypeFont ttf)
-        {
-           this.ttf = ttf; 
-        }
-        
-        @Override
-        public byte[] getBytes() throws IOException
-        {
-            return ttf.getTableBytes(ttf.getTableMap().get(CFFTable.TAG));
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fontbox.ttf;
+
+import java.io.IOException;
+import org.apache.fontbox.cff.CFFFont;
+import org.apache.fontbox.cff.CFFParser;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadBuffer;
+
+/**
+ * PostScript font program (compact font format).
+ */
+public class CFFTable extends TTFTable
+{
+    /**
+     * A tag that identifies this table type.
+     */
+    public static final String TAG = "CFF ";
+
+    private CFFFont cffFont;
+
+    CFFTable()
+    {
+        super();
+    }
+
+    /**
+     * This will read the required data from the stream.
+     *
+     * @param ttf The font that is being read.
+     * @param data The stream to read the data from.
+     * @throws java.io.IOException If there is an error reading the data.
+     */
+    @Override
+    void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
+    {
+        byte[] bytes = data.read((int)getLength());
+
+        CFFParser parser = new CFFParser();
+        cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0);
+
+        initialized = true;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders 
outHeaders) throws IOException
+    {
+        try (RandomAccessRead subReader = data.createSubView(getLength()))
+        {
+            RandomAccessRead reader;
+            if (subReader != null)
+            {
+                reader = subReader;
+            }
+            else
+            {
+                assert false : "It is inefficient to read TTFDataStream into 
an array";
+                byte[] bytes = data.read((int)getLength());
+                reader = new RandomAccessReadBuffer(bytes);
+            }
+            new CFFParser().parseFirstSubFontROS(reader, outHeaders);
+        }
+    }
+
+    /**
+     * Returns the CFF font, which is a compact representation of a PostScript 
Type 1, or CIDFont
+     * 
+     * @return the associated CFF font
+     */
+    public CFFFont getFont()
+    {
+        return cffFont;
+    }
+    
+    /**
+     * Allows bytes to be re-read later by CFFParser.
+     */
+    private static class CFFBytesource implements CFFParser.ByteSource
+    {
+        private final TrueTypeFont ttf;
+        
+        CFFBytesource(TrueTypeFont ttf)
+        {
+           this.ttf = ttf; 
+        }
+        
+        @Override
+        public byte[] getBytes() throws IOException
+        {
+            return ttf.getTableBytes(ttf.getTableMap().get(CFFTable.TAG));
+        }
+    }
+}

Added: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java?rev=1918774&view=auto
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java
 (added)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java
 Mon Jul  1 10:36:07 2024
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.fontbox.ttf;
+
+/**
+ * To improve performance of {@code FileSystemFontProvider.scanFonts(...)},
+ * this class is used both as a marker (to skip unused data) and as a storage 
for collected data.
+ * <p>
+ * Tables it needs:<ul>
+ * <li>NamingTable.TAG
+ * <li>HeaderTable.TAG
+ * <li>OS2WindowsMetricsTable.TAG
+ * <li>CFFTable.TAG (for OTF)
+ * <li>"gcid" (for non-OTF)
+ * </ul>
+ *
+ * @author Mykola Bohdiuk
+ */
+public final class FontHeaders
+{
+    static final int BYTES_GCID = 142;
+
+    private String error;
+    private String name;
+    private Integer headerMacStyle;
+    private OS2WindowsMetricsTable os2Windows;
+    private String fontFamily;
+    private String fontSubFamily;
+    private byte[] nonOtfGcid142;
+    //
+    private boolean isOTFAndPostScript;
+    private String otfRegistry;
+    private String otfOrdering;
+    private int otfSupplement;
+
+    public String getError()
+    {
+        return error;
+    }
+
+    public String getName()
+    {
+        return name;
+    }
+
+    /**
+     * Returns {@code ttf.getHeader().getMacStyle()}, or null if there is no HeaderTable.
+     */
+    public Integer getHeaderMacStyle()
+    {
+        return headerMacStyle;
+    }
+
+    public OS2WindowsMetricsTable getOS2Windows()
+    {
+        return os2Windows;
+    }
+
+    // only when LOGGER(FileSystemFontProvider).isTraceEnabled() tracing: 
FontFamily, FontSubfamily
+    public String getFontFamily()
+    {
+        return fontFamily;
+    }
+
+    public String getFontSubFamily()
+    {
+        return fontSubFamily;
+    }
+
+    public boolean isOpenTypePostScript()
+    {
+        return isOTFAndPostScript;
+    }
+
+    public byte[] getNonOtfTableGCID142()
+    {
+        return nonOtfGcid142;
+    }
+
+    public String getOtfRegistry()
+    {
+        return otfRegistry;
+    }
+
+    public String getOtfOrdering()
+    {
+        return otfOrdering;
+    }
+
+    public int getOtfSupplement()
+    {
+        return otfSupplement;
+    }
+
+    public void setError(String exception)
+    {
+        this.error = exception;
+    }
+
+    void setName(String name)
+    {
+        this.name = name;
+    }
+
+    void setHeaderMacStyle(Integer headerMacStyle)
+    {
+        this.headerMacStyle = headerMacStyle;
+    }
+
+    void setOs2Windows(OS2WindowsMetricsTable os2Windows)
+    {
+        this.os2Windows = os2Windows;
+    }
+
+    void setFontFamily(String fontFamily, String fontSubFamily)
+    {
+        this.fontFamily = fontFamily;
+        this.fontSubFamily = fontSubFamily;
+    }
+
+    void setNonOtfGcid142(byte[] nonOtfGcid142)
+    {
+        this.nonOtfGcid142 = nonOtfGcid142;
+    }
+
+    void setIsOTFAndPostScript(boolean isOTFAndPostScript)
+    {
+        this.isOTFAndPostScript = isOTFAndPostScript;
+    }
+
+    // public because CFFParser is in a different package
+    public void setOtfROS(String otfRegistry, String otfOrdering, int 
otfSupplement)
+    {
+        this.otfRegistry = otfRegistry;
+        this.otfOrdering = otfOrdering;
+        this.otfSupplement = otfSupplement;
+    }
+}

Propchange: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java
 (original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java
 Mon Jul  1 10:36:07 2024
@@ -64,6 +64,16 @@ public class HeaderTable extends TTFTabl
         super();
     }
 
+    /** {@inheritDoc} */
+    @Override
+    void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders 
outHeaders) throws IOException
+    {
+        // 44 == 4 + 4 + 4 + 4 + 2 + 2 + 2*8 + 4*2, see read()
+        data.seek(data.getCurrentPosition() + 44);
+        macStyle = data.readUnsignedShort();
+        outHeaders.setHeaderMacStyle(macStyle);
+    }
+
     /**
      * This will read the required data from the stream.
      *

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java
 (original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java
 Mon Jul  1 10:36:07 2024
@@ -58,6 +58,21 @@ public class NamingTable extends TTFTabl
     @Override
     void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
     {
+        read(ttf, data, false);
+        initialized = true;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders 
outHeaders) throws IOException
+    {
+        read(ttf, data, true);
+        outHeaders.setName(psName);
+        outHeaders.setFontFamily(fontFamily, fontSubFamily);
+    }
+
+    private void read(TrueTypeFont ttf, TTFDataStream data, boolean 
onlyHeaders) throws IOException
+    {
         int formatSelector = data.readUnsignedShort();
         int numberOfNameRecords = data.readUnsignedShort();
         int offsetToStartOfStringStorage = data.readUnsignedShort();
@@ -66,7 +81,10 @@ public class NamingTable extends TTFTabl
         {
             NameRecord nr = new NameRecord();
             nr.initData(ttf, data);
-            nameRecords.add(nr);
+            if (!onlyHeaders || isUsefulForOnlyHeaders(nr))
+            {
+                nameRecords.add(nr);
+            }
         }
 
         for (NameRecord nr : nameRecords)
@@ -87,8 +105,6 @@ public class NamingTable extends TTFTabl
         lookupTable = new HashMap<>(nameRecords.size());
         fillLookupTable();
         readInterestingStrings();
-
-        initialized = true;
     }
 
     private Charset getCharset(NameRecord nr)
@@ -162,6 +178,21 @@ public class NamingTable extends TTFTabl
         }
     }
 
+    private static boolean isUsefulForOnlyHeaders(NameRecord nr)
+    {
+        int nameId = nr.getNameId();
+        // see "psName =" and "getEnglishName()"
+        if (nameId == NameRecord.NAME_POSTSCRIPT_NAME
+                || nameId == NameRecord.NAME_FONT_FAMILY_NAME
+                || nameId == NameRecord.NAME_FONT_SUB_FAMILY_NAME)
+        {
+            int languageId = nr.getLanguageId();
+            return languageId == NameRecord.LANGUAGE_UNICODE
+                    || languageId == NameRecord.LANGUAGE_WINDOWS_EN_US;
+        }
+        return false;
+    }
+
     /**
      * Helper to get English names by best effort.
      */

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java
 (original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java
 Mon Jul  1 10:36:07 2024
@@ -19,9 +19,12 @@ package org.apache.fontbox.ttf;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadBuffer;
 
 /**
  * An implementation of the TTFDataStream using RandomAccessRead as source.
@@ -30,6 +33,8 @@ import org.apache.pdfbox.io.RandomAccess
  */
 class RandomAccessReadDataStream extends TTFDataStream
 {
+    private static final Log LOG = 
LogFactory.getLog(RandomAccessReadDataStream.class);
+
     private final long length;
     private final byte[] data;
     private int currentPosition = 0;
@@ -174,6 +179,20 @@ class RandomAccessReadDataStream extends
         return bytesToRead;
     }
 
+    @Override
+    public RandomAccessRead createSubView(long length)
+    {
+        try
+        {
+            return new 
RandomAccessReadBuffer(data).createView(currentPosition, length);
+        }
+        catch (IOException e)
+        {
+            LOG.warn("Could not create a SubView", e);
+            return null;
+        }
+    }
+
     /**
      * {@inheritDoc}
      */

Added: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java?rev=1918774&view=auto
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java
 (added)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java
 Mon Jul  1 10:36:07 2024
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.fontbox.ttf;
+
+import java.io.IOException;
+import java.io.InputStream;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadView;
+
+/**
+ * In contrast to {@link RandomAccessReadDataStream},
+ * this class doesn't pre-load {@code RandomAccessRead} into a {@code byte[]},
+ * it works with {@link RandomAccessRead} directly.
+ * 
+ * Performance: it is much faster if most of the buffer is skipped, and slower 
if whole buffer is read()
+ */
+class RandomAccessReadUnbufferedDataStream extends TTFDataStream
+{
+    private final long length;
+    private final RandomAccessRead randomAccessRead;
+
+    /**
+     * @throws IOException If there is a problem reading the source length.
+     */
+    RandomAccessReadUnbufferedDataStream(RandomAccessRead randomAccessRead) 
throws IOException
+    {
+        this.length = randomAccessRead.length();
+        this.randomAccessRead = randomAccessRead;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long getCurrentPosition() throws IOException
+    {
+        return randomAccessRead.getPosition();
+    }
+
+    /**
+     * Close the underlying resources.
+     *
+     * @throws IOException If there is an error closing the resources.
+     */
+    @Override
+    public void close() throws IOException
+    {
+        randomAccessRead.close();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int read() throws IOException
+    {
+        return randomAccessRead.read();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public final long readLong() throws IOException
+    {
+        return ((long) readInt() << 32) | (readInt() & 0xFFFFFFFFL);
+    }
+
+    /**
+     * Reads the next four bytes and combines them into an int, most significant byte first.
+     */
+    private int readInt() throws IOException
+    {
+        int b1 = read();
+        int b2 = read();
+        int b3 = read();
+        int b4 = read();
+        return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void seek(long pos) throws IOException
+    {
+        randomAccessRead.seek(pos);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException
+    {
+        return randomAccessRead.read(b, off, len);
+    }
+
+    /**
+     * Lifetime of returned InputStream is bound by {@code this} lifetime, it 
won't close underlying {@code RandomAccessRead}.
+     * 
+     * {@inheritDoc}
+     */
+    @Override
+    public InputStream getOriginalData() throws IOException
+    {
+        return new 
RandomAccessReadNonClosingInputStream(randomAccessRead.createView(0, length));
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public long getOriginalDataSize()
+    {
+        return length;
+    }
+
+    @Override
+    public RandomAccessRead createSubView(long length)
+    {
+        try
+        {
+            return randomAccessRead.createView(randomAccessRead.getPosition(), 
length);
+        }
+        catch (IOException ex)
+        {
+            assert false : "Please implement " + randomAccessRead.getClass() + 
".createView()";
+            return null;
+        }
+    }
+
+    private static final class RandomAccessReadNonClosingInputStream extends 
InputStream
+    {
+
+        private final RandomAccessReadView randomAccessRead;
+
+        public RandomAccessReadNonClosingInputStream(RandomAccessReadView 
randomAccessRead)
+        {
+            this.randomAccessRead = randomAccessRead;
+        }
+
+        @Override
+        public int read() throws IOException
+        {
+            return randomAccessRead.read();
+        }
+
+        @Override
+        public int read(byte[] b) throws IOException
+        {
+            return randomAccessRead.read(b);
+        }
+
+        @Override
+        public int read(byte[] b, int off, int len) throws IOException
+        {
+            return randomAccessRead.read(b, off, len);
+        }
+
+        @Override
+        public long skip(long n) throws IOException
+        {
+            randomAccessRead.seek(randomAccessRead.getPosition() + n);
+            return n;
+        }
+
+        @Override
+        public void close() throws IOException
+        {
+            // WARNING: .close() will close RandomAccessReadMemoryMappedFile 
if this View was based on it
+//            randomAccessRead.close();
+        }
+    }
+}

Propchange: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java
 (original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java
 Mon Jul  1 10:36:07 2024
@@ -19,6 +19,7 @@ package org.apache.fontbox.ttf;
 
 import java.io.IOException;
 import java.io.InputStream;
+import org.apache.pdfbox.io.RandomAccessRead;
 
 /**
  * A wrapper for a TTF stream inside a TTC file, does not close the underlying 
shared stream.
@@ -83,4 +84,9 @@ class TTCDataStream extends TTFDataStrea
         return stream.getOriginalDataSize();
     }
 
+    @Override
+    public RandomAccessRead createSubView(long length)
+    {
+        return stream.createSubView(length);
+    }
 }

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java
 (original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java
 Mon Jul  1 10:36:07 2024
@@ -24,6 +24,7 @@ import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Calendar;
 import java.util.TimeZone;
+import org.apache.pdfbox.io.RandomAccessRead;
 
 /**
  * An abstract class to read a data stream.
@@ -280,6 +281,18 @@ abstract class TTFDataStream implements
     public abstract int read(byte[] b, int off, int len) throws IOException;
 
     /**
+     * Creates a view from current position to {@code pos + length}.
+     * It can be faster than {@code read(length)} if you only need a few bytes.
+     * {@code SubView.close()} should never close {@code TTFDataStream.this}, 
only itself.
+     *
+     * @return A view or null (caller can use {@link #read} instead). Please 
close() the result
+     */
+    public RandomAccessRead createSubView(long length)
+    {
+        return null;
+    }
+
+    /**
      * Get the current position in the stream.
      *
      * @return The current position in the stream.

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java 
(original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java 
Mon Jul  1 10:36:07 2024
@@ -105,13 +105,29 @@ public class TTFParser
     }
 
     /**
+     * Parse only the table headers of a TrueType font from a RandomAccessRead.
+     *
+     * @param randomAccessRead The RandomAccessRead to be read from. It will 
be closed before returning.
+     * @return TrueType font headers.
+     * @throws IOException If there is an error parsing the TrueType font.
+     */
+    public FontHeaders parseTableHeaders(RandomAccessRead randomAccessRead) 
throws IOException
+    {
+        try (TTFDataStream dataStream = new 
RandomAccessReadUnbufferedDataStream(randomAccessRead))
+        {
+            return parseTableHeaders(dataStream);
+            // dataStream closes randomAccessRead
+        }
+    }
+
+    /**
      * Parse a file and get a true type font.
      *
      * @param raf The TTF file.
      * @return A TrueType font.
      * @throws IOException If there is an error parsing the TrueType font.
      */
-    TrueTypeFont parse(TTFDataStream raf) throws IOException
+    private TrueTypeFont createFontWithTables(TTFDataStream raf) throws 
IOException
     {
         TrueTypeFont font = newFont(raf);
         font.setVersion(raf.read32Fixed());
@@ -140,7 +156,12 @@ public class TTFParser
                 }
             }
         }
-        // parse tables
+        return font;
+    }
+
+    TrueTypeFont parse(TTFDataStream raf) throws IOException
+    {
+        TrueTypeFont font = createFontWithTables(raf);
         parseTables(font);
         return font;
     }
@@ -227,6 +248,81 @@ public class TTFParser
         }
     }
 
+    /**
+     * Based on {@link #parseTables(TrueTypeFont)}.
+     * Parse all table headers and check if all needed tables are present.
+     * 
+     * This method can be optimized further by skipping unused portions inside 
each individual table parser.
+     *
+     * @param raf the data stream to read the font from.
+     * @throws IOException If there is an error parsing the TrueType font.
+     */
+    FontHeaders parseTableHeaders(TTFDataStream raf) throws IOException
+    {
+        FontHeaders outHeaders = new FontHeaders();
+        try (TrueTypeFont font = createFontWithTables(raf))
+        {
+            font.readTableHeaders(NamingTable.TAG, outHeaders); // calls 
NamingTable.readHeaders();
+            font.readTableHeaders(HeaderTable.TAG, outHeaders); // calls 
HeaderTable.readHeaders();
+
+            // only these 5 are used
+            //   sFamilyClass = os2WindowsMetricsTable.getFamilyClass();
+            //   usWeightClass = os2WindowsMetricsTable.getWeightClass();
+            //   ulCodePageRange1 = (int) 
os2WindowsMetricsTable.getCodePageRange1();
+            //   ulCodePageRange2 = (int) 
os2WindowsMetricsTable.getCodePageRange2();
+            //   panose = os2WindowsMetricsTable.getPanose();
+            outHeaders.setOs2Windows(font.getOS2Windows());
+
+            boolean isOTFAndPostScript;
+            if (font instanceof OpenTypeFont && ((OpenTypeFont) 
font).isPostScript())
+            {
+                isOTFAndPostScript = true;
+                if (((OpenTypeFont) font).isSupportedOTF())
+                {
+                    font.readTableHeaders(CFFTable.TAG, outHeaders); // calls 
CFFTable.readHeaders();
+                }
+            }
+            else if (!(font instanceof OpenTypeFont) && 
font.tables.containsKey(CFFTable.TAG))
+            {
+                outHeaders.setError("True Type fonts using CFF outlines are 
not supported");
+                return outHeaders;
+            }
+            else
+            {
+                isOTFAndPostScript = false;
+                TTFTable gcid = font.getTableMap().get("gcid");
+                if (gcid != null && gcid.getLength() >= FontHeaders.BYTES_GCID)
+                {
+                    outHeaders.setNonOtfGcid142(font.getTableNBytes(gcid, 
FontHeaders.BYTES_GCID));
+                }
+            }
+            outHeaders.setIsOTFAndPostScript(isOTFAndPostScript);
+
+            // list taken from parseTables(), detect them, but don't spend 
time parsing
+            final String[] mandatoryTables = {
+                HeaderTable.TAG,
+                HorizontalHeaderTable.TAG,
+                MaximumProfileTable.TAG,
+                isEmbedded ? null : PostScriptTable.TAG, // in an embedded 
font this table is optional
+                isOTFAndPostScript ? null : IndexToLocationTable.TAG,
+                isOTFAndPostScript ? null : GlyphTable.TAG,
+                isEmbedded ? null : NamingTable.TAG,
+                HorizontalMetricsTable.TAG,
+                isEmbedded ? null : CmapTable.TAG,
+            };
+
+            for (String tag : mandatoryTables)
+            {
+                if (tag != null && !font.tables.containsKey(tag))
+                {
+                    outHeaders.setError("'" + tag + "' table is mandatory");
+                    return outHeaders;
+                }
+            }
+        }
+        return outHeaders;
+    }
+
     protected boolean allowCFF()
     {
         return false;

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java 
(original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java 
Mon Jul  1 10:36:07 2024
@@ -126,4 +126,16 @@ public class TTFTable
     void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
     {
     }
+    
+    /**
+     * This will read required headers from the stream into outHeaders.
+     * 
+     * @param ttf The font that is being read.
+     * @param data The stream to read the data from.
+     * @param outHeaders The class to write the data to.
+     * @throws IOException If there is an error reading the data.
+     */
+    void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders 
outHeaders) throws IOException
+    {
+    }
 }

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java
 (original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java
 Mon Jul  1 10:36:07 2024
@@ -47,7 +47,7 @@ public class TrueTypeCollection implemen
      */
     public TrueTypeCollection(File file) throws IOException
     {
-        this(new RandomAccessReadBufferedFile(file), true);
+        this(createBufferedDataStream(new RandomAccessReadBufferedFile(file), 
true));
     }
 
     /**
@@ -58,7 +58,7 @@ public class TrueTypeCollection implemen
      */
     public TrueTypeCollection(InputStream stream) throws IOException
     {
-        this(new RandomAccessReadBuffer(stream), false);
+        this(createBufferedDataStream(new RandomAccessReadBuffer(stream), 
false));
     }
 
     /**
@@ -66,21 +66,12 @@ public class TrueTypeCollection implemen
      *
      * @param randomAccessRead
      * @param closeAfterReading {@code true} to close randomAccessRead
+     * @param stream the font data stream, either a {@link RandomAccessReadDataStream} 
or a {@link RandomAccessReadUnbufferedDataStream}
      * @throws IOException If the font could not be parsed.
      */
-    private TrueTypeCollection(RandomAccessRead randomAccessRead, boolean 
closeAfterReading) throws IOException
+    private TrueTypeCollection(TTFDataStream stream) throws IOException
     {
-        try
-        {
-            this.stream = new RandomAccessReadDataStream(randomAccessRead);
-        }
-        finally
-        {
-            if (closeAfterReading)
-            {
-                IOUtils.closeQuietly(randomAccessRead);
-            }
-        }
+        this.stream = stream;
 
         // TTC header
         String tag = stream.readTag();
@@ -107,12 +98,27 @@ public class TrueTypeCollection implemen
             int ulDsigOffset = stream.readUnsignedShort();
         }
     }
-    
+
+    private static TTFDataStream createBufferedDataStream(RandomAccessRead 
randomAccessRead, boolean closeAfterReading) throws IOException
+    {
+        try
+        {
+            return new RandomAccessReadDataStream(randomAccessRead);
+        }
+        finally
+        {
+            if (closeAfterReading)
+            {
+                IOUtils.closeQuietly(randomAccessRead);
+            }
+        }
+    }
+
     /**
      * Run the callback for each TT font in the collection.
      * 
      * @param trueTypeFontProcessor the object with the callback method.
-     * @throws IOException if something went wrong when calling the 
TrueTypeFontProcessor
+     * @throws IOException if something went wrong when parsing any font or 
calling the TrueTypeFontProcessor
      */
     public void processAllFonts(TrueTypeFontProcessor trueTypeFontProcessor) 
throws IOException
     {
@@ -122,9 +128,38 @@ public class TrueTypeCollection implemen
             trueTypeFontProcessor.process(font);
         }
     }
-    
+
+    /**
+     * Run the callback for the headers of each TT font in the given collection file.
+     * 
+     * @param trueTypeFontProcessor the object with the callback method.
+     * @throws IOException if something went wrong when parsing any font
+     */
+    public static void processAllFontHeaders(File ttcFile, 
TrueTypeFontHeadersProcessor trueTypeFontProcessor) throws IOException
+    {
+        try (
+                RandomAccessRead read = new 
RandomAccessReadBufferedFile(ttcFile);
+                TTFDataStream stream = new 
RandomAccessReadUnbufferedDataStream(read);
+                TrueTypeCollection ttc = new TrueTypeCollection(stream)
+        )
+        {
+            for (int i = 0; i < ttc.numFonts; i++)
+            {
+                TTFParser parser = ttc.createFontParserAtIndexAndSeek(i);
+                FontHeaders headers = parser.parseTableHeaders(new 
TTCDataStream(ttc.stream));
+                trueTypeFontProcessor.process(headers);
+            }
+        }
+    }
+
     private TrueTypeFont getFontAtIndex(int idx) throws IOException
     {
+        TTFParser parser = createFontParserAtIndexAndSeek(idx);
+        return parser.parse(new TTCDataStream(stream));
+    }
+
+    private TTFParser createFontParserAtIndexAndSeek(int idx) throws 
IOException
+    {
         stream.seek(fontOffsets[idx]);
         TTFParser parser;
         if (stream.readTag().equals("OTTO"))
@@ -136,7 +171,7 @@ public class TrueTypeCollection implemen
             parser = new TTFParser(false);
         }
         stream.seek(fontOffsets[idx]);
-        return parser.parse(new TTCDataStream(stream));
+        return parser;
     }
 
     /**
@@ -167,7 +202,16 @@ public class TrueTypeCollection implemen
     {
         void process(TrueTypeFont ttf) throws IOException;
     }
-    
+
+    /**
+     * Implement the callback method to call {@link 
TrueTypeCollection#processAllFontHeaders(File, TrueTypeFontHeadersProcessor)}.
+     */
+    @FunctionalInterface
+    public interface TrueTypeFontHeadersProcessor
+    {
+        void process(FontHeaders fontHeaders);
+    }
+
     @Override
     public void close() throws IOException
     {

Modified: 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
 (original)
+++ 
pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java
 Mon Jul  1 10:36:07 2024
@@ -179,6 +179,32 @@ public class TrueTypeFont implements Fon
     }
 
     /**
+     * Returns the raw bytes of the given table, no more than {@code limit} 
bytes.
+     * 
+     * @param table the table to read.
+     * @param limit maximum length of array to return
+     * @return the raw bytes of the given table
+     * 
+     * @throws IOException if there was an error accessing the table.
+     */
+    public byte[] getTableNBytes(TTFTable table, int limit) throws IOException
+    {
+        synchronized (lockReadtable)
+        {
+            // save current position
+            long currentPosition = data.getCurrentPosition();
+            data.seek(table.getOffset());
+
+            // read at most limit bytes
+            byte[] bytes = data.read(Math.min(limit, (int) table.getLength()));
+
+            // restore current position
+            data.seek(currentPosition);
+            return bytes;
+        }
+    }
+
+    /**
      * This will get the naming table for the true type font.
      * 
      * @return The naming table or null if it doesn't exist.
@@ -386,6 +412,28 @@ public class TrueTypeFont implements Fon
     }
 
     /**
+     * Read the given table headers. Package-private, used by TTFParser only.
+     * 
+     * @param tag the name of the table to be read
+     * @param outHeaders consumes headers
+     * 
+     * @throws IOException if there was an error reading the table.
+     */
+    void readTableHeaders(String tag, FontHeaders outHeaders) throws 
IOException
+    {
+        TTFTable table = tables.get(tag);
+        if (table != null)
+        {
+            // save current position
+            long currentPosition = data.getCurrentPosition();
+            data.seek(table.getOffset());
+            table.readHeaders(this, data, outHeaders);
+            // restore current position
+            data.seek(currentPosition);
+        }
+    }
+
+    /**
      * Returns the number of glyphs (MaximumProfile.numGlyphs).
      * 
      * @return the number of glyphs

Modified: 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java?rev=1918774&r1=1918773&r2=1918774&view=diff
==============================================================================
--- 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
 (original)
+++ 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java
 Mon Jul  1 10:36:07 2024
@@ -37,9 +37,7 @@ import java.util.zip.CRC32;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.FontBoxFont;
-import org.apache.fontbox.cff.CFFCIDFont;
-import org.apache.fontbox.cff.CFFFont;
-import org.apache.fontbox.ttf.NamingTable;
+import org.apache.fontbox.ttf.FontHeaders;
 import org.apache.fontbox.ttf.OS2WindowsMetricsTable;
 import org.apache.fontbox.ttf.OTFParser;
 import org.apache.fontbox.ttf.OpenTypeFont;
@@ -59,6 +57,13 @@ import org.apache.pdfbox.io.RandomAccess
 final class FileSystemFontProvider extends FontProvider
 {
     private static final Log LOG = 
LogFactory.getLog(FileSystemFontProvider.class);
+
+    /**
+     * This option changes publicly visible behaviour: ".pdfbox.cache" file 
will have hash="-" for all files.
+     * After implementing {@link FontHeaders}, parsing font headers is faster 
than checksumming anyway.
+     */
+    private static final boolean SKIP_CHECKSUMS = 
"true".equals(System.getProperty("pdfbox.fontcache.skipchecksums"));
+    private static final String CHECKSUM_PLACEHOLDER = "-";
     
     private final List<FSFontInfo> fontInfoList = new ArrayList<>();
     private final FontCache cache;
@@ -315,7 +320,7 @@ final class FileSystemFontProvider exten
         String hash;
         try
         {
-            hash = computeHash(Files.newInputStream(file.toPath()));
+            hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : 
computeHash(Files.newInputStream(file.toPath()));
         }
         catch (IOException ex)
         {
@@ -382,25 +387,18 @@ final class FileSystemFontProvider exten
 
         for (File file : files)
         {
-            try
+            String filePath = file.getPath().toLowerCase();
+            if (filePath.endsWith(".ttf") || filePath.endsWith(".otf"))
             {
-                String filePath = file.getPath().toLowerCase();
-                if (filePath.endsWith(".ttf") || filePath.endsWith(".otf"))
-                {
-                    addTrueTypeFont(file);
-                }
-                else if (filePath.endsWith(".ttc") || 
filePath.endsWith(".otc"))
-                {
-                    addTrueTypeCollection(file);
-                }
-                else if (filePath.endsWith(".pfb"))
-                {
-                    addType1Font(file);
-                }
+                addTrueTypeFont(file);
             }
-            catch (IOException e)
+            else if (filePath.endsWith(".ttc") || filePath.endsWith(".otc"))
+            {
+                addTrueTypeCollection(file);
+            }
+            else if (filePath.endsWith(".pfb"))
             {
-                LOG.warn("Error parsing font " + file.getPath(), e);
+                addType1Font(file);
             }
         }
     }
@@ -537,6 +535,11 @@ final class FileSystemFontProvider exten
         {
             try (BufferedReader reader = new BufferedReader(new 
FileReader(diskCacheFile)))
             {
+                // consecutive lines usually share the same font file (e.g. 
"Courier", "Courier-Bold", "Courier-Oblique").
+                // unused if SKIP_CHECKSUMS
+                File lastFile = null;
+                String lastHash = null;
+                //
                 String line;
                 while ((line = reader.readLine()) != null)
                 {
@@ -599,23 +602,36 @@ final class FileSystemFontProvider exten
                     }
                     if (fontFile.exists())
                     {
-                        boolean keep = false;
                         // if the file exists, find out whether it's the same 
file.
                         // first check whether time is different and if yes, 
whether hash is different
-                        if (fontFile.lastModified() != lastModified)
+                        boolean keep = fontFile.lastModified() == lastModified;
+                        if (!keep && !SKIP_CHECKSUMS)
                         {
-                            String newHash = 
computeHash(Files.newInputStream(fontFile.toPath()));
-                            if (newHash.equals(hash))
+                            String newHash;
+                            if (hash.equals(lastHash) && 
fontFile.equals(lastFile))
+                            {
+                                newHash = lastHash; // already computed
+                            }
+                            else
+                            {
+                                try
+                                {
+                                    newHash = 
computeHash(Files.newInputStream(fontFile.toPath()));
+                                    lastFile = fontFile;
+                                    lastHash = newHash;
+                                }
+                                catch (IOException ex)
+                                {
+                                    LOG.debug("Error reading font file " + 
fontFile.getAbsolutePath(), ex);
+                                    newHash = "<err>";
+                                }
+                            }
+                            if (hash.equals(newHash))
                             {
                                 keep = true;
                                 lastModified = fontFile.lastModified();
-                                hash = newHash;
                             }
                         }
-                        else
-                        {
-                            keep = true;
-                        }
                         if (keep)
                         {
                             FSFontInfo info = new FSFontInfo(fontFile, format, 
postScriptName,
@@ -656,11 +672,13 @@ final class FileSystemFontProvider exten
     /**
      * Adds a TTC or OTC to the file cache. To reduce memory, the parsed font 
is not cached.
      */
-    private void addTrueTypeCollection(final File ttcFile) throws IOException
+    private void addTrueTypeCollection(final File ttcFile)
     {
-        try (TrueTypeCollection ttc = new TrueTypeCollection(ttcFile))
+        try
         {
-            ttc.processAllFonts(ttf -> addTrueTypeFontImpl(ttf, ttcFile));
+            String hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : 
computeHash(Files.newInputStream(ttcFile.toPath()));
+            TrueTypeCollection.processAllFontHeaders(ttcFile,
+                    fontHeaders -> addTrueTypeFontImpl(fontHeaders, ttcFile, 
hash));
         }
         catch (IOException e)
         {
@@ -672,25 +690,25 @@ final class FileSystemFontProvider exten
     /**
      * Adds an OTF or TTF font to the file cache. To reduce memory, the parsed 
font is not cached.
      */
-    private void addTrueTypeFont(File ttfFile) throws IOException
+    private void addTrueTypeFont(File ttfFile)
     {
         FontFormat fontFormat = null;
         try
         {
+            TTFParser parser;
             if (ttfFile.getPath().toLowerCase().endsWith(".otf"))
             {
                 fontFormat = FontFormat.OTF;
-                OTFParser parser = new OTFParser(false);
-                OpenTypeFont otf = parser.parse(new 
RandomAccessReadBufferedFile(ttfFile));
-                addTrueTypeFontImpl(otf, ttfFile);
+                parser = new OTFParser(false);
             }
             else
             {
                 fontFormat = FontFormat.TTF;
-                TTFParser parser = new TTFParser(false);
-                TrueTypeFont ttf = parser.parse(new 
RandomAccessReadBufferedFile(ttfFile));
-                addTrueTypeFontImpl(ttf, ttfFile);
+                parser = new TTFParser(false);
             }
+            FontHeaders headers = parser.parseTableHeaders(new 
RandomAccessReadBufferedFile(ttfFile));
+            addTrueTypeFontImpl(headers, ttfFile,
+                    SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : 
computeHash(Files.newInputStream(ttfFile.toPath())));
         }
         catch (IOException e)
         {
@@ -702,32 +720,34 @@ final class FileSystemFontProvider exten
     /**
      * Adds an OTF or TTF font to the file cache. To reduce memory, the parsed 
font is not cached.
      */
-    private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws 
IOException
+    private void addTrueTypeFontImpl(FontHeaders fontHeaders, File file, 
String hash)
     {
-        try
+        final String error = fontHeaders.getError();
+        if (error == null)
         {
             // read PostScript name, if any
-            if (ttf.getName() != null && ttf.getName().contains("|"))
+            final String name = fontHeaders.getName();
+            if (name != null && name.contains("|"))
             {
                 fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skippipeinname*"));
-                LOG.warn("Skipping font with '|' in name " + ttf.getName() + " 
in file " + file);
+                LOG.warn("Skipping font with '|' in name " + name + " in file 
" + file);
             }
-            else if (ttf.getName() != null)
+            else if (name != null)
             {
                 // ignore bitmap fonts
-                if (ttf.getHeader() == null)
+                Integer macStyle = fontHeaders.getHeaderMacStyle();
+                if (macStyle == null)
                 {
-                    fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
ttf.getName()));
+                    fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
name));
                     return;
                 }
-                int macStyle = ttf.getHeader().getMacStyle();
 
                 int sFamilyClass = -1;
                 int usWeightClass = -1;
                 int ulCodePageRange1 = 0;
                 int ulCodePageRange2 = 0;
                 byte[] panose = null;
-                OS2WindowsMetricsTable os2WindowsMetricsTable = 
ttf.getOS2Windows();
+                OS2WindowsMetricsTable os2WindowsMetricsTable = 
fontHeaders.getOS2Windows();
                 // Apple's AAT fonts don't have an OS/2 table
                 if (os2WindowsMetricsTable != null)
                 {
@@ -738,36 +758,24 @@ final class FileSystemFontProvider exten
                     panose = os2WindowsMetricsTable.getPanose();
                 }
 
-                String hash = computeHash(ttf.getOriginalData());
-                String format;
-                if (ttf instanceof OpenTypeFont && ((OpenTypeFont) 
ttf).isPostScript())
-                {
-                    format = "OTF";
-                    CIDSystemInfo ros = null;
-                    OpenTypeFont otf = (OpenTypeFont) ttf;
-                    if (otf.isSupportedOTF() && otf.getCFF() != null)
+                FontFormat format;
+                CIDSystemInfo ros = null;
+                if (fontHeaders.isOpenTypePostScript())
+                {
+                    format = FontFormat.OTF;
+                    String registry = fontHeaders.getOtfRegistry();
+                    String ordering = fontHeaders.getOtfOrdering();
+                    if (registry != null || ordering != null)
                     {
-                        CFFFont cff = otf.getCFF().getFont();
-                        if (cff instanceof CFFCIDFont)
-                        {
-                            CFFCIDFont cidFont = (CFFCIDFont) cff;
-                            String registry = cidFont.getRegistry();
-                            String ordering = cidFont.getOrdering();
-                            int supplement = cidFont.getSupplement();
-                            ros = new CIDSystemInfo(registry, ordering, 
supplement);
-                        }
+                        ros = new CIDSystemInfo(registry, ordering, 
fontHeaders.getOtfSupplement());
                     }
-                    fontInfoList.add(new FSFontInfo(file, FontFormat.OTF, 
ttf.getName(), ros,
-                            usWeightClass, sFamilyClass, ulCodePageRange1, 
ulCodePageRange2,
-                            macStyle, panose, this, hash, 
file.lastModified()));
                 }
                 else
                 {
-                    CIDSystemInfo ros = null;
-                    if (ttf.getTableMap().containsKey("gcid"))
+                    byte[] bytes = fontHeaders.getNonOtfTableGCID142();
+                    if (bytes != null)
                     {
                         // Apple's AAT fonts have a "gcid" table with CID info
-                        byte[] bytes = 
ttf.getTableBytes(ttf.getTableMap().get("gcid"));
                         String reg = new String(bytes, 10, 64, 
StandardCharsets.US_ASCII);
                         String registryName = reg.substring(0, 
reg.indexOf('\0'));
                         String ord = new String(bytes, 76, 64, 
StandardCharsets.US_ASCII);
@@ -775,22 +783,17 @@ final class FileSystemFontProvider exten
                         int supplementVersion = bytes[140] << 8 & (bytes[141] 
& 0xFF);
                         ros = new CIDSystemInfo(registryName, orderName, 
supplementVersion);
                     }
-                    
-                    format = "TTF";
-                    fontInfoList.add(new FSFontInfo(file, FontFormat.TTF, 
ttf.getName(), ros,
-                            usWeightClass, sFamilyClass, ulCodePageRange1, 
ulCodePageRange2,
-                            macStyle, panose, this, hash, 
file.lastModified()));
+                    format = FontFormat.TTF;
                 }
+                fontInfoList.add(new FSFontInfo(file, format, name, ros,
+                        usWeightClass, sFamilyClass, ulCodePageRange1, 
ulCodePageRange2,
+                        macStyle, panose, this, hash, file.lastModified()));
 
                 if (LOG.isTraceEnabled())
                 {
-                    NamingTable name = ttf.getNaming();
-                    if (name != null)
-                    {
-                        LOG.trace(format +": '" + name.getPostScriptName() + 
"' / '" +
-                                  name.getFontFamily() + "' / '" +
-                                  name.getFontSubFamily() + "'");
-                    }
+                    LOG.trace(format.name() +": '" + name + "' / '" +
+                              fontHeaders.getFontFamily() + "' / '" +
+                              fontHeaders.getFontSubFamily() + "'");
                 }
             }
             else
@@ -799,21 +802,17 @@ final class FileSystemFontProvider exten
                 LOG.warn("Missing 'name' entry for PostScript name in font " + 
file);
             }
         }
-        catch (IOException e)
+        else
         {
             fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skipexception*"));
-            LOG.warn("Could not load font file: " + file, e);
-        }
-        finally
-        {
-            ttf.close();
+            LOG.warn("Could not load font file: " + file + ": " + error);
         }
     }
 
     /**
      * Adds a Type 1 font to the file cache. To reduce memory, the parsed font 
is not cached.
      */
-    private void addType1Font(File pfbFile) throws IOException
+    private void addType1Font(File pfbFile)
     {
         try (InputStream input = new FileInputStream(pfbFile))
         {
@@ -830,7 +829,7 @@ final class FileSystemFontProvider exten
                 LOG.warn("Skipping font with '|' in name " + type1.getName() + 
" in file " + pfbFile);
                 return;
             }
-            String hash = computeHash(Files.newInputStream(pfbFile.toPath()));
+            String hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : 
computeHash(Files.newInputStream(pfbFile.toPath()));
             fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, 
type1.getName(),
                                             null, -1, -1, 0, 0, -1, null, 
this, hash, pfbFile.lastModified()));
 


Reply via email to