Author: tilman
Date: Thu Nov 23 04:32:48 2023
New Revision: 1914058

URL: http://svn.apache.org/viewvc?rev=1914058&view=rev
Log:
PDFBOX-5713: rewritten parser to accept more than 3 segments and rearrange them 
as ASCII-BINARY-ASCII, as described in "3.3 IBM PC Format" of Adobe Technical 
Note #5040

Modified:
    pdfbox/branches/3.0/fontbox/pom.xml
    pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java
    pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java

Modified: pdfbox/branches/3.0/fontbox/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/pom.xml?rev=1914058&r1=1914057&r2=1914058&view=diff
==============================================================================
--- pdfbox/branches/3.0/fontbox/pom.xml (original)
+++ pdfbox/branches/3.0/fontbox/pom.xml Thu Nov 23 04:32:48 2023
@@ -143,6 +143,18 @@
                       <sha512>2787fcecc0feb1c9e6ff0d8de6193658413863e44eaab572751ca7e6c3b369c0a9731f4952cb0821f307760f0422f77c5f0d3fe7df6b054643fb39423e8d70ee</sha512>
                   </configuration>
               </execution>
+              <execution>
+                  <id>PDFBOX-5713</id>
+                  <phase>generate-test-resources</phase>
+                  <goals>
+                      <goal>wget</goal>
+                  </goals>
+                  <configuration>
+                      <url>https://issues.apache.org/jira/secure/attachment/13064282/DejaVuSerifCondensed.pfb</url>
+                      <outputDirectory>${project.build.directory}/fonts</outputDirectory>
+                      <sha512>6ef13c3497862dc8e4c2a4261bc3a7ef3e2dd75e00ae2af4912b236b387225541db76c72854fbb2323d1064311ffdda9e64ed7065afc3a7d13f5b71b7df2f2ef</sha512>
+                  </configuration>
+              </execution>
           </executions>
       </plugin>
     </plugins>

Modified: pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java?rev=1914058&r1=1914057&r2=1914058&view=diff
==============================================================================
--- pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java (original)
+++ pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java Thu Nov 23 04:32:48 2023
@@ -23,7 +23,11 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Paths;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
 /**
  * Parser for a pfb-file.
@@ -33,6 +37,8 @@ import java.util.Arrays;
  */
 public class PfbParser 
 {
+    private static final Log LOG = LogFactory.getLog(PfbParser.class);
+    
     /**
      * the pfb header length.
      * (start-marker (1 byte), ascii-/binary-marker (1 byte), size (4 byte))
@@ -56,10 +62,9 @@ public class PfbParser
     private static final int BINARY_MARKER = 0x02;
 
     /**
-     * The record types in the pfb-file.
+     * the EOF marker.
      */
-    private static final int[] PFB_RECORDS = {ASCII_MARKER, BINARY_MARKER,
-            ASCII_MARKER};
+    private static final int EOF_MARKER = 0x03;
     
     /**
      * buffersize.
@@ -72,9 +77,9 @@ public class PfbParser
     private byte[] pfbdata;
 
     /**
-     * the lengths of the records.
+     * the lengths of the records (ASCII, BINARY, ASCII)
      */
-    private int[] lengths;
+    private final int[] lengths = new int[3];
 
     // sample (pfb-file)
     // 00000000 80 01 8b 15  00 00 25 21  50 53 2d 41  64 6f 62 65  
@@ -123,43 +128,92 @@ public class PfbParser
         {
             throw new IOException("PFB header missing");
         }
+        // read into segments and keep them
+        List<Integer> typeList = new ArrayList<>(3);
+        List<byte[]> barrList = new ArrayList<>();
         ByteArrayInputStream in = new ByteArrayInputStream(pfb);
-        pfbdata = new byte[pfb.length - PFB_HEADER_LENGTH];
-        lengths = new int[PFB_RECORDS.length];
-        int pointer = 0;
-        for (int records = 0; records < PFB_RECORDS.length; records++) 
+        int total = 0;
+        do
         {
-            if (in.read() != START_MARKER) 
+            int r = in.read();
+            if (r == -1 && total > 0)
+            {
+                break; // EOF
+            }
+            if (r != START_MARKER) 
             {
                 throw new IOException("Start marker missing");
             }
-
-            if (in.read() != PFB_RECORDS[records]) 
+            int recordType = in.read();
+            if (recordType == EOF_MARKER)
             {
-                throw new IOException("Incorrect record type");
+                break;
+            }
+            if (recordType != ASCII_MARKER && recordType != BINARY_MARKER)
+            {
+                throw new IOException("Incorrect record type: " + recordType);
             }
 
             int size = in.read();
             size += in.read() << 8;
             size += in.read() << 16;
             size += in.read() << 24;
-            lengths[records] = size;
-            if (pointer >= pfbdata.length)
+            LOG.debug("record type: " + recordType + ", segment size: " + size);
+            byte ar[] = new byte[size];
+            int got = in.read(ar);
+            if (got != size)
             {
-                throw new EOFException("attempted to read past EOF");
+                throw new EOFException("EOF while reading PFB font");
             }
-            if (size > pfbdata.length - pointer)
+            total += size;
+            typeList.add(recordType);
+            barrList.add(ar);
+        }
+        while (true);
+        
+        // We now have ASCII and binary segments. Lets arrange these so that the ASCII segments
+        // come first, then the binary segments, then the last ASCII segment if it is
+        // 0000... cleartomark
+        
+        pfbdata = new byte[total];
+        byte[] cleartomarkSegment = null;
+        int dstPos = 0;
+        
+        // copy the ASCII segments
+        for (int i = 0; i < typeList.size(); ++i)
+        {
+            if (typeList.get(i) != ASCII_MARKER)
             {
-                throw new EOFException("attempted to read " + size + " bytes at position " + pointer +
-                        " into array of size " + pfbdata.length + ", but only space for " + 
-                        (pfbdata.length - pointer) + " bytes left");
+                continue;
             }
-            int got = in.read(pfbdata, pointer, size);
-            if (got < 0) 
+            byte[] ar = barrList.get(i);
+            if (i == typeList.size() - 1 && ar.length < 600 && new String(ar).contains("cleartomark"))
             {
-                throw new EOFException();
+                cleartomarkSegment = ar;
+                continue;
             }
-            pointer += got;
+            System.arraycopy(ar, 0, pfbdata, dstPos, ar.length);
+            dstPos += ar.length;
+        }
+        lengths[0] = dstPos;
+
+        // copy the binary segments
+        for (int i = 0; i < typeList.size(); ++i)
+        {
+            if (typeList.get(i) != BINARY_MARKER)
+            {
+                continue;
+            }
+            byte[] ar = barrList.get(i);
+            System.arraycopy(ar, 0, pfbdata, dstPos, ar.length);
+            dstPos += ar.length;
+        }
+        lengths[1] = dstPos - lengths[0];
+        
+        if (cleartomarkSegment != null)
+        {
+            System.arraycopy(cleartomarkSegment, 0, pfbdata, dstPos, cleartomarkSegment.length);
+            lengths[2] = cleartomarkSegment.length;
         }
     }
 

Modified: pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java?rev=1914058&r1=1914057&r2=1914058&view=diff
==============================================================================
--- pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java (original)
+++ pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java Thu Nov 23 04:32:48 2023
@@ -66,6 +66,34 @@ class PfbParserTest
     }
 
     /**
+     * PDFBOX-5713: font with several binary segments.
+     *
+     * @throws IOException 
+     */
+    @Test
+    void testPfbPDFBox5713() throws IOException
+    {
+        Type1Font font;
+        try (InputStream is = new FileInputStream("target/fonts/DejaVuSerifCondensed.pfb"))
+        {
+            font = Type1Font.createWithPFB(is);
+        }
+        Assertions.assertEquals("Version 2.33", font.getVersion());
+        Assertions.assertEquals("DejaVuSerifCondensed", font.getFontName());
+        Assertions.assertEquals("DejaVu Serif Condensed", font.getFullName());
+        Assertions.assertEquals("DejaVu Serif Condensed", font.getFamilyName());
+        Assertions.assertEquals("Copyright [c] 2003 by Bitstream, Inc. All Rights Reserved.", font.getNotice());
+        Assertions.assertEquals(false, font.isFixedPitch());
+        Assertions.assertEquals(false, font.isForceBold());
+        Assertions.assertEquals(0, font.getItalicAngle());
+        Assertions.assertEquals("Book", font.getWeight());
+        Assertions.assertTrue(font.getEncoding() instanceof BuiltInEncoding);
+        Assertions.assertEquals(5959, font.getASCIISegment().length);
+        Assertions.assertEquals(1056090, font.getBinarySegment().length);
+        Assertions.assertEquals(3399, font.getCharStringsDict().size());
+    }
+
+    /**
      * Test 0 length font.
      */
     @Test


Reply via email to