Author: tilman Date: Thu Nov 23 04:32:48 2023 New Revision: 1914058 URL: http://svn.apache.org/viewvc?rev=1914058&view=rev Log: PDFBOX-5713: rewritten parser to accept more than 3 segments and rearrange them as ASCII-BINARY-ASCII, as described in "3.3 IBM PC Format" of Adobe Technical Note #5040
Modified: pdfbox/branches/3.0/fontbox/pom.xml pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java Modified: pdfbox/branches/3.0/fontbox/pom.xml URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/pom.xml?rev=1914058&r1=1914057&r2=1914058&view=diff ============================================================================== --- pdfbox/branches/3.0/fontbox/pom.xml (original) +++ pdfbox/branches/3.0/fontbox/pom.xml Thu Nov 23 04:32:48 2023 @@ -143,6 +143,18 @@ <sha512>2787fcecc0feb1c9e6ff0d8de6193658413863e44eaab572751ca7e6c3b369c0a9731f4952cb0821f307760f0422f77c5f0d3fe7df6b054643fb39423e8d70ee</sha512> </configuration> </execution> + <execution> + <id>PDFBOX-5713</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/13064282/DejaVuSerifCondensed.pfb</url> + <outputDirectory>${project.build.directory}/fonts</outputDirectory> + <sha512>6ef13c3497862dc8e4c2a4261bc3a7ef3e2dd75e00ae2af4912b236b387225541db76c72854fbb2323d1064311ffdda9e64ed7065afc3a7d13f5b71b7df2f2ef</sha512> + </configuration> + </execution> </executions> </plugin> </plugins> Modified: pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java?rev=1914058&r1=1914057&r2=1914058&view=diff ============================================================================== --- pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java (original) +++ pdfbox/branches/3.0/fontbox/src/main/java/org/apache/fontbox/pfb/PfbParser.java Thu Nov 23 04:32:48 2023 @@ -23,7 +23,11 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; /** * Parser for a pfb-file. @@ -33,6 +37,8 @@ import java.util.Arrays; */ public class PfbParser { + private static final Log LOG = LogFactory.getLog(PfbParser.class); + /** * the pfb header length. * (start-marker (1 byte), ascii-/binary-marker (1 byte), size (4 byte)) @@ -56,10 +62,9 @@ public class PfbParser private static final int BINARY_MARKER = 0x02; /** - * The record types in the pfb-file. + * the EOF marker. */ - private static final int[] PFB_RECORDS = {ASCII_MARKER, BINARY_MARKER, - ASCII_MARKER}; + private static final int EOF_MARKER = 0x03; /** * buffersize. @@ -72,9 +77,9 @@ public class PfbParser private byte[] pfbdata; /** - * the lengths of the records. + * the lengths of the records (ASCII, BINARY, ASCII) */ - private int[] lengths; + private final int[] lengths = new int[3]; // sample (pfb-file) // 00000000 80 01 8b 15 00 00 25 21 50 53 2d 41 64 6f 62 65 @@ -123,43 +128,92 @@ public class PfbParser { throw new IOException("PFB header missing"); } + // read into segments and keep them + List<Integer> typeList = new ArrayList<>(3); + List<byte[]> barrList = new ArrayList<>(); ByteArrayInputStream in = new ByteArrayInputStream(pfb); - pfbdata = new byte[pfb.length - PFB_HEADER_LENGTH]; - lengths = new int[PFB_RECORDS.length]; - int pointer = 0; - for (int records = 0; records < PFB_RECORDS.length; records++) + int total = 0; + do { - if (in.read() != START_MARKER) + int r = in.read(); + if (r == -1 && total > 0) + { + break; // EOF + } + if (r != START_MARKER) { throw new IOException("Start marker missing"); } - - if (in.read() != PFB_RECORDS[records]) + int recordType = in.read(); + if (recordType == EOF_MARKER) { - throw new IOException("Incorrect record type"); + break; + } + if (recordType != ASCII_MARKER && recordType != BINARY_MARKER) + { + throw new IOException("Incorrect record type: " + recordType); } int size = in.read(); size += in.read() << 8; size += in.read() << 16; size += in.read() << 24; - lengths[records] = size; - if (pointer >= pfbdata.length) + LOG.debug("record type: " + recordType + ", segment size: " + size); + byte ar[] = new byte[size]; + int got = in.read(ar); + if (got != size) { - throw new EOFException("attempted to read past EOF"); + throw new EOFException("EOF while reading PFB font"); } - if (size > pfbdata.length - pointer) + total += size; + typeList.add(recordType); + barrList.add(ar); + } + while (true); + + // We now have ASCII and binary segments. Lets arrange these so that the ASCII segments + // come first, then the binary segments, then the last ASCII segment if it is + // 0000... cleartomark + + pfbdata = new byte[total]; + byte[] cleartomarkSegment = null; + int dstPos = 0; + + // copy the ASCII segments + for (int i = 0; i < typeList.size(); ++i) + { + if (typeList.get(i) != ASCII_MARKER) { - throw new EOFException("attempted to read " + size + " bytes at position " + pointer + - " into array of size " + pfbdata.length + ", but only space for " + - (pfbdata.length - pointer) + " bytes left"); + continue; } - int got = in.read(pfbdata, pointer, size); - if (got < 0) + byte[] ar = barrList.get(i); + if (i == typeList.size() - 1 && ar.length < 600 && new String(ar).contains("cleartomark")) { - throw new EOFException(); + cleartomarkSegment = ar; + continue; } - pointer += got; + System.arraycopy(ar, 0, pfbdata, dstPos, ar.length); + dstPos += ar.length; + } + lengths[0] = dstPos; + + // copy the binary segments + for (int i = 0; i < typeList.size(); ++i) + { + if (typeList.get(i) != BINARY_MARKER) + { + continue; + } + byte[] ar = barrList.get(i); + System.arraycopy(ar, 0, pfbdata, dstPos, ar.length); + dstPos += ar.length; + } + lengths[1] = dstPos - lengths[0]; + + if (cleartomarkSegment != null) + { + System.arraycopy(cleartomarkSegment, 0, pfbdata, dstPos, cleartomarkSegment.length); + lengths[2] = cleartomarkSegment.length; } } Modified: pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java?rev=1914058&r1=1914057&r2=1914058&view=diff ============================================================================== --- pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java (original) +++ pdfbox/branches/3.0/fontbox/src/test/java/org/apache/fontbox/pfb/PfbParserTest.java Thu Nov 23 04:32:48 2023 @@ -66,6 +66,34 @@ class PfbParserTest } /** + * PDFBOX-5713: font with several binary segments. + * + * @throws IOException + */ + @Test + void testPfbPDFBox5713() throws IOException + { + Type1Font font; + try (InputStream is = new FileInputStream("target/fonts/DejaVuSerifCondensed.pfb")) + { + font = Type1Font.createWithPFB(is); + } + Assertions.assertEquals("Version 2.33", font.getVersion()); + Assertions.assertEquals("DejaVuSerifCondensed", font.getFontName()); + Assertions.assertEquals("DejaVu Serif Condensed", font.getFullName()); + Assertions.assertEquals("DejaVu Serif Condensed", font.getFamilyName()); + Assertions.assertEquals("Copyright [c] 2003 by Bitstream, Inc. All Rights Reserved.", font.getNotice()); + Assertions.assertEquals(false, font.isFixedPitch()); + Assertions.assertEquals(false, font.isForceBold()); + Assertions.assertEquals(0, font.getItalicAngle()); + Assertions.assertEquals("Book", font.getWeight()); + Assertions.assertTrue(font.getEncoding() instanceof BuiltInEncoding); + Assertions.assertEquals(5959, font.getASCIISegment().length); + Assertions.assertEquals(1056090, font.getBinarySegment().length); + Assertions.assertEquals(3399, font.getCharStringsDict().size()); + } + + /** * Test 0 length font. */ @Test