Author: lehmi
Date: Mon Oct 20 16:56:38 2014
New Revision: 1633186

URL: http://svn.apache.org/r1633186
Log:
PDFBOX-2250: skip an empty xref table that is directly followed by the trailer; keep the call that creates an empty (instead of a null) curXrefTrailerObj when the xref table is empty (merged from trunk)
Modified:
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java   (contents, props changed)

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1633186&r1=1633185&r2=1633186&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Mon Oct 20 16:56:38 2014
@@ -357,10 +357,10 @@ public class PDFParser extends BaseParse
         // read first line
         String header = readLine();
         // some pdf-documents are broken and the pdf-version is in one of the following lines
-        if ((header.indexOf( PDF_HEADER ) == -1) && (header.indexOf( FDF_HEADER ) == -1))
+        if (!header.contains(PDF_HEADER) && !header.contains(FDF_HEADER))
         {
             header = readLine();
-            while ((header.indexOf( PDF_HEADER ) == -1) && (header.indexOf( FDF_HEADER ) == -1))
+            while (!header.contains(PDF_HEADER) && !header.contains(FDF_HEADER))
             {
                 // if a line starts with a digit, it has to be the first one with data in it
                 if ((header.length() > 0) && (Character.isDigit(header.charAt(0))))
@@ -791,10 +791,21 @@ public class PDFParser extends BaseParse
         {
             return false;
         }
-
+
+        // check for trailer after xref
+        String str = readString();
+        byte[] b = str.getBytes("ISO-8859-1");
+        pdfSource.unread(b, 0, b.length);
+
         // signal start of new XRef
         xrefTrailerResolver.nextXrefObj( startByteOffset );
 
+        if (str.startsWith("trailer"))
+        {
+            LOG.warn("skipping empty xref table");
+            return false;
+        }
+
         /*
          * Xref tables can have multiple sections.
          * Each starts with a starting object id and a count.
Propchange: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Mon Oct 20 16:56:38 2014
@@ -0,0 +1 @@
+/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java:1460370,1460372,1461796,1465163,1465217,1465266,1465359-1465360,1465366,1467638,1468195,1469999,1470107,1470242,1476795,1477806,1478472,1479136,1479287,1480000,1480030,1484089,1484277,1484547,1485771,1485781,1486337,1486413,1486423,1486440,1487557,1488049,1490022-1490023,1490408,1493503,1494083,1495799,1495802-1495803,1497532,1504210,1504214,1505737,1509187,1512367,1512433,1512661,1515101,1515165,1515905,1517273,1517281,1517288,1519174,1521194,1528826,1528833,1530018,1530740,1535953,1535956,1536136,1536173,1536441,1536463,1537287,1538191,1538203,1538222,1538341,1538371,1538394-1538395,1540801,1540811,1541625,1541714,1541987,1542291,1542780,1544789,1544806,1544818,1544972,1544977,1548387,1548394,1549022,1549025,1549027,1551220,1552521,1552533,1552552,1553017,1553175,1553220,1554632,1554774,1554792,1554845,1555186,1555345,1555550,1557339,1557374,1557546,1557553,1557561,1557793,1558205,1558570,1561095,1561191,1563199,1563210,1563215,1563426,1563429,1564846,1588736,1598655,1598885,1599016,1599656,1599786,1601144,1601451,1619255-1624567,1631169