Author: lehmi Date: Sun May 31 09:59:17 2015 New Revision: 1682706 URL: http://svn.apache.org/r1682706 Log: PDFBOX-2301: use RandomAccessRead instead of PushBackInputStream as pdfSource
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Sun May 31 09:59:17 2015 @@ -16,7 +16,6 @@ */ package org.apache.pdfbox.pdfparser; -import java.io.BufferedInputStream; import java.io.ByteArrayOutputStream; import java.io.Closeable; import java.io.IOException; @@ -36,8 +35,10 @@ import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSString; -import org.apache.pdfbox.io.PushBackInputStream; +import org.apache.pdfbox.io.RandomAccessBuffer; +import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.cos.COSObjectKey; + import static org.apache.pdfbox.util.Charsets.ISO_8859_1; /** @@ -137,7 +138,7 @@ public abstract class BaseParser impleme /** * This is the stream that will be read from. */ - protected PushBackInputStream pdfSource; + protected RandomAccessRead pdfSource; /** * This is the document that will be parsed. @@ -159,19 +160,20 @@ public abstract class BaseParser impleme */ public BaseParser(InputStream input) throws IOException { - int pushbacksize = 65536; - try - { - pushbacksize = Integer.getInteger(PROP_PUSHBACK_SIZE, 65536); - } - catch (SecurityException e) + pdfSource = copyInputStream(input); + } + + protected RandomAccessRead copyInputStream(InputStream input) throws IOException + { + RandomAccessBuffer buffer = new RandomAccessBuffer(); + byte[] byteBuffer = new byte[8192]; + int bytesRead = 0; + while ((bytesRead = input.read(byteBuffer)) > -1) { - // PDFBOX-1946 getInteger calls System.getProperties, - // which can get exception in an applet - // ignore and use default + buffer.write(byteBuffer, 0, bytesRead); } - this.pdfSource = new PushBackInputStream( - new BufferedInputStream(input, 16384), pushbacksize); + buffer.seek(0); + return buffer; } private static boolean isHexDigit(char ch) @@ -190,14 +192,14 @@ public abstract class BaseParser impleme */ private COSBase parseCOSDictionaryValue() throws IOException { - long numOffset = pdfSource.getOffset(); + long numOffset = pdfSource.getPosition(); COSBase number = parseDirObject(); skipSpaces(); if (!isDigit()) { return number; } - long genOffset = pdfSource.getOffset(); + long genOffset = pdfSource.getPosition(); COSBase generationNumber = parseDirObject(); skipSpaces(); readExpectedChar('R'); @@ -218,7 +220,7 @@ public abstract class BaseParser impleme { if (document == null) { - throw new IOException("object reference " + key + " at offset " + pdfSource.getOffset() + throw new IOException("object reference " + key + " at offset " + pdfSource.getPosition() + " in content stream"); } return document.getObjectFromPool(key); @@ -308,7 +310,7 @@ public abstract class BaseParser impleme { return true; } - pdfSource.unread(c); + pdfSource.rewind(1); return false; } @@ -324,7 +326,7 @@ public abstract class BaseParser impleme String potentialDEF = readString(); if (!potentialDEF.equals(DEF)) { - pdfSource.unread(potentialDEF.getBytes(ISO_8859_1)); + pdfSource.rewind(potentialDEF.getBytes(ISO_8859_1).length); } else { @@ -363,7 +365,7 @@ public abstract class BaseParser impleme whitespace = pdfSource.read(); if (ASCII_LF != whitespace) { - pdfSource.unread(whitespace); + pdfSource.rewind(1); //The spec says this is invalid but it happens in the real //world so we must support it. } @@ -373,7 +375,7 @@ public abstract class BaseParser impleme //we are in an error. //but again we will do a lenient parsing and just assume that everything //is fine - pdfSource.unread(whitespace); + pdfSource.rewind(1); } } @@ -475,7 +477,7 @@ public abstract class BaseParser impleme if ( charMatchCount == keyw.length ) { // keyword matched; unread matched keyword (endstream/endobj) and following buffered content - pdfSource.unread( strmBuf, contentBytes, bufSize - contentBytes ); + pdfSource.rewind( bufSize - contentBytes ); break; } else @@ -543,7 +545,7 @@ public abstract class BaseParser impleme } if (amountRead > 0) { - pdfSource.unread( nextThreeBytes, 0, amountRead ); + pdfSource.rewind( amountRead ); } return braces; } @@ -715,7 +717,7 @@ public abstract class BaseParser impleme } if (c != -1) { - pdfSource.unread(c); + pdfSource.rewind(1); } return new COSString(out.toByteArray()); } @@ -834,12 +836,12 @@ public abstract class BaseParser impleme else { //it could be a bad object in the array which is just skipped - LOG.warn("Corrupt object reference at offset " + pdfSource.getOffset()); + LOG.warn("Corrupt object reference at offset " + pdfSource.getPosition()); // This could also be an "endobj" or "endstream" which means we can assume that // the array has ended. String isThisTheEnd = readString(); - pdfSource.unread(isThisTheEnd.getBytes(ISO_8859_1)); + pdfSource.rewind(isThisTheEnd.getBytes(ISO_8859_1).length); if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd)) { return po; @@ -909,7 +911,7 @@ public abstract class BaseParser impleme } else { - pdfSource.unread(ch2); + pdfSource.rewind(1); c = ch1; buffer.append( ch ); } @@ -926,7 +928,7 @@ public abstract class BaseParser impleme } if (c != -1) { - pdfSource.unread(c); + pdfSource.rewind(1); } return COSName.getPDFName( buffer.toString() ); } @@ -948,7 +950,7 @@ public abstract class BaseParser impleme if( !trueString.equals( TRUE ) ) { throw new IOException( "Error parsing boolean: expected='true' actual='" + trueString - + "' at offset " + pdfSource.getOffset()); + + "' at offset " + pdfSource.getPosition()); } else { @@ -961,7 +963,7 @@ public abstract class BaseParser impleme if( !falseString.equals( FALSE ) ) { throw new IOException( "Error parsing boolean: expected='true' actual='" + falseString - + "' at offset " + pdfSource.getOffset()); + + "' at offset " + pdfSource.getPosition()); } else { @@ -971,7 +973,7 @@ public abstract class BaseParser impleme else { throw new IOException( "Error parsing boolean expected='t or f' actual='" + c - + "' at offset " + pdfSource.getOffset()); + + "' at offset " + pdfSource.getPosition()); } return retval; } @@ -998,7 +1000,7 @@ public abstract class BaseParser impleme int leftBracket = pdfSource.read(); // check for second left bracket c = (char)pdfSource.peek(); - pdfSource.unread( leftBracket ); + pdfSource.rewind(1); if(c == '<') { @@ -1083,7 +1085,7 @@ public abstract class BaseParser impleme } if( ic != -1 ) { - pdfSource.unread( ic ); + pdfSource.rewind(1); } retval = COSNumber.get( buf.toString() ); } @@ -1099,13 +1101,13 @@ public abstract class BaseParser impleme // we can end up in an infinite loop otherwise throw new IOException( "Unknown dir object c='" + c + "' cInt=" + (int)c + " peek='" + (char)peek - + "' peekInt=" + peek + " " + pdfSource.getOffset() ); + + "' peekInt=" + peek + " " + pdfSource.getPosition() ); } // if it's an endstream/endobj, we want to put it back so the caller will see it if(ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString)) { - pdfSource.unread(badString.getBytes(ISO_8859_1)); + pdfSource.rewind(badString.getBytes(ISO_8859_1).length); } } } @@ -1132,7 +1134,7 @@ public abstract class BaseParser impleme } if (c != -1) { - pdfSource.unread(c); + pdfSource.rewind(1); } return buffer.toString(); } @@ -1165,7 +1167,7 @@ public abstract class BaseParser impleme { throw new IOException("Expected string '" + new String(expectedString) + "' but missed at character '" + c + "' at offset " - + pdfSource.getOffset()); + + pdfSource.getPosition()); } } skipSpaces(); @@ -1183,7 +1185,7 @@ public abstract class BaseParser impleme char c = (char) pdfSource.read(); if (c != ec) { - throw new IOException("expected='" + ec + "' actual='" + c + "' at offset " + pdfSource.getOffset()); + throw new IOException("expected='" + ec + "' actual='" + c + "' at offset " + pdfSource.getPosition()); } } @@ -1216,7 +1218,7 @@ public abstract class BaseParser impleme } if (c != -1) { - pdfSource.unread(c); + pdfSource.rewind(1); } return buffer.toString(); } @@ -1410,7 +1412,7 @@ public abstract class BaseParser impleme } if (c != -1) { - pdfSource.unread(c); + pdfSource.rewind(1); } } @@ -1468,8 +1470,8 @@ public abstract class BaseParser impleme } catch( NumberFormatException e ) { - pdfSource.unread(intBuffer.toString().getBytes(ISO_8859_1)); - throw new IOException( "Error: Expected an integer type at offset "+pdfSource.getOffset(), e); + pdfSource.rewind(intBuffer.toString().getBytes(ISO_8859_1).length); + throw new IOException( "Error: Expected an integer type at offset "+pdfSource.getPosition(), e); } return retval; } @@ -1495,9 +1497,9 @@ public abstract class BaseParser impleme } catch( NumberFormatException e ) { - pdfSource.unread(longBuffer.toString().getBytes(ISO_8859_1)); + pdfSource.rewind(longBuffer.toString().getBytes(ISO_8859_1).length); throw new IOException( "Error: Expected a long type at offset " - + pdfSource.getOffset() + ", instead got '" + longBuffer + "'", e); + + pdfSource.getPosition() + ", instead got '" + longBuffer + "'", e); } return retval; } @@ -1526,7 +1528,7 @@ public abstract class BaseParser impleme } if( lastByte != -1 ) { - pdfSource.unread( lastByte ); + pdfSource.rewind(1); } return buffer; } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun May 31 09:59:17 2015 @@ -16,6 +16,8 @@ */ package org.apache.pdfbox.pdfparser; +import static org.apache.pdfbox.util.Charsets.ISO_8859_1; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -52,7 +54,6 @@ import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; import org.apache.pdfbox.pdmodel.encryption.SecurityHandler; -import static org.apache.pdfbox.util.Charsets.ISO_8859_1; /** * PDF-Parser which first reads startxref and xref tables in order to know valid objects and parse only these objects. @@ -224,11 +225,11 @@ public class COSParser extends BaseParse // use existing parser to parse xref table parseXrefTable(prev); // parse the last trailer. - trailerOffset = pdfSource.getOffset(); + trailerOffset = pdfSource.getPosition(); // PDFBOX-1739 skip extra xref entries in RegisSTAR documents while (isLenient && pdfSource.peek() != 't') { - if (pdfSource.getOffset() == trailerOffset) + if (pdfSource.getPosition() == trailerOffset) { // warn only the first time LOG.warn("Expected trailer object at position " + trailerOffset @@ -239,7 +240,7 @@ public class COSParser extends BaseParse if (!parseTrailer()) { throw new IOException("Expected trailer object at position: " - + pdfSource.getOffset()); + + pdfSource.getPosition()); } COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer(); // check for a XRef stream, it may contain some object ids of compressed objects @@ -733,8 +734,7 @@ public class COSParser extends BaseParse if (endObjectKey.equals(STREAM_STRING)) { - pdfSource.unread(endObjectKey.getBytes(ISO_8859_1)); - pdfSource.unread(' '); + pdfSource.rewind(endObjectKey.getBytes(ISO_8859_1).length); if (pb instanceof COSDictionary) { COSStream stream = parseCOSStream((COSDictionary) pb); @@ -852,7 +852,7 @@ public class COSParser extends BaseParse if (lengthObj.getObject() == null) { // not read so far, keep current stream position - final long curFileOffset = pdfSource.getOffset(); + final long curFileOffset = pdfSource.getPosition(); parseObjectDynamically(lengthObj, true); // reset current stream position pdfSource.seek(curFileOffset); @@ -918,7 +918,7 @@ public class COSParser extends BaseParse if (isLenient) { LOG.warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset " - + pdfSource.getOffset()); + + pdfSource.getPosition()); } else { @@ -941,22 +941,22 @@ public class COSParser extends BaseParse if (endStream.equals("endobj") && isLenient) { LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset " - + pdfSource.getOffset()); + + pdfSource.getPosition()); // avoid follow-up warning about missing endobj - pdfSource.unread(ENDOBJ); + pdfSource.rewind(ENDOBJ.length); } else if (endStream.length() > 9 && isLenient && endStream.substring(0,9).equals(ENDSTREAM_STRING)) { LOG.warn("stream ends with '" + endStream + "' instead of 'endstream' at offset " - + pdfSource.getOffset()); + + pdfSource.getPosition()); // unread the "extra" bytes - pdfSource.unread(endStream.substring(9).getBytes(ISO_8859_1)); + pdfSource.rewind(endStream.substring(9).getBytes(ISO_8859_1).length); } else if (!endStream.equals(ENDSTREAM_STRING)) { throw new IOException( "Error reading stream, expected='endstream' actual='" - + endStream + "' at offset " + pdfSource.getOffset()); + + endStream + "' at offset " + pdfSource.getPosition()); } } finally @@ -979,7 +979,7 @@ public class COSParser extends BaseParse if (readBytes <= 0) { // shouldn't happen, the stream length has already been validated - throw new IOException("read error at offset " + pdfSource.getOffset() + throw new IOException("read error at offset " + pdfSource.getPosition() + ": expected " + chunk + " bytes, but read() returns " + readBytes); } out.write(streamCopyBuf, 0, readBytes); @@ -990,7 +990,7 @@ public class COSParser extends BaseParse private boolean validateStreamLength(long streamLength) throws IOException { boolean streamLengthIsValid = true; - long originOffset = pdfSource.getOffset(); + long originOffset = pdfSource.getPosition(); long expectedEndOfStream = originOffset + streamLength; if (expectedEndOfStream > fileLen) { @@ -1171,7 +1171,7 @@ public class COSParser extends BaseParse } long objectNr = objectKey.getNumber(); int objectGen = objectKey.getGeneration(); - long originOffset = pdfSource.getOffset(); + long originOffset = pdfSource.getPosition(); pdfSource.seek(offset); String objectString = createObjectString(objectNr, objectGen); try @@ -1216,7 +1216,7 @@ public class COSParser extends BaseParse if (bfSearchCOSObjectKeyOffsets == null) { bfSearchCOSObjectKeyOffsets = new HashMap<COSObjectKey, Long>(); - long originOffset = pdfSource.getOffset(); + long originOffset = pdfSource.getPosition(); long currentOffset = MINIMUM_SEARCH_OFFSET; String objString = " obj"; char[] string = objString.toCharArray(); @@ -1368,14 +1368,14 @@ public class COSParser extends BaseParse { // a pdf may contain more than one xref entry bfSearchXRefTablesOffsets = new Vector<Long>(); - long originOffset = pdfSource.getOffset(); + long originOffset = pdfSource.getPosition(); pdfSource.seek(MINIMUM_SEARCH_OFFSET); // search for xref tables while (!pdfSource.isEOF()) { if (isString(XREF_TABLE)) { - long newOffset = pdfSource.getOffset(); + long newOffset = pdfSource.getPosition(); pdfSource.seek(newOffset - 1); // ensure that we don't read "startxref" instead of "xref" if (isWhitespace()) @@ -1401,7 +1401,7 @@ public class COSParser extends BaseParse { // a pdf may contain more than one /XRef entry bfSearchXRefStreamsOffsets = new Vector<Long>(); - long originOffset = pdfSource.getOffset(); + long originOffset = pdfSource.getPosition(); pdfSource.seek(MINIMUM_SEARCH_OFFSET); // search for XRef streams String objString = " obj"; @@ -1412,7 +1412,7 @@ public class COSParser extends BaseParse { // search backwards for the beginning of the stream long newOffset = -1; - long xrefOffset = pdfSource.getOffset(); + long xrefOffset = pdfSource.getPosition(); boolean objFound = false; for (int i = 1; i < 30 && !objFound; i++) { @@ -1445,7 +1445,7 @@ public class COSParser extends BaseParse if (length > 0) { pdfSource.read(); - newOffset = pdfSource.getOffset(); + newOffset = pdfSource.getPosition(); } } } @@ -1584,7 +1584,7 @@ public class COSParser extends BaseParse { bytesMatching = true; } - pdfSource.unread(bytesRead, 0, numberOfBytes); + pdfSource.rewind(numberOfBytes); } return bytesMatching; } @@ -1599,7 +1599,7 @@ public class COSParser extends BaseParse private boolean isString(char[] string) throws IOException { boolean bytesMatching = true; - long originOffset = pdfSource.getOffset(); + long originOffset = pdfSource.getPosition(); for (char c : string) { if (pdfSource.read() != c) @@ -1624,7 +1624,7 @@ public class COSParser extends BaseParse return false; } //read "trailer" - long currentOffset = pdfSource.getOffset(); + long currentOffset = pdfSource.getPosition(); String nextLine = readLine(); if( !nextLine.trim().equals( "trailer" ) ) { @@ -1729,7 +1729,7 @@ public class COSParser extends BaseParse { String headerGarbage = header.substring(headerMarker.length() + 3, header.length()) + "\n"; header = header.substring(0, headerMarker.length() + 3); - pdfSource.unread(headerGarbage.getBytes(ISO_8859_1)); + pdfSource.rewind(headerGarbage.getBytes(ISO_8859_1).length); } } float headerVersion = -1; @@ -1777,7 +1777,7 @@ public class COSParser extends BaseParse // check for trailer after xref String str = readString(); byte[] b = str.getBytes(ISO_8859_1); - pdfSource.unread(b, 0, b.length); + pdfSource.rewind(b.length); // signal start of new XRef xrefTrailerResolver.nextXrefObj( startByteOffset, XRefType.TABLE ); Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java Sun May 31 09:59:17 2015 @@ -27,18 +27,13 @@ import org.apache.pdfbox.cos.COSDictiona import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.io.PushBackInputStream; -import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessFile; import org.apache.pdfbox.pdmodel.fdf.FDFDocument; public class FDFParser extends COSParser { private static final Log LOG = LogFactory.getLog(FDFParser.class); - private final RandomAccessBufferedFileInputStream raStream; - - private File tempPDFFile; - /** * Constructs parser for given file using memory buffer. * @@ -62,7 +57,7 @@ public class FDFParser extends COSParser public FDFParser(File file) throws IOException { fileLen = file.length(); - raStream = new RandomAccessBufferedFileInputStream(file); + pdfSource = new RandomAccessFile(file, "r"); init(); } @@ -74,9 +69,8 @@ public class FDFParser extends COSParser */ public FDFParser(InputStream input) throws IOException { - tempPDFFile = createTmpFile(input); - fileLen = tempPDFFile.length(); - raStream = new RandomAccessBufferedFileInputStream(tempPDFFile); + pdfSource = copyInputStream(input); + fileLen = pdfSource.length(); init(); } @@ -96,7 +90,6 @@ public class FDFParser extends COSParser } } document = new COSDocument(false); - pdfSource = new PushBackInputStream(raStream, 4096); } /** @@ -154,8 +147,6 @@ public class FDFParser extends COSParser finally { IOUtils.closeQuietly(pdfSource); - deleteTempFile(); - if (exceptionOccurred && document != null) { IOUtils.closeQuietly(document); @@ -177,25 +168,4 @@ public class FDFParser extends COSParser return new FDFDocument( getDocument() ); } - /** - * Remove the temporary file. A temporary file is created if this class is instantiated with an InputStream - */ - private void deleteTempFile() - { - if (tempPDFFile != null) - { - try - { - if (!tempPDFFile.delete()) - { - LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted"); - } - } - catch (SecurityException e) - { - LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted", e); - } - } - } - } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun May 31 09:59:17 2015 @@ -30,8 +30,8 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.io.PushBackInputStream; import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessFile; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; @@ -43,7 +43,6 @@ public class PDFParser extends COSParser { private static final Log LOG = LogFactory.getLog(PDFParser.class); - private final RandomAccessBufferedFileInputStream raStream; private String password = ""; private InputStream keyStoreInputStream = null; private String keyAlias = null; @@ -163,7 +162,7 @@ public class PDFParser extends COSParser boolean useScratchFiles) throws IOException { fileLen = file.length(); - raStream = new RandomAccessBufferedFileInputStream(file); + pdfSource = new RandomAccessBufferedFileInputStream(file); password = decryptionPassword; keyStoreInputStream = keyStore; keyAlias = alias; @@ -251,9 +250,17 @@ public class PDFParser extends COSParser public PDFParser(InputStream input, String decryptionPassword, InputStream keyStore, String alias, boolean useScratchFiles) throws IOException { - tempPDFFile = createTmpFile(input); - fileLen = tempPDFFile.length(); - raStream = new RandomAccessBufferedFileInputStream(tempPDFFile); + if (useScratchFiles) + { + tempPDFFile = createTmpFile(input); + fileLen = tempPDFFile.length(); + pdfSource = new RandomAccessBufferedFileInputStream(tempPDFFile); + } + else + { + pdfSource = copyInputStream(input); + fileLen = pdfSource.length(); + } password = decryptionPassword; keyStoreInputStream = keyStore; keyAlias = alias; @@ -276,7 +283,6 @@ public class PDFParser extends COSParser } } document = new COSDocument(useScratchFiles); - pdfSource = new PushBackInputStream(raStream, 4096); } /** @@ -355,7 +361,6 @@ public class PDFParser extends COSParser { IOUtils.closeQuietly(pdfSource); IOUtils.closeQuietly(keyStoreInputStream); - deleteTempFile(); if (exceptionOccurred && document != null) Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sun May 31 09:59:17 2015 @@ -19,14 +19,14 @@ package org.apache.pdfbox.pdfparser; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - +import org.apache.pdfbox.contentstream.operator.Operator; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSBoolean; import org.apache.pdfbox.cos.COSDictionary; @@ -35,8 +35,8 @@ import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.contentstream.operator.Operator; /** * This will parse a PDF byte stream and extract operands and such. @@ -209,7 +209,7 @@ public class PDFStreamParser extends Bas c = (char) pdfSource.peek(); // put back first bracket - pdfSource.unread(leftBracket); + pdfSource.rewind(1); if (c == '<') { @@ -408,7 +408,7 @@ public class PDFStreamParser extends Bas * @return <code>true</code> if next bytes are probably printable ASCII * characters starting with a PDF operator, otherwise <code>false</code> */ - private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource) + private boolean hasNoFollowingBinData(final RandomAccessRead pdfSource) throws IOException { // as suggested in PDFBOX-1164 @@ -453,7 +453,7 @@ public class PDFStreamParser extends Bas noBinData = false; } } - pdfSource.unread(binCharTestArr, 0, readBytes); + pdfSource.rewind(readBytes); } if (!noBinData) { Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff ============================================================================== --- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original) +++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sun May 31 09:59:17 2015 @@ -64,13 +64,13 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdfparser.PDFObjectStreamParser; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.preflight.Format; import org.apache.pdfbox.preflight.PreflightConfiguration; import org.apache.pdfbox.preflight.PreflightConstants; @@ -347,7 +347,7 @@ public class PreflightParser extends PDF // the number of objects in the xref table int count; - long offset = pdfSource.getOffset(); + long offset = pdfSource.getPosition(); String line = readLine(); Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)"); Matcher matcher = pattern.matcher(line); @@ -359,7 +359,8 @@ public class PreflightParser extends PDF else { addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF, - "Cross reference subsection header is invalid: '" + line + "' at position " + pdfSource.getOffset())); + "Cross reference subsection header is invalid: '" + line + "' at position " + + pdfSource.getPosition())); // reset pdfSource cursor to read xref information pdfSource.seek(offset); // first obj id @@ -454,16 +455,16 @@ public class PreflightParser extends PDF if (!streamV.equals("stream")) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, - "Expected 'stream' keyword but found '" + streamV + "' at offset "+pdfSource.getOffset())); + "Expected 'stream' keyword but found '" + streamV + "' at offset "+pdfSource.getPosition())); } int nextChar = pdfSource.read(); if (!((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10)) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, - "Expected 'EOL' after the stream keyword at offset "+pdfSource.getOffset())); + "Expected 'EOL' after the stream keyword at offset "+pdfSource.getPosition())); } // set the offset before stream - pdfSource.seek(pdfSource.getOffset() - 7); + pdfSource.seek(pdfSource.getPosition() - 7); } /** @@ -473,17 +474,17 @@ public class PreflightParser extends PDF */ protected void checkEndstreamKeyWord() throws IOException { - pdfSource.seek(pdfSource.getOffset() - 10); + pdfSource.seek(pdfSource.getPosition() - 10); if (!nextIsEOL()) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, - "Expected 'EOL' before the endstream keyword at offset "+pdfSource.getOffset()+" but found '"+pdfSource.peek()+"'")); + "Expected 'EOL' before the endstream keyword at offset "+pdfSource.getPosition()+" but found '"+pdfSource.peek()+"'")); } String endstreamV = readString(); if (!endstreamV.equals("endstream")) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, - "Expected 'endstream' keyword at offset "+pdfSource.getOffset()+" but found '" + endstreamV + "'")); + "Expected 'endstream' keyword at offset "+pdfSource.getPosition()+" but found '" + endstreamV + "'")); } } @@ -544,7 +545,7 @@ public class PreflightParser extends PDF protected COSString parseCOSString() throws IOException { // offset reminder - long offset = pdfSource.getOffset(); + long offset = pdfSource.getPosition(); char nextChar = (char) pdfSource.read(); int count = 0; if (nextChar == '<') @@ -566,7 +567,7 @@ public class PreflightParser extends PDF else { addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID, - "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "') at offset " + pdfSource.getOffset())); + "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "') at offset " + pdfSource.getPosition())); break; } } @@ -577,7 +578,7 @@ public class PreflightParser extends PDF if (count % 2 != 0) { addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER, - "Hexa string shall contain even number of non white space char at offset " + pdfSource.getOffset())); + "Hexa string shall contain even number of non white space char at offset " + pdfSource.getPosition())); } // reset the offset to parse the COSString @@ -586,7 +587,7 @@ public class PreflightParser extends PDF if (result.getString().length() > MAX_STRING_LENGTH) { - addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+pdfSource.getOffset())); + addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+pdfSource.getPosition())); } return result; } @@ -612,7 +613,7 @@ public class PreflightParser extends PDF if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) { addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, - "Float is too long or too small: " + real+" at offset "+pdfSource.getOffset())); + "Float is too long or too small: " + real+" at offset "+pdfSource.getPosition())); } } else @@ -621,7 +622,7 @@ public class PreflightParser extends PDF if (numAsLong > Integer.MAX_VALUE || numAsLong < Integer.MIN_VALUE) { addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, - "Numeric is too long or too small: " + numAsLong+" at offset "+pdfSource.getOffset())); + "Numeric is too long or too small: " + numAsLong+" at offset "+pdfSource.getPosition())); } } } @@ -631,7 +632,7 @@ public class PreflightParser extends PDF COSDictionary dic = (COSDictionary) result; if (dic.size() > MAX_DICT_ENTRIES) { - addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+pdfSource.getOffset())); + addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+pdfSource.getPosition())); } } return result; @@ -680,7 +681,7 @@ public class PreflightParser extends PDF long readObjNr; int readObjGen; - long offset = pdfSource.getOffset(); + long offset = pdfSource.getPosition(); String line = readLine(); Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj"); Matcher matcher = pattern.matcher(line); @@ -721,7 +722,7 @@ public class PreflightParser extends PDF skipSpaces(); COSBase pb = parseDirObject(); skipSpaces(); - long endObjectOffset = pdfSource.getOffset(); + long endObjectOffset = pdfSource.getPosition(); String endObjectKey = readString(); if (endObjectKey.equals("stream")) @@ -743,7 +744,7 @@ public class PreflightParser extends PDF throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")."); } skipSpaces(); - endObjectOffset = pdfSource.getOffset(); + endObjectOffset = pdfSource.getPosition(); endObjectKey = readString(); // we have case with a second 'endstream' before endobj @@ -774,12 +775,12 @@ public class PreflightParser extends PDF } else { - offset = pdfSource.getOffset(); + offset = pdfSource.getPosition(); pdfSource.seek(endObjectOffset - 1); if (!nextIsEOL()) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, - "EOL expected before the 'endobj' keyword at offset "+pdfSource.getOffset())); + "EOL expected before the 'endobj' keyword at offset "+pdfSource.getPosition())); } pdfSource.seek(offset); } @@ -787,7 +788,7 @@ public class PreflightParser extends PDF if (!nextIsEOL()) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, - "EOL expected after the 'endobj' keyword at offset "+pdfSource.getOffset())); + "EOL expected after the 'endobj' keyword at offset "+pdfSource.getPosition())); } } else @@ -838,8 +839,17 @@ public class PreflightParser extends PDF || (buf.length - tmpOffset == 2 && (buf[tmpOffset] != 13 || buf[tmpOffset + 1] != 10)) || (buf.length - tmpOffset == 1 && (buf[tmpOffset] != 13 && buf[tmpOffset] != 10))) { + long position = 0; + try + { + position = pdfSource.getPosition(); + } + catch(IOException excpetion) + { + position = Long.MIN_VALUE; + } addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF, - "File contains data after the last %%EOF sequence at offset " + pdfSource.getOffset())); + "File contains data after the last %%EOF sequence at offset " + position)); } } }