svn commit: r1682706 - in /pdfbox/trunk: pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ preflight/src/main/java/org/apache/pdfbox/preflight/parser/

lehmi Sun, 31 May 2015 02:59:29 -0700

Author: lehmi
Date: Sun May 31 09:59:17 2015
New Revision: 1682706

URL: http://svn.apache.org/r1682706
Log:
PDFBOX-2301: use RandomAccessRead instead of PushBackInputStream as pdfSource


Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java 
(original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java 
Sun May 31 09:59:17 2015
@@ -16,7 +16,6 @@
  */
 package org.apache.pdfbox.pdfparser;
 
-import java.io.BufferedInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
 import java.io.IOException;
@@ -36,8 +35,10 @@ import org.apache.pdfbox.cos.COSNull;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSString;
-import org.apache.pdfbox.io.PushBackInputStream;
+import org.apache.pdfbox.io.RandomAccessBuffer;
+import org.apache.pdfbox.io.RandomAccessRead;
 import org.apache.pdfbox.cos.COSObjectKey;
+
 import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
 
 /**
@@ -137,7 +138,7 @@ public abstract class BaseParser impleme
     /**
      * This is the stream that will be read from.
      */
-    protected PushBackInputStream pdfSource;
+    protected RandomAccessRead pdfSource;
 
     /**
      * This is the document that will be parsed.
@@ -159,19 +160,20 @@ public abstract class BaseParser impleme
      */
     public BaseParser(InputStream input) throws IOException
     {
-        int pushbacksize = 65536;
-        try
-        {
-            pushbacksize = Integer.getInteger(PROP_PUSHBACK_SIZE, 65536);
-        }
-        catch (SecurityException e) 
+        pdfSource = copyInputStream(input);
+    }
+
+    protected RandomAccessRead copyInputStream(InputStream input) throws IOException
+    {
+        RandomAccessBuffer buffer = new RandomAccessBuffer();
+        byte[] byteBuffer = new byte[8192];
+        int bytesRead = 0;
+        while ((bytesRead = input.read(byteBuffer)) > -1)
         {
-            // PDFBOX-1946 getInteger calls System.getProperties, 
-            // which can get exception in an applet
-            // ignore and use default
+            buffer.write(byteBuffer, 0, bytesRead);
         }
-        this.pdfSource = new PushBackInputStream(
-                new BufferedInputStream(input, 16384), pushbacksize);
+        buffer.seek(0);
+        return buffer;    
     }
 
     private static boolean isHexDigit(char ch)
@@ -190,14 +192,14 @@ public abstract class BaseParser impleme
      */
     private COSBase parseCOSDictionaryValue() throws IOException
     {
-        long numOffset = pdfSource.getOffset();
+        long numOffset = pdfSource.getPosition();
         COSBase number = parseDirObject();
         skipSpaces();
         if (!isDigit())
         {
             return number;
         }
-        long genOffset = pdfSource.getOffset();
+        long genOffset = pdfSource.getPosition();
         COSBase generationNumber = parseDirObject();
         skipSpaces();
         readExpectedChar('R');
@@ -218,7 +220,7 @@ public abstract class BaseParser impleme
     {
         if (document == null)
         {
-            throw new IOException("object reference " + key + " at offset " + pdfSource.getOffset()
+            throw new IOException("object reference " + key + " at offset " + pdfSource.getPosition()
                     + " in content stream");
         }
         return document.getObjectFromPool(key);
@@ -308,7 +310,7 @@ public abstract class BaseParser impleme
         {
             return true;
         }
-        pdfSource.unread(c);
+        pdfSource.rewind(1);
         return false;
     }
 
@@ -324,7 +326,7 @@ public abstract class BaseParser impleme
             String potentialDEF = readString();
             if (!potentialDEF.equals(DEF))
             {
-                pdfSource.unread(potentialDEF.getBytes(ISO_8859_1));
+                pdfSource.rewind(potentialDEF.getBytes(ISO_8859_1).length);
             }
             else
             {
@@ -363,7 +365,7 @@ public abstract class BaseParser impleme
             whitespace = pdfSource.read();
             if (ASCII_LF != whitespace)
             {
-                pdfSource.unread(whitespace);
+                pdfSource.rewind(1);
                 //The spec says this is invalid but it happens in the real
                 //world so we must support it.
             }
@@ -373,7 +375,7 @@ public abstract class BaseParser impleme
             //we are in an error.
             //but again we will do a lenient parsing and just assume that 
everything
             //is fine
-            pdfSource.unread(whitespace);
+            pdfSource.rewind(1);
         }
     }
 
@@ -475,7 +477,7 @@ public abstract class BaseParser impleme
             if ( charMatchCount == keyw.length ) 
             {
                 // keyword matched; unread matched keyword (endstream/endobj) 
and following buffered content
-                pdfSource.unread( strmBuf, contentBytes, bufSize - 
contentBytes );
+                pdfSource.rewind( bufSize - contentBytes );
                 break;
             } 
             else 
@@ -543,7 +545,7 @@ public abstract class BaseParser impleme
             }
         if (amountRead > 0)
         {
-            pdfSource.unread( nextThreeBytes, 0, amountRead );
+            pdfSource.rewind( amountRead );
         }
         return braces;
     }
@@ -715,7 +717,7 @@ public abstract class BaseParser impleme
         }
         if (c != -1)
         {
-            pdfSource.unread(c);
+            pdfSource.rewind(1);
         }
         return new COSString(out.toByteArray());
     }
@@ -834,12 +836,12 @@ public abstract class BaseParser impleme
             else
             {
                 //it could be a bad object in the array which is just skipped
-                LOG.warn("Corrupt object reference at offset " + 
pdfSource.getOffset());
+                LOG.warn("Corrupt object reference at offset " + 
pdfSource.getPosition());
 
                 // This could also be an "endobj" or "endstream" which means 
we can assume that
                 // the array has ended.
                 String isThisTheEnd = readString();
-                pdfSource.unread(isThisTheEnd.getBytes(ISO_8859_1));
+                pdfSource.rewind(isThisTheEnd.getBytes(ISO_8859_1).length);
                 if(ENDOBJ_STRING.equals(isThisTheEnd) || 
ENDSTREAM_STRING.equals(isThisTheEnd))
                 {
                     return po;
@@ -909,7 +911,7 @@ public abstract class BaseParser impleme
                 }
                 else
                 {
-                    pdfSource.unread(ch2);
+                    pdfSource.rewind(1);
                     c = ch1;
                     buffer.append( ch );
                 }
@@ -926,7 +928,7 @@ public abstract class BaseParser impleme
         }
         if (c != -1)
         {
-            pdfSource.unread(c);
+            pdfSource.rewind(1);
         }
         return COSName.getPDFName( buffer.toString() );
     }
@@ -948,7 +950,7 @@ public abstract class BaseParser impleme
             if( !trueString.equals( TRUE ) )
             {
                 throw new IOException( "Error parsing boolean: expected='true' 
actual='" + trueString 
-                        + "' at offset " + pdfSource.getOffset());
+                        + "' at offset " + pdfSource.getPosition());
             }
             else
             {
@@ -961,7 +963,7 @@ public abstract class BaseParser impleme
             if( !falseString.equals( FALSE ) )
             {
                 throw new IOException( "Error parsing boolean: expected='true' 
actual='" + falseString 
-                        + "' at offset " + pdfSource.getOffset());
+                        + "' at offset " + pdfSource.getPosition());
             }
             else
             {
@@ -971,7 +973,7 @@ public abstract class BaseParser impleme
         else
         {
             throw new IOException( "Error parsing boolean expected='t or f' 
actual='" + c 
-                    + "' at offset " + pdfSource.getOffset());
+                    + "' at offset " + pdfSource.getPosition());
         }
         return retval;
     }
@@ -998,7 +1000,7 @@ public abstract class BaseParser impleme
             int leftBracket = pdfSource.read();
             // check for second left bracket
             c = (char)pdfSource.peek(); 
-            pdfSource.unread( leftBracket );
+            pdfSource.rewind(1);
             if(c == '<')
             {
 
@@ -1083,7 +1085,7 @@ public abstract class BaseParser impleme
                 }
                 if( ic != -1 )
                 {
-                    pdfSource.unread( ic );
+                    pdfSource.rewind(1);
                 }
                 retval = COSNumber.get( buf.toString() );
             }
@@ -1099,13 +1101,13 @@ public abstract class BaseParser impleme
                     // we can end up in an infinite loop otherwise
                     throw new IOException( "Unknown dir object c='" + c +
                             "' cInt=" + (int)c + " peek='" + (char)peek 
-                            + "' peekInt=" + peek + " " + 
pdfSource.getOffset() );
+                            + "' peekInt=" + peek + " " + 
pdfSource.getPosition() );
                 }
 
                 // if it's an endstream/endobj, we want to put it back so the 
caller will see it
                 if(ENDOBJ_STRING.equals(badString) || 
ENDSTREAM_STRING.equals(badString))
                 {
-                    pdfSource.unread(badString.getBytes(ISO_8859_1));
+                    pdfSource.rewind(badString.getBytes(ISO_8859_1).length);
                 }
             }
         }
@@ -1132,7 +1134,7 @@ public abstract class BaseParser impleme
         }
         if (c != -1)
         {
-            pdfSource.unread(c);
+            pdfSource.rewind(1);
         }
         return buffer.toString();
     }
@@ -1165,7 +1167,7 @@ public abstract class BaseParser impleme
             {
                 throw new IOException("Expected string '" + new 
String(expectedString)
                         + "' but missed at character '" + c + "' at offset "
-                        + pdfSource.getOffset());
+                        + pdfSource.getPosition());
             }
         }
         skipSpaces();
@@ -1183,7 +1185,7 @@ public abstract class BaseParser impleme
         char c = (char) pdfSource.read();
         if (c != ec)
         {
-            throw new IOException("expected='" + ec + "' actual='" + c + "' at 
offset " + pdfSource.getOffset());
+            throw new IOException("expected='" + ec + "' actual='" + c + "' at 
offset " + pdfSource.getPosition());
         }
     }
     
@@ -1216,7 +1218,7 @@ public abstract class BaseParser impleme
         }
         if (c != -1)
         {
-            pdfSource.unread(c);
+            pdfSource.rewind(1);
         }
         return buffer.toString();
     }
@@ -1410,7 +1412,7 @@ public abstract class BaseParser impleme
         }
         if (c != -1)
         {
-            pdfSource.unread(c);
+            pdfSource.rewind(1);
         }
     }
 
@@ -1468,8 +1470,8 @@ public abstract class BaseParser impleme
         }
         catch( NumberFormatException e )
         {
-            pdfSource.unread(intBuffer.toString().getBytes(ISO_8859_1));
-            throw new IOException( "Error: Expected an integer type at offset 
"+pdfSource.getOffset(), e);
+            pdfSource.rewind(intBuffer.toString().getBytes(ISO_8859_1).length);
+            throw new IOException( "Error: Expected an integer type at offset 
"+pdfSource.getPosition(), e);
         }
         return retval;
     }
@@ -1495,9 +1497,9 @@ public abstract class BaseParser impleme
         }
         catch( NumberFormatException e )
         {
-            pdfSource.unread(longBuffer.toString().getBytes(ISO_8859_1));
+            pdfSource.rewind(longBuffer.toString().getBytes(ISO_8859_1).length);
             throw new IOException( "Error: Expected a long type at offset "
-                    + pdfSource.getOffset() + ", instead got '" + longBuffer + 
"'", e);
+                    + pdfSource.getPosition() + ", instead got '" + longBuffer 
+ "'", e);
         }
         return retval;
     }
@@ -1526,7 +1528,7 @@ public abstract class BaseParser impleme
         }
         if( lastByte != -1 )
         {
-            pdfSource.unread( lastByte );
+            pdfSource.rewind(1);
         }
         return buffer;
     }

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java 
(original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java 
Sun May 31 09:59:17 2015
@@ -16,6 +16,8 @@
  */
 package org.apache.pdfbox.pdfparser;
 
+import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
+
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
@@ -52,7 +54,6 @@ import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
-import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
 
 /**
  * PDF-Parser which first reads startxref and xref tables in order to know 
valid objects and parse only these objects.
@@ -224,11 +225,11 @@ public class COSParser extends BaseParse
                 // use existing parser to parse xref table
                 parseXrefTable(prev);
                 // parse the last trailer.
-                trailerOffset = pdfSource.getOffset();
+                trailerOffset = pdfSource.getPosition();
                 // PDFBOX-1739 skip extra xref entries in RegisSTAR documents
                 while (isLenient && pdfSource.peek() != 't')
                 {
-                    if (pdfSource.getOffset() == trailerOffset)
+                    if (pdfSource.getPosition() == trailerOffset)
                     {
                         // warn only the first time
                         LOG.warn("Expected trailer object at position " + 
trailerOffset
@@ -239,7 +240,7 @@ public class COSParser extends BaseParse
                 if (!parseTrailer())
                 {
                     throw new IOException("Expected trailer object at 
position: "
-                            + pdfSource.getOffset());
+                            + pdfSource.getPosition());
                 }
                 COSDictionary trailer = 
xrefTrailerResolver.getCurrentTrailer();
                 // check for a XRef stream, it may contain some object ids of 
compressed objects 
@@ -733,8 +734,7 @@ public class COSParser extends BaseParse
 
         if (endObjectKey.equals(STREAM_STRING))
         {
-            pdfSource.unread(endObjectKey.getBytes(ISO_8859_1));
-            pdfSource.unread(' ');
+            pdfSource.rewind(endObjectKey.getBytes(ISO_8859_1).length);
             if (pb instanceof COSDictionary)
             {
                 COSStream stream = parseCOSStream((COSDictionary) pb);
@@ -852,7 +852,7 @@ public class COSParser extends BaseParse
                 if (lengthObj.getObject() == null)
                 {
                     // not read so far, keep current stream position
-                    final long curFileOffset = pdfSource.getOffset();
+                    final long curFileOffset = pdfSource.getPosition();
                     parseObjectDynamically(lengthObj, true);
                     // reset current stream position
                     pdfSource.seek(curFileOffset);
@@ -918,7 +918,7 @@ public class COSParser extends BaseParse
                 if (isLenient)
                 {
                    LOG.warn("The stream doesn't provide any stream length, 
using fallback readUntilEnd, at offset "
-                        + pdfSource.getOffset());
+                        + pdfSource.getPosition());
                 }
                 else
                 {
@@ -941,22 +941,22 @@ public class COSParser extends BaseParse
             if (endStream.equals("endobj") && isLenient)
             {
                 LOG.warn("stream ends with 'endobj' instead of 'endstream' at 
offset "
-                        + pdfSource.getOffset());
+                        + pdfSource.getPosition());
                 // avoid follow-up warning about missing endobj
-                pdfSource.unread(ENDOBJ);
+                pdfSource.rewind(ENDOBJ.length);
             }
             else if (endStream.length() > 9 && isLenient && 
endStream.substring(0,9).equals(ENDSTREAM_STRING))
             {
                 LOG.warn("stream ends with '" + endStream + "' instead of 
'endstream' at offset "
-                        + pdfSource.getOffset());
+                        + pdfSource.getPosition());
                 // unread the "extra" bytes
-                pdfSource.unread(endStream.substring(9).getBytes(ISO_8859_1));
+                pdfSource.rewind(endStream.substring(9).getBytes(ISO_8859_1).length);
             }
             else if (!endStream.equals(ENDSTREAM_STRING))
             {
                 throw new IOException(
                         "Error reading stream, expected='endstream' actual='"
-                        + endStream + "' at offset " + pdfSource.getOffset());
+                        + endStream + "' at offset " + 
pdfSource.getPosition());
             }
         }
         finally
@@ -979,7 +979,7 @@ public class COSParser extends BaseParse
             if (readBytes <= 0)
             {
                 // shouldn't happen, the stream length has already been 
validated
-                throw new IOException("read error at offset " + 
pdfSource.getOffset()
+                throw new IOException("read error at offset " + 
pdfSource.getPosition()
                         + ": expected " + chunk + " bytes, but read() returns 
" + readBytes);
             }
             out.write(streamCopyBuf, 0, readBytes);
@@ -990,7 +990,7 @@ public class COSParser extends BaseParse
     private boolean validateStreamLength(long streamLength) throws IOException
     {
         boolean streamLengthIsValid = true;
-        long originOffset = pdfSource.getOffset();
+        long originOffset = pdfSource.getPosition();
         long expectedEndOfStream = originOffset + streamLength;
         if (expectedEndOfStream > fileLen)
         {
@@ -1171,7 +1171,7 @@ public class COSParser extends BaseParse
         }
         long objectNr = objectKey.getNumber();
         int objectGen = objectKey.getGeneration();
-        long originOffset = pdfSource.getOffset();
+        long originOffset = pdfSource.getPosition();
         pdfSource.seek(offset);
         String objectString = createObjectString(objectNr, objectGen);
         try 
@@ -1216,7 +1216,7 @@ public class COSParser extends BaseParse
         if (bfSearchCOSObjectKeyOffsets == null)
         {
             bfSearchCOSObjectKeyOffsets = new HashMap<COSObjectKey, Long>();
-            long originOffset = pdfSource.getOffset();
+            long originOffset = pdfSource.getPosition();
             long currentOffset = MINIMUM_SEARCH_OFFSET;
             String objString = " obj";
             char[] string = objString.toCharArray();
@@ -1368,14 +1368,14 @@ public class COSParser extends BaseParse
         {
             // a pdf may contain more than one xref entry
             bfSearchXRefTablesOffsets = new Vector<Long>();
-            long originOffset = pdfSource.getOffset();
+            long originOffset = pdfSource.getPosition();
             pdfSource.seek(MINIMUM_SEARCH_OFFSET);
             // search for xref tables
             while (!pdfSource.isEOF())
             {
                 if (isString(XREF_TABLE))
                 {
-                    long newOffset = pdfSource.getOffset();
+                    long newOffset = pdfSource.getPosition();
                     pdfSource.seek(newOffset - 1);
                     // ensure that we don't read "startxref" instead of "xref"
                     if (isWhitespace())
@@ -1401,7 +1401,7 @@ public class COSParser extends BaseParse
         {
             // a pdf may contain more than one /XRef entry
             bfSearchXRefStreamsOffsets = new Vector<Long>();
-            long originOffset = pdfSource.getOffset();
+            long originOffset = pdfSource.getPosition();
             pdfSource.seek(MINIMUM_SEARCH_OFFSET);
             // search for XRef streams
             String objString = " obj";
@@ -1412,7 +1412,7 @@ public class COSParser extends BaseParse
                 {
                     // search backwards for the beginning of the stream
                     long newOffset = -1;
-                    long xrefOffset = pdfSource.getOffset();
+                    long xrefOffset = pdfSource.getPosition();
                     boolean objFound = false;
                     for (int i = 1; i < 30 && !objFound; i++)
                     {
@@ -1445,7 +1445,7 @@ public class COSParser extends BaseParse
                                             if (length > 0)
                                             {
                                                 pdfSource.read();
-                                                newOffset = 
pdfSource.getOffset();
+                                                newOffset = 
pdfSource.getPosition();
                                             }
                                         }
                                     }
@@ -1584,7 +1584,7 @@ public class COSParser extends BaseParse
             {
                 bytesMatching = true;
             }
-            pdfSource.unread(bytesRead, 0, numberOfBytes);
+            pdfSource.rewind(numberOfBytes);
         }
         return bytesMatching;
     }
@@ -1599,7 +1599,7 @@ public class COSParser extends BaseParse
     private boolean isString(char[] string) throws IOException
     {
         boolean bytesMatching = true;
-        long originOffset = pdfSource.getOffset();
+        long originOffset = pdfSource.getPosition();
         for (char c : string)
         {
             if (pdfSource.read() != c)
@@ -1624,7 +1624,7 @@ public class COSParser extends BaseParse
             return false;
         }
         //read "trailer"
-        long currentOffset = pdfSource.getOffset();
+        long currentOffset = pdfSource.getPosition();
         String nextLine = readLine();
         if( !nextLine.trim().equals( "trailer" ) )
         {
@@ -1729,7 +1729,7 @@ public class COSParser extends BaseParse
             {
                 String headerGarbage = header.substring(headerMarker.length() 
+ 3, header.length()) + "\n";
                 header = header.substring(0, headerMarker.length() + 3);
-                pdfSource.unread(headerGarbage.getBytes(ISO_8859_1));
+                pdfSource.rewind(headerGarbage.getBytes(ISO_8859_1).length);
             }
         }
         float headerVersion = -1;
@@ -1777,7 +1777,7 @@ public class COSParser extends BaseParse
         // check for trailer after xref
         String str = readString();
         byte[] b = str.getBytes(ISO_8859_1);
-        pdfSource.unread(b, 0, b.length);
+        pdfSource.rewind(b.length);
         
         // signal start of new XRef
         xrefTrailerResolver.nextXrefObj( startByteOffset, XRefType.TABLE );

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java 
(original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java 
Sun May 31 09:59:17 2015
@@ -27,18 +27,13 @@ import org.apache.pdfbox.cos.COSDictiona
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.io.PushBackInputStream;
-import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.io.RandomAccessFile;
 import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
 
 public class FDFParser extends COSParser
 {
     private static final Log LOG = LogFactory.getLog(FDFParser.class);
 
-    private final RandomAccessBufferedFileInputStream raStream;
-
-    private File tempPDFFile;
-
     /**
      * Constructs parser for given file using memory buffer.
      * 
@@ -62,7 +57,7 @@ public class FDFParser extends COSParser
     public FDFParser(File file) throws IOException
     {
         fileLen = file.length();
-        raStream = new RandomAccessBufferedFileInputStream(file);
+        pdfSource = new RandomAccessFile(file, "r");
         init();
     }
 
@@ -74,9 +69,8 @@ public class FDFParser extends COSParser
      */
     public FDFParser(InputStream input) throws IOException
     {
-        tempPDFFile = createTmpFile(input);
-        fileLen = tempPDFFile.length();
-        raStream = new RandomAccessBufferedFileInputStream(tempPDFFile);
+        pdfSource = copyInputStream(input);
+        fileLen = pdfSource.length();
         init();
     }
 
@@ -96,7 +90,6 @@ public class FDFParser extends COSParser
             }
         }
         document = new COSDocument(false);
-        pdfSource = new PushBackInputStream(raStream, 4096);
     }
 
     /**
@@ -154,8 +147,6 @@ public class FDFParser extends COSParser
         finally
         {
             IOUtils.closeQuietly(pdfSource);
-            deleteTempFile();
-    
             if (exceptionOccurred && document != null)
             {
                 IOUtils.closeQuietly(document);
@@ -177,25 +168,4 @@ public class FDFParser extends COSParser
         return new FDFDocument( getDocument() );
     }
 
-    /**
-     * Remove the temporary file. A temporary file is created if this class is 
instantiated with an InputStream
-     */
-    private void deleteTempFile()
-    {
-        if (tempPDFFile != null)
-        {
-            try
-            {
-                if (!tempPDFFile.delete())
-                {
-                    LOG.warn("Temporary file '" + tempPDFFile.getName() + "' 
can't be deleted");
-                }
-            }
-            catch (SecurityException e)
-            {
-                LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't 
be deleted", e);
-            }
-        }
-    }
-
 }

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java 
(original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java 
Sun May 31 09:59:17 2015
@@ -30,8 +30,8 @@ import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNull;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.io.PushBackInputStream;
 import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.io.RandomAccessFile;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
 import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
@@ -43,7 +43,6 @@ public class PDFParser extends COSParser
 {
     private static final Log LOG = LogFactory.getLog(PDFParser.class);
 
-    private final RandomAccessBufferedFileInputStream raStream;
     private String password = "";
     private InputStream keyStoreInputStream = null;
     private String keyAlias = null;
@@ -163,7 +162,7 @@ public class PDFParser extends COSParser
             boolean useScratchFiles) throws IOException
     {
         fileLen = file.length();
-        raStream = new RandomAccessBufferedFileInputStream(file);
+        pdfSource = new RandomAccessBufferedFileInputStream(file);
         password = decryptionPassword;
         keyStoreInputStream = keyStore;
         keyAlias = alias;
@@ -251,9 +250,17 @@ public class PDFParser extends COSParser
     public PDFParser(InputStream input, String decryptionPassword, InputStream 
keyStore,
             String alias, boolean useScratchFiles) throws IOException
     {
-        tempPDFFile = createTmpFile(input);
-        fileLen = tempPDFFile.length();
-        raStream = new RandomAccessBufferedFileInputStream(tempPDFFile);
+        if (useScratchFiles)
+        {
+            tempPDFFile = createTmpFile(input);
+            fileLen = tempPDFFile.length();
+            pdfSource = new RandomAccessBufferedFileInputStream(tempPDFFile);
+        }
+        else
+        {
+            pdfSource = copyInputStream(input);
+            fileLen = pdfSource.length();
+        }
         password = decryptionPassword;
         keyStoreInputStream = keyStore;
         keyAlias = alias;
@@ -276,7 +283,6 @@ public class PDFParser extends COSParser
             }
         }
         document = new COSDocument(useScratchFiles);
-        pdfSource = new PushBackInputStream(raStream, 4096);
     }
 
     /**
@@ -355,7 +361,6 @@ public class PDFParser extends COSParser
         {
             IOUtils.closeQuietly(pdfSource);
             IOUtils.closeQuietly(keyStoreInputStream);
-    
             deleteTempFile();
     
             if (exceptionOccurred && document != null)

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 Sun May 31 09:59:17 2015
@@ -19,14 +19,14 @@ package org.apache.pdfbox.pdfparser;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
+import org.apache.pdfbox.contentstream.operator.Operator;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSBoolean;
 import org.apache.pdfbox.cos.COSDictionary;
@@ -35,8 +35,8 @@ import org.apache.pdfbox.cos.COSNull;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.io.RandomAccessRead;
 import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.contentstream.operator.Operator;
 
 /**
  * This will parse a PDF byte stream and extract operands and such.
@@ -209,7 +209,7 @@ public class PDFStreamParser extends Bas
                 c = (char) pdfSource.peek();
 
                 // put back first bracket
-                pdfSource.unread(leftBracket);
+                pdfSource.rewind(1);
 
                 if (c == '<')
                 {
@@ -408,7 +408,7 @@ public class PDFStreamParser extends Bas
      * @return <code>true</code> if next bytes are probably printable ASCII
      * characters starting with a PDF operator, otherwise <code>false</code>
      */
-    private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource) 
+    private boolean hasNoFollowingBinData(final RandomAccessRead pdfSource)
             throws IOException
     {
         // as suggested in PDFBOX-1164
@@ -453,7 +453,7 @@ public class PDFStreamParser extends Bas
                     noBinData = false;
                 }
             }
-            pdfSource.unread(binCharTestArr, 0, readBytes);
+            pdfSource.rewind(readBytes);
         }
         if (!noBinData)
         {

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sun May 31 09:59:17 2015
@@ -64,13 +64,13 @@ import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNull;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
 import org.apache.pdfbox.pdfparser.PDFParser;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.preflight.Format;
 import org.apache.pdfbox.preflight.PreflightConfiguration;
 import org.apache.pdfbox.preflight.PreflightConstants;
@@ -347,7 +347,7 @@ public class PreflightParser extends PDF
             // the number of objects in the xref table
             int count; 
 
-            long offset = pdfSource.getOffset();
+            long offset = pdfSource.getPosition();
             String line = readLine();
             Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
             Matcher matcher = pattern.matcher(line);
@@ -359,7 +359,8 @@ public class PreflightParser extends PDF
             else
             {
                 addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF,
-                        "Cross reference subsection header is invalid: '" + line + "' at position " + pdfSource.getOffset()));
+                        "Cross reference subsection header is invalid: '" + line + "' at position "
+                                + pdfSource.getPosition()));
                 // reset pdfSource cursor to read xref information
                 pdfSource.seek(offset);
                 // first obj id
@@ -454,16 +455,16 @@ public class PreflightParser extends PDF
         if (!streamV.equals("stream"))
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
-                    "Expected 'stream' keyword but found '" + streamV + "' at offset "+pdfSource.getOffset()));
+                    "Expected 'stream' keyword but found '" + streamV + "' at offset "+pdfSource.getPosition()));
         }
         int nextChar = pdfSource.read();
         if (!((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10))
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
-                    "Expected 'EOL' after the stream keyword at offset "+pdfSource.getOffset()));
+                    "Expected 'EOL' after the stream keyword at offset "+pdfSource.getPosition()));
         }
         // set the offset before stream
-        pdfSource.seek(pdfSource.getOffset() - 7);
+        pdfSource.seek(pdfSource.getPosition() - 7);
     }
 
     /**
@@ -473,17 +474,17 @@ public class PreflightParser extends PDF
      */
     protected void checkEndstreamKeyWord() throws IOException
     {
-        pdfSource.seek(pdfSource.getOffset() - 10);
+        pdfSource.seek(pdfSource.getPosition() - 10);
         if (!nextIsEOL())
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
-                    "Expected 'EOL' before the endstream keyword at offset "+pdfSource.getOffset()+" but found '"+pdfSource.peek()+"'"));
+                    "Expected 'EOL' before the endstream keyword at offset "+pdfSource.getPosition()+" but found '"+pdfSource.peek()+"'"));
         }
         String endstreamV = readString();
         if (!endstreamV.equals("endstream"))
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
-                    "Expected 'endstream' keyword at offset "+pdfSource.getOffset()+" but found '" + endstreamV + "'"));
+                    "Expected 'endstream' keyword at offset "+pdfSource.getPosition()+" but found '" + endstreamV + "'"));
         }
     }
 
@@ -544,7 +545,7 @@ public class PreflightParser extends PDF
     protected COSString parseCOSString() throws IOException
     {
         // offset reminder
-        long offset = pdfSource.getOffset();
+        long offset = pdfSource.getPosition();
         char nextChar = (char) pdfSource.read();
         int count = 0;
         if (nextChar == '<')
@@ -566,7 +567,7 @@ public class PreflightParser extends PDF
                     else
                     {
                         addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID,
-                                "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "') at offset " + pdfSource.getOffset()));
+                                "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "') at offset " + pdfSource.getPosition()));
                         break;
                     }
                 }
@@ -577,7 +578,7 @@ public class PreflightParser extends PDF
         if (count % 2 != 0)
         {
             addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER,
-                    "Hexa string shall contain even number of non white space char at offset " + pdfSource.getOffset()));
+                    "Hexa string shall contain even number of non white space char at offset " + pdfSource.getPosition()));
         }
 
         // reset the offset to parse the COSString
@@ -586,7 +587,7 @@ public class PreflightParser extends PDF
 
         if (result.getString().length() > MAX_STRING_LENGTH)
         {
-            addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+pdfSource.getOffset()));
+            addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+pdfSource.getPosition()));
         }
         return result;
     }
@@ -612,7 +613,7 @@ public class PreflightParser extends PDF
                 if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT)
                 {
                     addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
-                            "Float is too long or too small: " + real+"  at offset "+pdfSource.getOffset()));
+                            "Float is too long or too small: " + real+"  at offset "+pdfSource.getPosition()));
                 }
             }
             else
@@ -621,7 +622,7 @@ public class PreflightParser extends PDF
                 if (numAsLong > Integer.MAX_VALUE || numAsLong < Integer.MIN_VALUE)
                 {
                     addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
-                            "Numeric is too long or too small: " + numAsLong+"  at offset "+pdfSource.getOffset()));
+                            "Numeric is too long or too small: " + numAsLong+"  at offset "+pdfSource.getPosition()));
                 }
             }
         }
@@ -631,7 +632,7 @@ public class PreflightParser extends PDF
             COSDictionary dic = (COSDictionary) result;
             if (dic.size() > MAX_DICT_ENTRIES)
             {
-                addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+pdfSource.getOffset()));
+                addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+pdfSource.getPosition()));
             }
         }
         return result;
@@ -680,7 +681,7 @@ public class PreflightParser extends PDF
                 long readObjNr;
                 int readObjGen;
 
-                long offset = pdfSource.getOffset();
+                long offset = pdfSource.getPosition();
                 String line = readLine();
                 Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
                 Matcher matcher = pattern.matcher(line);
@@ -721,7 +722,7 @@ public class PreflightParser extends PDF
                 skipSpaces();
                 COSBase pb = parseDirObject();
                 skipSpaces();
-                long endObjectOffset = pdfSource.getOffset();
+                long endObjectOffset = pdfSource.getPosition();
                 String endObjectKey = readString();
 
                 if (endObjectKey.equals("stream"))
@@ -743,7 +744,7 @@ public class PreflightParser extends PDF
                         throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
                     }
                     skipSpaces();
-                    endObjectOffset = pdfSource.getOffset();
+                    endObjectOffset = pdfSource.getPosition();
                     endObjectKey = readString();
 
                     // we have case with a second 'endstream' before endobj
@@ -774,12 +775,12 @@ public class PreflightParser extends PDF
                 }
                 else
                 {
-                    offset = pdfSource.getOffset();
+                    offset = pdfSource.getPosition();
                     pdfSource.seek(endObjectOffset - 1);
                     if (!nextIsEOL())
                     {
                         addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
-                                "EOL expected before the 'endobj' keyword at offset "+pdfSource.getOffset()));
+                                "EOL expected before the 'endobj' keyword at offset "+pdfSource.getPosition()));
                     }
                     pdfSource.seek(offset);
                 }
@@ -787,7 +788,7 @@ public class PreflightParser extends PDF
                 if (!nextIsEOL())
                 {
                     addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
-                            "EOL expected after the 'endobj' keyword at offset "+pdfSource.getOffset()));
+                            "EOL expected after the 'endobj' keyword at offset "+pdfSource.getPosition()));
                 }
             }
             else
@@ -838,8 +839,17 @@ public class PreflightParser extends PDF
                         || (buf.length - tmpOffset == 2 && (buf[tmpOffset] != 13 || buf[tmpOffset + 1] != 10))
                         || (buf.length - tmpOffset == 1 && (buf[tmpOffset] != 13 && buf[tmpOffset] != 10)))
                 {
+                    long position = 0;
+                    try
+                    {
+                        position = pdfSource.getPosition();
+                    }
+                    catch(IOException excpetion)
+                    {
+                        position = Long.MIN_VALUE;
+                    }
                     addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,
-                            "File contains data after the last %%EOF sequence at offset " + pdfSource.getOffset()));
+                            "File contains data after the last %%EOF sequence at offset " + position));
                 }
             }
         }

svn commit: r1682706 - in /pdfbox/trunk: pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ preflight/src/main/java/org/apache/pdfbox/preflight/parser/

Reply via email to