Author: tboehme
Date: Fri Apr 6 14:25:10 2012
New Revision: 1310338
URL: http://svn.apache.org/viewvc?rev=1310338&view=rev
Log:
As announced on PDFBOX-1199, this adds a new input stream class with buffering
and seek functionality.
PushBackInputStream is extended to support the seek operation if the underlying
stream implements RandomAccessRead.
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
(with props)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java?rev=1310338&r1=1310337&r2=1310338&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
Fri Apr 6 14:25:10 2012
@@ -33,6 +33,10 @@ public class PushBackInputStream extends
*/
private long offset = 0;
+ /** In case provided input stream implements {@link RandomAccessRead} we
hold
+ * a typed reference to it in order to support seek operations. */
+ private final RandomAccessRead raInput;
+
/**
* Constructor.
*
@@ -48,6 +52,9 @@ public class PushBackInputStream extends
{
throw new IOException( "Error: input was null" );
}
+
+ raInput = ( input instanceof RandomAccessRead ) ?
+
(RandomAccessRead) input : null;
}
/**
@@ -198,4 +205,33 @@ public class PushBackInputStream extends
return data;
}
+    /**
+     * Repositions this stream. This is only possible if the wrapped stream
+     * implements {@link RandomAccessRead}; otherwise an {@link IOException}
+     * is thrown.
+     *
+     * Bytes still pending in the pushback buffer are discarded (by skipping
+     * over them) before the underlying stream is repositioned.
+     *
+     * @param newOffset position within the stream from which the next read starts
+     *
+     * @throws IOException if the underlying stream does not implement
+     *                     {@link RandomAccessRead} or if the seek operation
+     *                     on the underlying stream fails
+     */
+    public void seek( long newOffset ) throws IOException
+    {
+        if ( raInput == null )
+        {
+            final String streamType = in.getClass().getSimpleName();
+            throw new IOException( "Provided stream of type " + streamType + " is not seekable." );
+        }
+
+        // drop whatever is left unread in the pushback buffer
+        final int pendingBytes = buf.length - pos;
+        if ( pendingBytes >= 1 )
+        {
+            skip( pendingBytes );
+        }
+
+        raInput.seek( newOffset );
+        offset = newOffset;
+    }
+
}
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java?rev=1310338&view=auto
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
(added)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
Fri Apr 6 14:25:10 2012
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.io;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.util.LinkedHashMap;
+
+/**
+ * Provides {@link InputStream} access to portions of a file combined with
+ * buffered reading of content. The position of the next byte to read can be
+ * set via {@link #seek(long)}.
+ *
+ * The file is accessed via {@link RandomAccessFile} and is read in fixed-size
+ * pages which are kept in an LRU cache.
+ *
+ * @author Timo Boehme (timo.boehme at ontochem com)
+ */
+public class RandomAccessBufferedFileInputStream extends InputStream implements RandomAccessRead
+{
+    /** log2 of the page size. */
+    private static final int PAGE_SIZE_SHIFT = 12;
+    /** Number of bytes per cached page. */
+    private static final int PAGE_SIZE = 1 << PAGE_SIZE_SHIFT;
+    /** Mask which maps a file offset to the offset of the page containing it. */
+    private static final long PAGE_OFFSET_MASK = -1L << PAGE_SIZE_SHIFT;
+    /** Maximum number of pages kept in the cache. */
+    private static final int MAX_CACHED_PAGES = 1000;
+
+    /** Buffer of the page most recently evicted from the cache; reused by {@link #readPage()}. */
+    private byte[] lastRemovedCachePage = null;
+
+    /** LRU page cache (access ordered), keyed by the file offset of the page start. */
+    private final LinkedHashMap<Long, byte[]> pageCache =
+        new LinkedHashMap<Long, byte[]>( MAX_CACHED_PAGES, 0.75f, true )
+    {
+        private static final long serialVersionUID = -6302488539257741101L;
+
+        @Override
+        protected boolean removeEldestEntry( java.util.Map.Entry<Long, byte[]> _eldest )
+        {
+            final boolean doRemove = size() > MAX_CACHED_PAGES;
+            if ( doRemove )
+            {
+                // keep the evicted buffer so the next page read can reuse it
+                lastRemovedCachePage = _eldest.getValue();
+            }
+            return doRemove;
+        }
+    };
+
+    /** File offset of the first byte of the current page; -1 as long as no page is loaded. */
+    private long curPageOffset = -1;
+    private byte[] curPage = new byte[ PAGE_SIZE ];
+    /** Index within {@link #curPage} of the next byte to read. */
+    private int offsetWithinPage = 0;
+
+    private final RandomAccessFile raFile;
+    private final long fileLength;
+    /** Offset within the file of the next byte to read. */
+    private long fileOffset = 0;
+
+    /**
+     * Creates an input stream instance for the given file, positioned at the
+     * start of the file.
+     *
+     * @param _file the file to read from
+     *
+     * @throws FileNotFoundException if the file cannot be opened for reading
+     * @throws IOException if reading the first page fails
+     */
+    public RandomAccessBufferedFileInputStream( File _file )
+        throws FileNotFoundException, IOException
+    {
+        raFile = new RandomAccessFile( _file, "r" );
+        fileLength = _file.length();
+
+        boolean initialized = false;
+        try
+        {
+            seek( 0 );
+            initialized = true;
+        }
+        finally
+        {
+            if ( !initialized )
+            {
+                // do not leak the file handle if the initial page read failed
+                try
+                {
+                    raFile.close();
+                }
+                catch ( IOException ignored )
+                {
+                    // the failure of seek() is the one reported to the caller
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the offset in the file at which the next byte would be read.
+     *
+     * @return current read position within the file
+     */
+    public final long getFilePointer()
+    {
+        return fileOffset;
+    }
+
+    /**
+     * Seeks to a new position. If the new position is outside of the current
+     * page, the new page is either taken from the cache or read from the file
+     * and added to the cache.
+     *
+     * @param newOffset new read position within the file; must not be negative
+     *
+     * @throws IOException if the position is negative or the page could not be read
+     */
+    public final void seek( final long newOffset ) throws IOException
+    {
+        if ( newOffset < 0 )
+        {
+            throw new IOException( "Invalid position " + newOffset );
+        }
+
+        final long newPageOffset = newOffset & PAGE_OFFSET_MASK;
+        if ( newPageOffset != curPageOffset )
+        {
+            byte[] newPage = pageCache.get( newPageOffset );
+            if ( newPage == null )
+            {
+                raFile.seek( newPageOffset );
+                newPage = readPage();
+                pageCache.put( newPageOffset, newPage );
+            }
+            curPageOffset = newPageOffset;
+            curPage = newPage;
+        }
+
+        offsetWithinPage = (int) ( newOffset - curPageOffset );
+        fileOffset = newOffset;
+    }
+
+    /**
+     * Reads a page of data from the current position of the underlying file.
+     * If a buffer of a page previously removed from the cache is available it
+     * is reused, otherwise a new byte buffer is created.
+     *
+     * If end of file is reached before the page is full, the remainder of the
+     * buffer is left untouched; the read methods of this class never read
+     * beyond {@link #fileLength}.
+     *
+     * @return buffer containing the page data
+     *
+     * @throws IOException if reading from the file fails
+     */
+    private final byte[] readPage() throws IOException
+    {
+        byte[] page;
+
+        if ( lastRemovedCachePage != null )
+        {
+            page = lastRemovedCachePage;
+            lastRemovedCachePage = null;
+        }
+        else
+        {
+            page = new byte[ PAGE_SIZE ];
+        }
+
+        int readBytes = 0;
+        while ( readBytes < PAGE_SIZE )
+        {
+            final int curBytesRead = raFile.read( page, readBytes, PAGE_SIZE - readBytes );
+            if ( curBytesRead < 0 )
+            {
+                // EOF
+                break;
+            }
+            readBytes += curBytesRead;
+        }
+
+        return page;
+    }
+
+    /**
+     * Reads the next byte.
+     *
+     * @return the byte as value in range 0..255 or -1 if the read position is
+     *         at or behind the end of the file
+     *
+     * @throws IOException if loading the next page fails
+     */
+    @Override
+    public int read() throws IOException
+    {
+        if ( fileOffset >= fileLength )
+        {
+            return -1;
+        }
+
+        if ( offsetWithinPage == PAGE_SIZE )
+        {
+            // current page is exhausted: load the page containing the next byte
+            seek( fileOffset );
+        }
+
+        fileOffset++;
+        return curPage[ offsetWithinPage++ ] & 0xff;
+    }
+
+    /**
+     * Reads up to <code>len</code> bytes into the given array. At most the
+     * remaining bytes of the current page are returned by a single call.
+     *
+     * @param b   target buffer
+     * @param off start offset within the target buffer
+     * @param len maximum number of bytes to read
+     *
+     * @return number of bytes read or -1 if the read position is at or behind
+     *         the end of the file
+     *
+     * @throws IOException if loading the next page fails
+     */
+    @Override
+    public int read( byte[] b, int off, int len ) throws IOException
+    {
+        if ( fileOffset >= fileLength )
+        {
+            return -1;
+        }
+
+        if ( offsetWithinPage == PAGE_SIZE )
+        {
+            seek( fileOffset );
+        }
+
+        int commonLen = Math.min( PAGE_SIZE - offsetWithinPage, len );
+        if ( ( fileLength - fileOffset ) < PAGE_SIZE )
+        {
+            // last page of file: do not return bytes behind end of file
+            commonLen = Math.min( commonLen, (int) ( fileLength - fileOffset ) );
+        }
+
+        System.arraycopy( curPage, offsetWithinPage, b, off, commonLen );
+
+        offsetWithinPage += commonLen;
+        fileOffset += commonLen;
+
+        return commonLen;
+    }
+
+    /**
+     * Returns the number of bytes between the current read position and the
+     * end of the file, limited to {@link Integer#MAX_VALUE}.
+     *
+     * @return number of remaining bytes; 0 if the read position is behind the
+     *         end of the file
+     */
+    @Override
+    public int available() throws IOException
+    {
+        final long remaining = Math.min( fileLength - fileOffset, Integer.MAX_VALUE );
+        return (int) Math.max( 0L, remaining );
+    }
+
+    /**
+     * Moves the read position by the given number of bytes. The count is
+     * reduced if it would move the position behind the end of the file.
+     * A negative count moves the position backwards.
+     *
+     * @param n number of bytes to skip
+     *
+     * @return number of bytes the read position was actually moved
+     *
+     * @throws IOException if the target page could not be read or the target
+     *                     position would be before the start of the file
+     */
+    @Override
+    public long skip( long n ) throws IOException
+    {
+        // test if we have to reduce skip count because of EOF
+        long toSkip = n;
+
+        if ( fileLength - fileOffset < toSkip )
+        {
+            toSkip = fileLength - fileOffset;
+        }
+
+        final long newOffsetWithinPage = offsetWithinPage + toSkip;
+        if ( ( toSkip < PAGE_SIZE )
+             && ( newOffsetWithinPage >= 0 )
+             && ( newOffsetWithinPage <= PAGE_SIZE ) )
+        {
+            // we can skip within current page
+            offsetWithinPage = (int) newOffsetWithinPage;
+            fileOffset += toSkip;
+        }
+        else
+        {
+            // seek to the page we will get after skipping; this also covers
+            // backward moves which leave the current page
+            seek( fileOffset + toSkip );
+        }
+
+        return toSkip;
+    }
+
+    /**
+     * Returns the length of the file as determined when this stream was created.
+     *
+     * @return file length in bytes
+     */
+    @Override
+    public long length() throws IOException
+    {
+        return fileLength;
+    }
+
+    /**
+     * Closes the underlying file and releases all cached pages.
+     *
+     * @throws IOException if closing the file fails
+     */
+    @Override
+    public void close() throws IOException
+    {
+        try
+        {
+            raFile.close();
+        }
+        finally
+        {
+            // release cached buffers even if closing the file failed
+            pageCache.clear();
+            lastRemovedCachePage = null;
+        }
+    }
+}
Propchange:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
------------------------------------------------------------------------------
svn:mime-type = text/plain