Hi again,

On Tue, Jan 31, 2006 at 09:02:31AM +0000, David Holroyd wrote:
> The current FileUpload API only allows uploaded data to be read by the
> application if an intermediate buffer (i.e. DiskFileItem) is used. It
> would be very handy if the low-level interfaces provided read access to
> the uploaded file data.
The attached patch tries to get this working (though without much thought yet for efficiency or compliance with the existing code-formatting standards ;). The changed code passes the ServletFileUploadTest suite, and the patch adds a test suite for the new DelimitedInputStream class. Is there any chance of a future release of FileUpload supporting this kind of interface?

ta,
dave

-- 
http://david.holroyd.me.uk/
Index: /home/dave/workspace/fileupload/src/java/org/apache/commons/fileupload/DelimitedInputStream.java =================================================================== --- /home/dave/workspace/fileupload/src/java/org/apache/commons/fileupload/DelimitedInputStream.java (revision 0) +++ /home/dave/workspace/fileupload/src/java/org/apache/commons/fileupload/DelimitedInputStream.java (revision 0) @@ -1 +1,112 @@ +package org.apache.commons.fileupload; + +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +//import org.apache.tomcat.util.buf.ByteChunk; + +class DelimitedInputStream extends InputStream { + private PushbackInputStream in; + private byte[] term; // the sequence that delimits the end of stream + private byte[] tmpBuff; + private boolean eos = false; // reached the end of stream (terminator)? + private boolean closed = false; // close() called? + + public DelimitedInputStream(PushbackInputStream in, byte[] terminator) { + this.in = in; + // take a copy of the original terminator, + term = new byte[terminator.length]; + System.arraycopy(terminator, 0, term, 0, terminator.length); + tmpBuff = new byte[terminator.length * 2]; + } + + public int read() throws IOException { + byte[] tmp = new byte[1]; + int c = read(tmp, 0, 1); + if (c == -1) { + return -1; + } + return tmp[0] & 0xff; + } + + public int read(byte[] buf, int off, int len) throws IOException { + assertOpen(); + if (eos) return -1; + + int c = readChunk(buf, off, len); + return c; + } + + private int fillTmpBuff(int limit) throws IOException { + int read = 0; + while (read < limit) { + int c = in.read(tmpBuff, read, limit-read); + if (c == -1) { + if (read == 0) { + return -1; + } + break; + } + read += c; + } + return read; + } + + private int readChunk(byte[] buf, int off, int len) + throws IOException + { + int read = fillTmpBuff(Math.min(len, term.length)+term.length); + if (read == -1) { + return -1; + } + if (read >= term.length) { + int pos = 
findTerm(); + if (pos != -1) { + in.unread(tmpBuff, pos, read-pos); + eos = true; + read = pos; + } else if (read >= len) { + in.unread(tmpBuff, len, read-len); + read = len; + } + } else if (read >= len) { + in.unread(tmpBuff, len, read-len); + read = len; + } + System.arraycopy(tmpBuff, 0, buf, off, read); + return read; + } + + private int findTerm() { + int pos = 0; + int end = tmpBuff.length - term.length; +// return ByteChunk.findChars(tmpBuff, 0, end, term); + + outer: while (pos <= end) { + if (tmpBuff[pos] != term[0]) { + pos++; + continue; + } + for (int i=1; i<term.length; i++) { + if (tmpBuff[pos+i] != term[i]) { + pos++; + continue outer; + } + } + // if the above for-loop completes without the + // 'continue outer', we must have found a match, + return pos; + } + return -1; + } + + private void assertOpen() throws IOException { + if (closed) { + throw new IOException("stream is closed"); + } + } + + public void close() { + closed = true; + } +} Index: /home/dave/workspace/fileupload/src/java/org/apache/commons/fileupload/MultipartStream.java =================================================================== --- /home/dave/workspace/fileupload/src/java/org/apache/commons/fileupload/MultipartStream.java (revision 373261) +++ /home/dave/workspace/fileupload/src/java/org/apache/commons/fileupload/MultipartStream.java (working copy) @@ -19,7 +19,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.PushbackInputStream; import java.io.UnsupportedEncodingException; +import org.apache.commons.io.IOUtils; /** * <p> Low level API for processing file uploads. @@ -149,13 +151,6 @@ CR, LF, DASH, DASH}; - /** - * The number of bytes, over and above the boundary size, to use for the - * keep region. - */ - private static final int KEEP_REGION_PAD = 3; - - // ----------------------------------------------------------- Data members @@ -162,7 +157,7 @@ /** * The input stream from which data is read. 
*/ - private InputStream input; + private PushbackInputStream input; /** @@ -172,13 +167,6 @@ /** - * The amount of data, in bytes, that must be kept in the buffer in order - * to detect delimiters reliably. - */ - private int keepRegion; - - - /** * The byte sequence that partitions the stream. */ private byte[] boundary; @@ -185,34 +173,6 @@ /** - * The length of the buffer used for processing the request. - */ - private int bufSize; - - - /** - * The buffer used for processing the request. - */ - private byte[] buffer; - - - /** - * The index of first valid character in the buffer. - * <br> - * 0 <= head < bufSize - */ - private int head; - - - /** - * The index of last valid characer in the buffer + 1. - * <br> - * 0 <= tail <= bufSize - */ - private int tail; - - - /** * The content encoding to use when reading headers. */ private String headerEncoding; @@ -253,9 +213,7 @@ public MultipartStream(InputStream input, byte[] boundary, int bufSize) { - this.input = input; - this.bufSize = bufSize; - this.buffer = new byte[bufSize]; + this.input = new PushbackInputStream(input, (BOUNDARY_PREFIX.length+boundary.length)*2); // We prepend CR/LF to the boundary to chop trailng CR/LF from // body-data tokens. @@ -261,7 +219,6 @@ // body-data tokens. this.boundary = new byte[boundary.length + BOUNDARY_PREFIX.length]; this.boundaryLength = boundary.length + BOUNDARY_PREFIX.length; - this.keepRegion = boundary.length + KEEP_REGION_PAD; System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length); System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, @@ -266,9 +223,6 @@ BOUNDARY_PREFIX.length); System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length); - - head = 0; - tail = 0; } @@ -330,17 +284,12 @@ */ public byte readByte() throws IOException { - // Buffer depleted ? - if (head == tail) { - head = 0; - // Refill. - tail = input.read(buffer, head, bufSize); - if (tail == -1) { - // No more data available. 
- throw new IOException("No more data is available"); - } + int c = input.read(); + if (c == -1) { + // No more data available. + throw new IOException("No more data is available"); } - return buffer[head++]; + return (byte)c; } @@ -359,8 +308,10 @@ byte[] marker = new byte[2]; boolean nextChunk = false; - head += boundaryLength; try { + byte[] tmp = new byte[boundary.length]; + input.read(tmp); + marker[0] = readByte(); if (marker[0] == LF) { // Work around IE5 Mac bug with input type=image. @@ -494,58 +445,13 @@ public int readBodyData(OutputStream output) throws MalformedStreamException, IOException { - boolean done = false; - int pad; - int pos; - int bytesRead; - int total = 0; - while (!done) { - // Is boundary token present somewere in the buffer? - pos = findSeparator(); - if (pos != -1) { - // Write the rest of the data before the boundary. - output.write(buffer, head, pos - head); - total += pos - head; - head = pos; - done = true; - } else { - // Determine how much data should be kept in the - // buffer. - if (tail - head > keepRegion) { - pad = keepRegion; - } else { - pad = tail - head; - } - // Write out the data belonging to the body-data. - output.write(buffer, head, tail - head - pad); - - // Move the data to the beginning of the buffer. - total += tail - head - pad; - System.arraycopy(buffer, tail - pad, buffer, 0, pad); - - // Refill buffer with new data. - head = 0; - bytesRead = input.read(buffer, pad, bufSize - pad); - - // [pprrrrrrr] - if (bytesRead != -1) { - tail = pad + bytesRead; - } else { - // The last pad amount is left in the buffer. - // Boundary can't be in there so write out the - // data you have and signal an error condition. 
- output.write(buffer, 0, pad); - output.flush(); - total += pad; - throw new MalformedStreamException( - "Stream ended unexpectedly"); - } - } - } - output.flush(); - return total; + DelimitedInputStream in = new DelimitedInputStream(input, boundary); + return IOUtils.copy(in, output); } + public InputStream readBodyData() { + return new DelimitedInputStream(input, boundary); + } /** * <p> Reads <code>body-data</code> from the current @@ -562,50 +468,15 @@ public int discardBodyData() throws MalformedStreamException, IOException { - boolean done = false; - int pad; - int pos; - int bytesRead; - int total = 0; - while (!done) { - // Is boundary token present somewere in the buffer? - pos = findSeparator(); - if (pos != -1) { - // Write the rest of the data before the boundary. - total += pos - head; - head = pos; - done = true; - } else { - // Determine how much data should be kept in the - // buffer. - if (tail - head > keepRegion) { - pad = keepRegion; - } else { - pad = tail - head; - } - total += tail - head - pad; - - // Move the data to the beginning of the buffer. - System.arraycopy(buffer, tail - pad, buffer, 0, pad); - - // Refill buffer with new data. - head = 0; - bytesRead = input.read(buffer, pad, bufSize - pad); - - // [pprrrrrrr] - if (bytesRead != -1) { - tail = pad + bytesRead; - } else { - // The last pad amount is left in the buffer. - // Boundary can't be in there so signal an error - // condition. - total += pad; - throw new MalformedStreamException( - "Stream ended unexpectedly"); - } - } - } - return total; + DelimitedInputStream in = new DelimitedInputStream(input, boundary); + int total = 0; + for (;;) { + int c = in.read(); + if (c == -1) { + return total; + } + total++; + } } @@ -620,12 +491,12 @@ public boolean skipPreamble() throws IOException { // First delimiter may be not preceeded with a CRLF. 
- System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2); - boundaryLength = boundary.length - 2; + byte[] oldBoundary = boundary; + boundary = new byte[oldBoundary.length - 2]; + System.arraycopy(oldBoundary, 2, boundary, 0, oldBoundary.length - 2); try { // Discard all data up to the delimiter. - discardBodyData(); - + int c = discardBodyData(); // Read boundary - if succeded, the stream contains an // encapsulation. return readBoundary(); @@ -633,10 +504,7 @@ return false; } finally { // Restore delimiter. - System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2); - boundaryLength = boundary.length; - boundary[0] = CR; - boundary[1] = LF; + boundary = oldBoundary; } } @@ -665,59 +533,6 @@ /** - * Searches for a byte of specified value in the <code>buffer</code>, - * starting at the specified <code>position</code>. - * - * @param value The value to find. - * @param pos The starting position for searching. - * - * @return The position of byte found, counting from beginning of the - * <code>buffer</code>, or <code>-1</code> if not found. - */ - protected int findByte(byte value, - int pos) { - for (int i = pos; i < tail; i++) { - if (buffer[i] == value) { - return i; - } - } - - return -1; - } - - - /** - * Searches for the <code>boundary</code> in the <code>buffer</code> - * region delimited by <code>head</code> and <code>tail</code>. - * - * @return The position of the boundary found, counting from the - * beginning of the <code>buffer</code>, or <code>-1</code> if - * not found. 
- */ - protected int findSeparator() { - int first; - int match = 0; - int maxpos = tail - boundaryLength; - for (first = head; - (first <= maxpos) && (match != boundaryLength); - first++) { - first = findByte(boundary[0], first); - if (first == -1 || (first > maxpos)) { - return -1; - } - for (match = 1; match < boundaryLength; match++) { - if (buffer[first + match] != boundary[match]) { - break; - } - } - } - if (match == boundaryLength) { - return first - 1; - } - return -1; - } - - /** * Returns a string representation of this object. * * @return The string representation of this object. @@ -726,8 +541,6 @@ StringBuffer sbTemp = new StringBuffer(); sbTemp.append("boundary='"); sbTemp.append(String.valueOf(boundary)); - sbTemp.append("'\nbufSize="); - sbTemp.append(bufSize); return sbTemp.toString(); } @@ -780,71 +593,4 @@ super(message); } } - - - // ------------------------------------------------------ Debugging methods - - - // These are the methods that were used to debug this stuff. - /* - - // Dump data. - protected void dump() - { - System.out.println("01234567890"); - byte[] temp = new byte[buffer.length]; - for(int i=0; i<buffer.length; i++) - { - if (buffer[i] == 0x0D || buffer[i] == 0x0A) - { - temp[i] = 0x21; - } - else - { - temp[i] = buffer[i]; - } - } - System.out.println(new String(temp)); - int i; - for (i=0; i<head; i++) - System.out.print(" "); - System.out.println("h"); - for (i=0; i<tail; i++) - System.out.print(" "); - System.out.println("t"); - System.out.flush(); - } - - // Main routine, for testing purposes only. - // - // @param args A String[] with the command line arguments. - // @throws Exception, a generic exception. 
- public static void main( String[] args ) - throws Exception - { - File boundaryFile = new File("boundary.dat"); - int boundarySize = (int)boundaryFile.length(); - byte[] boundary = new byte[boundarySize]; - FileInputStream input = new FileInputStream(boundaryFile); - input.read(boundary,0,boundarySize); - - input = new FileInputStream("multipart.dat"); - MultipartStream chunks = new MultipartStream(input, boundary); - - int i = 0; - String header; - OutputStream output; - boolean nextChunk = chunks.skipPreamble(); - while (nextChunk) - { - header = chunks.readHeaders(); - System.out.println("!"+header+"!"); - System.out.println("wrote part"+i+".dat"); - output = new FileOutputStream("part"+(i++)+".dat"); - chunks.readBodyData(output); - nextChunk = chunks.readBoundary(); - } - } - - */ } Index: /home/dave/workspace/fileupload/src/test/org/apache/commons/fileupload/DelimitedInputStreamTest.java =================================================================== --- /home/dave/workspace/fileupload/src/test/org/apache/commons/fileupload/DelimitedInputStreamTest.java (revision 0) +++ /home/dave/workspace/fileupload/src/test/org/apache/commons/fileupload/DelimitedInputStreamTest.java (revision 0) @@ -1 +1,76 @@ +package org.apache.commons.fileupload; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PushbackInputStream; +import java.io.Writer; +import junit.framework.TestCase; + +public class DelimitedInputStreamTest extends TestCase { + + public void testReadByteByByte() throws IOException { + PushbackInputStream in = makeTestData("foo\n\nbar", 2); + byte[] term = { '\n', '\n' }; + DelimitedInputStream test = new DelimitedInputStream(in, term); + assertEquals('f', test.read()); + assertEquals('o', test.read()); + assertEquals('o', test.read()); + assertEquals(-1, 
test.read()); + assertEquals('\n', in.read()); + assertEquals('\n', in.read()); + assertEquals('b', in.read()); + assertEquals('a', in.read()); + assertEquals('r', in.read()); + assertEquals(-1, in.read()); + } + + public void testReadBuffered() throws IOException { + PushbackInputStream in = makeTestData("foo|bar", 2); + byte[] term = { '|' }; + DelimitedInputStream test = new DelimitedInputStream(in, term); + BufferedReader reader1 = new BufferedReader(new InputStreamReader(test)); + assertEquals("foo", reader1.readLine()); + assertEquals(null, reader1.readLine()); + assertEquals('|', in.read()); + BufferedReader reader2 = new BufferedReader(new InputStreamReader(in)); + assertEquals("bar", reader2.readLine()); + assertEquals(null, reader2.readLine()); + } + + public void testSignedChar() throws IOException { + PushbackInputStream in = makeTestData("\377", 2); + byte[] term = { '|' }; + DelimitedInputStream test = new DelimitedInputStream(in, term); + assertEquals('\377', test.read()); + } + + public void testClose() throws IOException { + PushbackInputStream in = makeTestData("foo\nbar", 1); + byte[] term = {'\n'}; + DelimitedInputStream test = new DelimitedInputStream(in, term); + test.read(); + test.close(); + try { + test.read(); + fail("should have raised exception on attempt to read closed stream"); + } catch (IOException e) { + // expected exception + } + } + + private static PushbackInputStream makeTestData(String data, int termSize) + throws IOException + { + ByteArrayOutputStream s = new ByteArrayOutputStream(); + Writer w = new OutputStreamWriter(s); + w.write(data); + w.flush(); + InputStream in = new ByteArrayInputStream(s.toByteArray()); + return new PushbackInputStream(in, termSize*2); + } +} \ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]