I figured I might as well add comments while I am reading through and figuring out the code. One thing I was not clear on: characters are stored using 1 to 3 bytes. Is that sufficient to represent all Unicode characters? I thought Unicode characters were four bytes.
Index: InputStream.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/store/InputStream.java,v retrieving revision 1.1.1.1 diff -w -u -r1.1.1.1 InputStream.java --- InputStream.java 2001/09/18 16:29:59 1.1.1.1 +++ InputStream.java 2001/10/11 18:37:23 @@ -60,8 +60,6 @@ Abstract class for input from a file in a Directory. @author Doug Cutting */ - -/** A random-access input stream */ abstract public class InputStream implements Cloneable { final static int BUFFER_SIZE = OutputStream.BUFFER_SIZE; @@ -81,6 +79,7 @@ return buffer[bufferPosition++]; } + /** InputStream-like methods @see java.io.InputStream */ public final void readBytes(byte[] b, int offset, int len) throws IOException { if (len < BUFFER_SIZE) { @@ -97,11 +96,22 @@ } } + /** Read an integer from the stream. The integer must have been written + * by a call to OutputStream.writeInt. It is stored as four bytes, from most to least + * significant. + */ public final int readInt() throws IOException { return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); } + /** Read a compressed integer from the stream. The integer must have been + * written by a call to OutputStream.writeVInt. It is stored as a series of bytes, from + * least significant to the most significant. Each byte contains 7 bits of data + * and an 8th (0x80) bit that is set when more bytes follow and clear on the last byte. With this + * format, smaller integers occupy only one byte, larger ones - two bytes, and + * so on up to 5 bytes. + */ public final int readVInt() throws IOException { byte b = readByte(); int i = b & 0x7F; @@ -112,10 +122,18 @@ return i; } + /** Read a long from the stream. The long must have been written by a call to + * OutputStream.writeLong. It is stored as 8 bytes, from most significant to the least + * significant. 
+ */ public final long readLong() throws IOException { return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL); } + /** Read a compressed long from the stream. The long must have been written by + * a call to OutputStream.writeVLong. It is stored similarly to the VInt, but may occupy + * 1 to 9 bytes (for non-negative values). + */ public final long readVLong() throws IOException { byte b = readByte(); long i = b & 0x7F; @@ -126,6 +144,10 @@ return i; } + /** Read a string from the stream. The string must have been written by a call + * to OutputStream.writeString. It is stored as a VInt (see readVInt) + * indicating the string size, followed by that many chars (see readChars). + */ public final String readString() throws IOException { int length = readVInt(); if (chars == null || length > chars.length) @@ -134,6 +156,12 @@ return new String(chars, 0, length); } + /** Read an array of characters, placing them into the provided buffer. + * The read characters are placed into array starting with the index <i>start</i> + * and continuing for <i>length</i> characters. The characters must have been + * written with a call to OutputStream.writeChars. Each character is stored + * using one, two, or three bytes, depending on the value of the character. + */ public final void readChars(char[] buffer, int start, int length) throws IOException { final int end = start + length; @@ -179,6 +207,7 @@ return bufferStart + bufferPosition; } + /** RandomAccessFile-like methods @see java.io.RandomAccessFile */ public final void seek(long pos) throws IOException { if (pos >= bufferStart && pos < (bufferStart + bufferLength)) bufferPosition = (int)(pos - bufferStart); // seek within buffer @@ -191,10 +220,16 @@ } abstract protected void seekInternal(long pos) throws IOException; + /** RandomAccessFile-like methods @see java.io.RandomAccessFile */ public final long length() { return length; } + /** Create a clone of this stream. 
The clone provides access to the same + * underlying descriptor as the original file; however, it maintains its own + * buffer and file position so it can be used concurrently with the original + * file and other clones. + */ public Object clone() { InputStream clone = null; try { Index: OutputStream.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/store/OutputStream.java,v retrieving revision 1.1.1.1 diff -w -u -r1.1.1.1 OutputStream.java --- OutputStream.java 2001/09/18 16:29:59 1.1.1.1 +++ OutputStream.java 2001/10/11 18:37:23 @@ -60,8 +60,6 @@ Abstract class for output from a file in a Directory. @author Doug Cutting */ - -/** A random-access output stream */ abstract public class OutputStream { final static int BUFFER_SIZE = 1024; @@ -76,11 +74,15 @@ buffer[bufferPosition++] = b; } + /** OutputStream-like methods @see java.io.OutputStream */ public final void writeBytes(byte[] b, int length) throws IOException { for (int i = 0; i < length; i++) writeByte(b[i]); } + /** Write an integer into the stream. The integer can be read by calling + * InputStream.readInt. It is stored using four bytes. + */ public final void writeInt(int i) throws IOException { writeByte((byte)(i >> 24)); writeByte((byte)(i >> 16)); @@ -88,6 +90,10 @@ writeByte((byte) i); } + /** Write a compressed integer into the stream. The integer can be read by + * calling InputStream.readVInt. It is stored using from one to five bytes, + * depending on the value of the integer. + */ public final void writeVInt(int i) throws IOException { while ((i & ~0x7F) != 0) { writeByte((byte)((i & 0x7f) | 0x80)); @@ -96,11 +102,18 @@ writeByte((byte)i); } + /** Write a long into the stream. The long can be read by calling InputStream.readLong. + * It is stored using 8 bytes. 
+ */ public final void writeLong(long i) throws IOException { writeInt((int) (i >> 32)); writeInt((int) i); } + /** Write a compressed long into the stream. The long can be read by calling + * InputStream.readVLong. It is stored using from one to nine bytes depending + * on the value of the long (assuming it is non-negative). + */ public final void writeVLong(long i) throws IOException { while ((i & ~0x7F) != 0) { writeByte((byte)((i & 0x7f) | 0x80)); @@ -109,12 +122,20 @@ writeByte((byte)i); } + /** Write a string into the stream. The string can be read by calling + * InputStream.readString. It is stored as a VInt representing the number of + * characters, followed by that many characters (see writeChars). + */ public final void writeString(String s) throws IOException { int length = s.length(); writeVInt(length); writeChars(s, 0, length); } + /** Write a range of characters from a string into the stream. The characters can be read by + * calling InputStream.readChars. Each character is stored using from one to + * three bytes depending on the value of the character. + */ public final void writeChars(String s, int start, int length) throws IOException { final int end = start + length; @@ -141,6 +162,7 @@ abstract protected void flushBuffer(byte[] b, int len) throws IOException; + /** Flush and close the stream. */ public void close() throws IOException { flush(); } @@ -150,11 +172,13 @@ return bufferStart + bufferPosition; } + /** RandomAccessFile-like methods @see java.io.RandomAccessFile */ public void seek(long pos) throws IOException { flush(); bufferStart = pos; } + /** RandomAccessFile-like methods @see java.io.RandomAccessFile */ abstract public long length() throws IOException;