Author: jbellis Date: Wed Dec 14 02:18:44 2011 New Revision: 1214034 URL: http://svn.apache.org/viewvc?rev=1214034&view=rev Log: clean up OpenBitSet patch by jbellis; reviewed by slebresne for CASSANDRA-3622
Modified: cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java Modified: cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java?rev=1214034&r1=1214033&r2=1214034&view=diff ============================================================================== --- cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java (original) +++ cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java Wed Dec 14 02:18:44 2011 @@ -113,7 +113,7 @@ public class BloomFilter extends Filter { for (long bucketIndex : getHashBuckets(key)) { - bitset.fastSet(bucketIndex); + bitset.set(bucketIndex); } } @@ -121,7 +121,7 @@ public class BloomFilter extends Filter { for (long bucketIndex : getHashBuckets(key)) { - if (!bitset.fastGet(bucketIndex)) + if (!bitset.get(bucketIndex)) { return false; } Modified: cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java?rev=1214034&r1=1214033&r2=1214034&view=diff ============================================================================== --- cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java (original) +++ cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java Wed Dec 14 02:18:44 2011 @@ -21,8 +21,10 @@ import java.util.Arrays; import java.io.Serializable; import java.util.BitSet; -/** An "open" BitSet implementation that allows direct access to the array of words - * storing the bits. +/** + * An "open" BitSet implementation that allows direct access to the arrays of words + * storing the bits. Derived from Lucene's OpenBitSet, but with a paged backing array + * (see bits delaration, below). * <p/> * Unlike java.util.bitset, the fact that bits are packed into an array of longs * is part of the interface. This allows efficient implementation of other algorithms @@ -39,77 +41,38 @@ import java.util.BitSet; * hence people re-implement their own version in order to get better performance). * If you want a "safe", totally encapsulated (and slower and limited) BitSet * class, use <code>java.util.BitSet</code>. - * <p/> - * <h3>Performance Results</h3> - * - Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M -<br/>BitSet size = 1,000,000 -<br/>Results are java.util.BitSet time divided by OpenBitSet time. -<table border="1"> - <tr> - <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th> - </tr> - <tr> - <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> <td>1.99</td> <td>1.58</td> - </tr> - <tr> - <th>1% full</th> <td>3.31</td> <td>3.90</td> <td> </td> <td>1.04</td> <td> </td> <td>0.99</td> - </tr> -</table> -<br/> -Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M -<br/>BitSet size = 1,000,000 -<br/>Results are java.util.BitSet time divided by OpenBitSet time. -<table border="1"> - <tr> - <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th> - </tr> - <tr> - <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> <td>1.12</td> <td>1.25</td> - </tr> - <tr> - <th>1% full</th> <td>2.51</td> <td>3.49</td> <td> </td> <td>1.00</td> <td> </td> <td>1.02</td> - </tr> -</table> */ public class OpenBitSet implements Serializable { - protected long[][] bits; - protected int wlen; // number of words (elements) used in the array - private final int pageCount; /** - * length of bits[][] page in long[] elements. - * Choosing unform size for all sizes of bitsets fight fragmentation for very large - * bloom filters. + * We break the bitset up into multiple arrays to avoid promotion failure caused by attempting to allocate + * large, contiguous arrays (CASSANDRA-2466). All sub-arrays but the last are uniformly PAGE_SIZE words; + * to avoid waste in small bloom filters (of which Cassandra has many: one per row) the last sub-array + * is sized to exactly the remaining number of words required to achieve the desired set size (CASSANDRA-3618). */ - protected static final int PAGE_SIZE= 4096; + private final long[][] bits; + private int wlen; // number of words (elements) used in the array + private final int pageCount; + private static final int PAGE_SIZE = 4096; - /** Constructs an OpenBitSet large enough to hold numBits. - * + /** + * Constructs an OpenBitSet large enough to hold numBits. * @param numBits */ public OpenBitSet(long numBits) { - this(numBits,true); - } - - public OpenBitSet(long numBits, boolean allocatePages) - { - wlen= bits2words(numBits); - int lastPageSize = wlen % PAGE_SIZE; - int fullPageCount = wlen / PAGE_SIZE; - pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1); - - bits = new long[pageCount][]; + wlen = bits2words(numBits); + int lastPageSize = wlen % PAGE_SIZE; + int fullPageCount = wlen / PAGE_SIZE; + pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1); - if (allocatePages) - { - for (int i = 0; i < fullPageCount; ++i) - bits[i] = new long[PAGE_SIZE]; + bits = new long[pageCount][]; - if (lastPageSize != 0) - bits[bits.length - 1] = new long[lastPageSize]; - } + for (int i = 0; i < fullPageCount; ++i) + bits[i] = new long[PAGE_SIZE]; + + if (lastPageSize != 0) + bits[bits.length - 1] = new long[lastPageSize]; } public OpenBitSet() { @@ -164,24 +127,11 @@ public class OpenBitSet implements Seria public int getNumWords() { return wlen; } - /** Returns true or false for the specified bit index. */ - public boolean get(int index) { - int i = index >> 6; // div 64 - // signed shift will keep a negative index and force an - // array-index-out-of-bounds-exception, removing the need for an explicit check. - if (i>=wlen) return false; - - int bit = index & 0x3f; // mod 64 - long bitmask = 1L << bit; - // TODO perfectionist one can implement this using bit operations - return (bits[i / PAGE_SIZE ][i % PAGE_SIZE] & bitmask) != 0; - } - - - /** Returns true or false for the specified bit index. + /** + * Returns true or false for the specified bit index. * The index should be less than the OpenBitSet size */ - public boolean fastGet(int index) { + public boolean get(int index) { int i = index >> 6; // div 64 // signed shift will keep a negative index and force an // array-index-out-of-bounds-exception, removing the need for an explicit check. @@ -191,23 +141,11 @@ public class OpenBitSet implements Seria return (bits[i / PAGE_SIZE][i % PAGE_SIZE ] & bitmask) != 0; } - - - /** Returns true or false for the specified bit index - */ - public boolean get(long index) { - int i = (int)(index >> 6); // div 64 - if (i>=wlen) return false; - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; - // TODO perfectionist one can implement this using bit operations - return (bits[i / PAGE_SIZE][i % PAGE_SIZE ] & bitmask) != 0; - } - - /** Returns true or false for the specified bit index. + /** + * Returns true or false for the specified bit index. * The index should be less than the OpenBitSet size. */ - public boolean fastGet(long index) { + public boolean get(long index) { int i = (int)(index >> 6); // div 64 int bit = (int)index & 0x3f; // mod 64 long bitmask = 1L << bit; @@ -224,81 +162,33 @@ public class OpenBitSet implements Seria return ((int)(bits[i / PAGE_SIZE][i % PAGE_SIZE ]>>>bit)) & 0x01; } - - /** sets a bit, expanding the set size if necessary */ + /** + * Sets the bit at the specified index. + * The index should be less than the OpenBitSet size. + */ public void set(long index) { - int wordNum = expandingWordNum(index); + int wordNum = (int)(index >> 6); int bit = (int)index & 0x3f; long bitmask = 1L << bit; bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask; } - - /** Sets the bit at the specified index. - * The index should be less than the OpenBitSet size. - */ - public void fastSet(int index) { + /** + * Sets the bit at the specified index. + * The index should be less than the OpenBitSet size. + */ + public void set(int index) { int wordNum = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask; } - /** Sets the bit at the specified index. - * The index should be less than the OpenBitSet size. - */ - public void fastSet(long index) { - int wordNum = (int)(index >> 6); - int bit = (int)index & 0x3f; - long bitmask = 1L << bit; - bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask; - } - - /** Sets a range of bits, expanding the set size if necessary - * - * @param startIndex lower index - * @param endIndex one-past the last bit to set - */ - public void set(long startIndex, long endIndex) { - if (endIndex <= startIndex) return; - - int startWord = (int)(startIndex>>6); - - // since endIndex is one past the end, this is index of the last - // word to be changed. - int endWord = expandingWordNum(endIndex-1); - - long startmask = -1L << startIndex; - long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap - - if (startWord == endWord) { - bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] |= (startmask & endmask); - return; - } - - assert startWord / PAGE_SIZE == endWord / PAGE_SIZE : "cross page sets not suppotred at all - they are not used"; - - bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] |= startmask; - Arrays.fill(bits[ startWord / PAGE_SIZE], (startWord+1) % PAGE_SIZE , endWord % PAGE_SIZE , -1L); - bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] |= endmask; - } - - - - protected int expandingWordNum(long index) { - int wordNum = (int)(index >> 6); - if (wordNum>=wlen) { - ensureCapacity(index+1); - wlen = wordNum+1; - } - return wordNum; - } - - - /** clears a bit. + /** + * clears a bit. * The index should be less than the OpenBitSet size. */ - public void fastClear(int index) { + public void clear(int index) { int wordNum = index >> 6; int bit = index & 0x03f; long bitmask = 1L << bit; @@ -312,26 +202,19 @@ public class OpenBitSet implements Seria // bits[word] &= Long.rotateLeft(0xfffffffe,bit); } - /** clears a bit. + /** + * clears a bit. * The index should be less than the OpenBitSet size. */ - public void fastClear(long index) { - int wordNum = (int)(index >> 6); // div 64 - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; - bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask; - } - - /** clears a bit, allowing access beyond the current set size without changing the size.*/ public void clear(long index) { int wordNum = (int)(index >> 6); // div 64 - if (wordNum>=wlen) return; int bit = (int)index & 0x3f; // mod 64 long bitmask = 1L << bit; bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask; } - /** Clears a range of bits. Clearing past the end does not change the size of the set. + /** + * Clears a range of bits. Clearing past the end does not change the size of the set. * * @param startIndex lower index * @param endIndex one-past the last bit to clear @@ -448,26 +331,19 @@ public class OpenBitSet implements Seria /** flips a bit. * The index should be less than the OpenBitSet size. */ - public void fastFlip(int index) { + public void flip(int index) { int wordNum = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask; } - /** flips a bit. + /** + * flips a bit. * The index should be less than the OpenBitSet size. */ - public void fastFlip(long index) { - int wordNum = (int)(index >> 6); // div 64 - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; - bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask; - } - - /** flips a bit, expanding the set size if necessary */ public void flip(long index) { - int wordNum = expandingWordNum(index); + int wordNum = (int)(index >> 6); // div 64 int bit = (int)index & 0x3f; // mod 64 long bitmask = 1L << bit; bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask; @@ -495,44 +371,6 @@ public class OpenBitSet implements Seria return (bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] & bitmask) != 0; } - /** Flips a range of bits, expanding the set size if necessary - * - * @param startIndex lower index - * @param endIndex one-past the last bit to flip - */ - public void flip(long startIndex, long endIndex) { - if (endIndex <= startIndex) return; - int startWord = (int)(startIndex>>6); - - // since endIndex is one past the end, this is index of the last - // word to be changed. - int endWord = expandingWordNum(endIndex-1); - - /*** Grrr, java shifting wraps around so -1L>>>64 == -1 - * for that reason, make sure not to use endmask if the bits to flip will - * be zero in the last word (redefine endWord to be the last changed...) - long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000 - long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111 - ***/ - - long startmask = -1L << startIndex; - long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap - - if (startWord == endWord) { - bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] ^= (startmask & endmask); - return; - } - - bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] ^= startmask; - - for (int i=startWord+1; i<endWord; i++) { - bits[i / PAGE_SIZE][ i % PAGE_SIZE] = ~bits[i / PAGE_SIZE][ i % PAGE_SIZE]; - } - - bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] ^= endmask; - } - - /** @return the number of set bits */ public long cardinality() { @@ -613,22 +451,6 @@ public class OpenBitSet implements Seria intersect(other); } - - /** Expand the long[] with the size given as a number of words (64 bit longs). - * getNumWords() is unchanged by this call. - */ - public void ensureCapacityWords(int numWords) - { - assert numWords<=wlen : "Growing of paged bitset is not supported"; - } - - /** Ensure that the long[] is big enough to hold numBits, expanding it if necessary. - * getNumWords() is unchanged by this call. - */ - public void ensureCapacity(long numBits) { - ensureCapacityWords(bits2words(numBits)); - } - /** Lowers numWords, the number of words in use, * by checking for trailing zero words. */