Author: jbellis
Date: Wed Dec 14 02:18:44 2011
New Revision: 1214034

URL: http://svn.apache.org/viewvc?rev=1214034&view=rev
Log:
clean up OpenBitSet
patch by jbellis; reviewed by slebresne for CASSANDRA-3622

Modified:
    cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java
    cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java

Modified: cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java
URL: 
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java?rev=1214034&r1=1214033&r2=1214034&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java 
(original)
+++ cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java Wed 
Dec 14 02:18:44 2011
@@ -113,7 +113,7 @@ public class BloomFilter extends Filter
     {
         for (long bucketIndex : getHashBuckets(key))
         {
-            bitset.fastSet(bucketIndex);
+            bitset.set(bucketIndex);
         }
     }
 
@@ -121,7 +121,7 @@ public class BloomFilter extends Filter
     {
       for (long bucketIndex : getHashBuckets(key))
       {
-          if (!bitset.fastGet(bucketIndex))
+          if (!bitset.get(bucketIndex))
           {
               return false;
           }

Modified: 
cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java
URL: 
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java?rev=1214034&r1=1214033&r2=1214034&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java 
(original)
+++ cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java Wed 
Dec 14 02:18:44 2011
@@ -21,8 +21,10 @@ import java.util.Arrays;
 import java.io.Serializable;
 import java.util.BitSet;
 
-/** An "open" BitSet implementation that allows direct access to the array of 
words
- * storing the bits.
+/**
+ * An "open" BitSet implementation that allows direct access to the arrays of 
words
+ * storing the bits.  Derived from Lucene's OpenBitSet, but with a paged 
backing array
+ * (see bits delaration, below).
  * <p/>
  * Unlike java.util.bitset, the fact that bits are packed into an array of 
longs
  * is part of the interface.  This allows efficient implementation of other 
algorithms
@@ -39,77 +41,38 @@ import java.util.BitSet;
  * hence people re-implement their own version in order to get better 
performance).
  * If you want a "safe", totally encapsulated (and slower and limited) BitSet
  * class, use <code>java.util.BitSet</code>.
- * <p/>
- * <h3>Performance Results</h3>
- *
- Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
-<br/>BitSet size = 1,000,000
-<br/>Results are java.util.BitSet time divided by OpenBitSet time.
-<table border="1">
- <tr>
-  <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> 
<th>nextSetBit</th> <th>get</th> <th>iterator</th>
- </tr>
- <tr>
-  <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> 
<td>1.99</td> <td>1.58</td>
- </tr>
- <tr>
-   <th>1% full</th> <td>3.31</td> <td>3.90</td> <td>&nbsp;</td> <td>1.04</td> 
<td>&nbsp;</td> <td>0.99</td>
- </tr>
-</table>
-<br/>
-Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
-<br/>BitSet size = 1,000,000
-<br/>Results are java.util.BitSet time divided by OpenBitSet time.
-<table border="1">
- <tr>
-  <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> 
<th>nextSetBit</th> <th>get</th> <th>iterator</th>
- </tr>
- <tr>
-  <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> 
<td>1.12</td> <td>1.25</td>
- </tr>
- <tr>
-   <th>1% full</th> <td>2.51</td> <td>3.49</td> <td>&nbsp;</td> <td>1.00</td> 
<td>&nbsp;</td> <td>1.02</td>
- </tr>
-</table>
  */
 
 public class OpenBitSet implements Serializable {
-  protected long[][] bits;
-  protected int wlen;   // number of words (elements) used in the array
-  private final int pageCount;
   /**
-   * length of bits[][] page in long[] elements. 
-   * Choosing unform size for all sizes of bitsets fight fragmentation for 
very large
-   * bloom filters.
+   * We break the bitset up into multiple arrays to avoid promotion failure 
caused by attempting to allocate
+   * large, contiguous arrays (CASSANDRA-2466).  All sub-arrays but the last 
are uniformly PAGE_SIZE words;
+   * to avoid waste in small bloom filters (of which Cassandra has many: one 
per row) the last sub-array
+   * is sized to exactly the remaining number of words required to achieve the 
desired set size (CASSANDRA-3618).
    */
-  protected static final int PAGE_SIZE= 4096; 
+  private final long[][] bits;
+  private int wlen; // number of words (elements) used in the array
+  private final int pageCount;
+  private static final int PAGE_SIZE = 4096;
 
-  /** Constructs an OpenBitSet large enough to hold numBits.
-   *
+  /**
+   * Constructs an OpenBitSet large enough to hold numBits.
    * @param numBits
    */
   public OpenBitSet(long numBits) 
   {
-      this(numBits,true);
-  }
-  
-  public OpenBitSet(long numBits, boolean allocatePages) 
-  {
-    wlen= bits2words(numBits);    
-    int lastPageSize = wlen % PAGE_SIZE;
-    int fullPageCount = wlen / PAGE_SIZE;
-    pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1);
-    
-    bits = new long[pageCount][];
+      wlen = bits2words(numBits);
+      int lastPageSize = wlen % PAGE_SIZE;
+      int fullPageCount = wlen / PAGE_SIZE;
+      pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1);
 
-    if (allocatePages)
-    {
-        for (int i = 0; i < fullPageCount; ++i)
-            bits[i] = new long[PAGE_SIZE];
+      bits = new long[pageCount][];
 
-        if (lastPageSize != 0)
-            bits[bits.length - 1] = new long[lastPageSize];
-    }
+      for (int i = 0; i < fullPageCount; ++i)
+          bits[i] = new long[PAGE_SIZE];
+
+      if (lastPageSize != 0)
+          bits[bits.length - 1] = new long[lastPageSize];
   }
 
   public OpenBitSet() {
@@ -164,24 +127,11 @@ public class OpenBitSet implements Seria
   public int getNumWords() { return wlen; }
 
 
-  /** Returns true or false for the specified bit index. */
-  public boolean get(int index) {
-    int i = index >> 6;               // div 64
-    // signed shift will keep a negative index and force an
-    // array-index-out-of-bounds-exception, removing the need for an explicit 
check.
-    if (i>=wlen) return false;
-
-    int bit = index & 0x3f;           // mod 64
-    long bitmask = 1L << bit;
-    // TODO perfectionist one can implement this using bit operations
-    return (bits[i / PAGE_SIZE ][i % PAGE_SIZE] & bitmask) != 0;
-  }
-
-
- /** Returns true or false for the specified bit index.
+  /**
+   * Returns true or false for the specified bit index.
    * The index should be less than the OpenBitSet size
    */
-  public boolean fastGet(int index) {
+  public boolean get(int index) {
     int i = index >> 6;               // div 64
     // signed shift will keep a negative index and force an
     // array-index-out-of-bounds-exception, removing the need for an explicit 
check.
@@ -191,23 +141,11 @@ public class OpenBitSet implements Seria
     return (bits[i / PAGE_SIZE][i % PAGE_SIZE ] & bitmask) != 0;
   }
 
-
-
- /** Returns true or false for the specified bit index
-  */
-  public boolean get(long index) {
-    int i = (int)(index >> 6);             // div 64
-    if (i>=wlen) return false;
-    int bit = (int)index & 0x3f;           // mod 64
-    long bitmask = 1L << bit;
-    // TODO perfectionist one can implement this using bit operations
-    return (bits[i / PAGE_SIZE][i % PAGE_SIZE ] & bitmask) != 0;
-  }
-
-  /** Returns true or false for the specified bit index.
+  /**
+   * Returns true or false for the specified bit index.
    * The index should be less than the OpenBitSet size.
    */
-  public boolean fastGet(long index) {
+  public boolean get(long index) {
     int i = (int)(index >> 6);               // div 64
     int bit = (int)index & 0x3f;           // mod 64
     long bitmask = 1L << bit;
@@ -224,81 +162,33 @@ public class OpenBitSet implements Seria
     return ((int)(bits[i / PAGE_SIZE][i % PAGE_SIZE ]>>>bit)) & 0x01;
   }
 
-
-  /** sets a bit, expanding the set size if necessary */
+  /**
+   * Sets the bit at the specified index.
+   * The index should be less than the OpenBitSet size.
+   */
   public void set(long index) {
-    int wordNum = expandingWordNum(index);
+    int wordNum = (int)(index >> 6);
     int bit = (int)index & 0x3f;
     long bitmask = 1L << bit;
     bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask;
   }
 
-
- /** Sets the bit at the specified index.
-  * The index should be less than the OpenBitSet size.
-  */
-  public void fastSet(int index) {
+  /**
+   * Sets the bit at the specified index.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void set(int index) {
     int wordNum = index >> 6;      // div 64
     int bit = index & 0x3f;     // mod 64
     long bitmask = 1L << bit;
     bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask;
   }
 
- /** Sets the bit at the specified index.
-  * The index should be less than the OpenBitSet size.
-  */
-  public void fastSet(long index) {
-    int wordNum = (int)(index >> 6);
-    int bit = (int)index & 0x3f;
-    long bitmask = 1L << bit;
-    bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask;
-  }
-
-  /** Sets a range of bits, expanding the set size if necessary
-   *
-   * @param startIndex lower index
-   * @param endIndex one-past the last bit to set
-   */
-  public void set(long startIndex, long endIndex) {
-    if (endIndex <= startIndex) return;
-
-    int startWord = (int)(startIndex>>6);
-
-    // since endIndex is one past the end, this is index of the last
-    // word to be changed.
-    int endWord   = expandingWordNum(endIndex-1);
-
-    long startmask = -1L << startIndex;
-    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as 
-endIndex due to wrap
-
-    if (startWord == endWord) {
-      bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] |= (startmask & 
endmask);
-      return;
-    }
-
-    assert startWord / PAGE_SIZE == endWord / PAGE_SIZE : "cross page sets not 
suppotred at all - they are not used";
-
-    bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] |= startmask;
-    Arrays.fill(bits[ startWord / PAGE_SIZE], (startWord+1) % PAGE_SIZE , 
endWord % PAGE_SIZE , -1L);
-    bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] |= endmask;
-  }
-
-
-
-  protected int expandingWordNum(long index) {
-    int wordNum = (int)(index >> 6);
-    if (wordNum>=wlen) {
-      ensureCapacity(index+1);
-      wlen = wordNum+1;
-    }
-    return wordNum;
-  }
-
-
-  /** clears a bit.
+  /**
+   * clears a bit.
    * The index should be less than the OpenBitSet size.
    */
-  public void fastClear(int index) {
+  public void clear(int index) {
     int wordNum = index >> 6;
     int bit = index & 0x03f;
     long bitmask = 1L << bit;
@@ -312,26 +202,19 @@ public class OpenBitSet implements Seria
     // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
   }
 
-  /** clears a bit.
+  /**
+   * clears a bit.
    * The index should be less than the OpenBitSet size.
    */
-  public void fastClear(long index) {
-    int wordNum = (int)(index >> 6); // div 64
-    int bit = (int)index & 0x3f;     // mod 64
-    long bitmask = 1L << bit;
-    bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask;
-  }
-
-  /** clears a bit, allowing access beyond the current set size without 
changing the size.*/
   public void clear(long index) {
     int wordNum = (int)(index >> 6); // div 64
-    if (wordNum>=wlen) return;
     int bit = (int)index & 0x3f;     // mod 64
     long bitmask = 1L << bit;
     bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask;
   }
 
-  /** Clears a range of bits.  Clearing past the end does not change the size 
of the set.
+  /**
+   * Clears a range of bits.  Clearing past the end does not change the size 
of the set.
    *
    * @param startIndex lower index
    * @param endIndex one-past the last bit to clear
@@ -448,26 +331,19 @@ public class OpenBitSet implements Seria
   /** flips a bit.
    * The index should be less than the OpenBitSet size.
    */
-  public void fastFlip(int index) {
+  public void flip(int index) {
     int wordNum = index >> 6;      // div 64
     int bit = index & 0x3f;     // mod 64
     long bitmask = 1L << bit;
     bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask;
   }
 
-  /** flips a bit.
+  /**
+   * flips a bit.
    * The index should be less than the OpenBitSet size.
    */
-  public void fastFlip(long index) {
-    int wordNum = (int)(index >> 6);   // div 64
-    int bit = (int)index & 0x3f;       // mod 64
-    long bitmask = 1L << bit;
-    bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask;
-  }
-
-  /** flips a bit, expanding the set size if necessary */
   public void flip(long index) {
-    int wordNum = expandingWordNum(index);
+    int wordNum = (int)(index >> 6);   // div 64
     int bit = (int)index & 0x3f;       // mod 64
     long bitmask = 1L << bit;
     bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask;
@@ -495,44 +371,6 @@ public class OpenBitSet implements Seria
     return (bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] & bitmask) != 0;
   }
 
-  /** Flips a range of bits, expanding the set size if necessary
-   *
-   * @param startIndex lower index
-   * @param endIndex one-past the last bit to flip
-   */
-  public void flip(long startIndex, long endIndex) {
-    if (endIndex <= startIndex) return;
-    int startWord = (int)(startIndex>>6);
-
-    // since endIndex is one past the end, this is index of the last
-    // word to be changed.
-    int endWord   = expandingWordNum(endIndex-1);
-
-    /*** Grrr, java shifting wraps around so -1L>>>64 == -1
-     * for that reason, make sure not to use endmask if the bits to flip will
-     * be zero in the last word (redefine endWord to be the last changed...)
-    long startmask = -1L << (startIndex & 0x3f);     // example: 11111...111000
-    long endmask = -1L >>> (64-(endIndex & 0x3f));   // example: 00111...111111
-    ***/
-
-    long startmask = -1L << startIndex;
-    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as 
-endIndex due to wrap
-
-    if (startWord == endWord) {
-      bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] ^= (startmask & 
endmask);
-      return;
-    }
-
-    bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] ^= startmask;
-
-    for (int i=startWord+1; i<endWord; i++) {
-      bits[i / PAGE_SIZE][ i % PAGE_SIZE] = ~bits[i / PAGE_SIZE][ i % 
PAGE_SIZE];
-    }
-
-    bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] ^= endmask;
-  }
-
-
   /** @return the number of set bits */
   public long cardinality() 
   {
@@ -613,22 +451,6 @@ public class OpenBitSet implements Seria
     intersect(other);
   }
 
-
-  /** Expand the long[] with the size given as a number of words (64 bit 
longs).
-   * getNumWords() is unchanged by this call.
-   */
-  public void ensureCapacityWords(int numWords) 
-  {
-    assert numWords<=wlen : "Growing of paged bitset is not supported"; 
-  }
-
-  /** Ensure that the long[] is big enough to hold numBits, expanding it if 
necessary.
-   * getNumWords() is unchanged by this call.
-   */
-  public void ensureCapacity(long numBits) {
-    ensureCapacityWords(bits2words(numBits));
-  }
-
   /** Lowers numWords, the number of words in use,
    * by checking for trailing zero words.
    */


Reply via email to