This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch quotient-filter
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git


The following commit(s) were added to refs/heads/quotient-filter by this push:
     new 078c1f42 style changes
078c1f42 is described below

commit 078c1f42319887ded729db9daf6675a06315687e
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Wed Jun 12 17:07:56 2024 -0700

    style changes
---
 .../filters/quotientfilter/Filter.java             |    2 +-
 .../filters/quotientfilter/Iterator.java           |   10 +-
 .../filters/quotientfilter/QuotientFilter.java     | 1315 +++++++++-----------
 .../filters/quotientfilter/QuotientFilterTest.java |   36 +-
 4 files changed, 610 insertions(+), 753 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java 
b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
index bfb1ad25..53dfd1c4 100644
--- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
+++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
@@ -99,7 +99,7 @@ public abstract class Filter {
         return XxHash.hashLong(input, 0L) ; // CD edit for datasketches hash 
function using same seed.
     }
 
-    public long get_space_use() { return 0 ; }
+    public long getSpaceUse() { return 0 ; }
 //    public int get_bits_per_entry() { return 0 ; }
 //
 //    public abstract long get_num_entries(boolean 
include_all_internal_filters);
diff --git 
a/src/main/java/org/apache/datasketches/filters/quotientfilter/Iterator.java 
b/src/main/java/org/apache/datasketches/filters/quotientfilter/Iterator.java
index 6b76a178..e04e6cd1 100644
--- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Iterator.java
+++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/Iterator.java
@@ -49,22 +49,22 @@ public class Iterator  {
 
     boolean next() {
 
-        if (index == qf.get_num_slots()) {
+        if (index == qf.getNumSlots()) {
             return false;
         }
 
-        long slot = qf.get_slot(index);
+        long slot = qf.getSlot(index);
         boolean occupied = (slot & 1) != 0;
         boolean continuation = (slot & 2) != 0;
         boolean shifted = (slot & 4) != 0;
 
 
-        while (!occupied && !continuation && !shifted && index < 
qf.get_num_slots()) {
+        while (!occupied && !continuation && !shifted && index < 
qf.getNumSlots()) {
             index++;
-            if (index == qf.get_num_slots()) {
+            if (index == qf.getNumSlots()) {
                 return false;
             }
-            slot = qf.get_slot(index);
+            slot = qf.getSlot(index);
             occupied = (slot & 1) != 0;
             continuation = (slot & 2) != 0;
             shifted = (slot & 4) != 0;
diff --git 
a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
 
b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
index d90f243c..bd8c2abd 100644
--- 
a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
+++ 
b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
@@ -28,756 +28,613 @@ import 
org.apache.datasketches.filters.common.HeapBitArray;
 
 public class QuotientFilter extends Filter {
 
-    int bitPerEntry;
-    int fingerprintLength;
-    int power_of_two_size;
-    int num_entries;
-    BitArray filter;
-
-    double expansion_threshold;
-    long max_entries_before_expansion;
-    boolean expand_autonomously;
-    boolean is_full;
-
-    // statistics, computed in the compute_statistics method. method should be 
called before these are used
-    long num_runs;
-    long num_clusters;
-    public double avg_run_length;
-    public double avg_cluster_length;
-
-    int original_fingerprint_size;
-    int num_expansions;
-
-
-    public QuotientFilter(int power_of_two, int bits_per_entry) {
-        power_of_two_size = power_of_two;
-        bitPerEntry = bits_per_entry;
-        fingerprintLength = bits_per_entry - 3;
-        final long init_size = 1L << power_of_two;
-        filter = make_filter(init_size, bits_per_entry);
-
-        expansion_threshold = 0.8;
-        max_entries_before_expansion = (int) (init_size * expansion_threshold);
-        expand_autonomously = false;
-        is_full = false;
-
-        original_fingerprint_size = fingerprintLength;
-        num_expansions = 0;
-        //hash_type = XxHash.hashLong ; //HashType.xxh;
-    }
-
-    public boolean rejuvenate(long key) {
-        return false;
-    }
-
-    public long get_num_entries() {
-        return num_entries;
-    }
-
-    public long get_max_entries_before_expansion() {
-        return max_entries_before_expansion;
-    }
-
-    public boolean expand_autonomously() {
-        return expand_autonomously;
-    }
-
-    public void set_expand_autonomously(boolean val) {
-        expand_autonomously = val;
-    }
-
-    BitArray make_filter(long init_size, int bits_per_entry) {
-//             System.out.println(init_size ) ;
-//             System.out.println(num_extension_slots);
-//             System.out.println("Making BitVector with: " + (init_size + 
num_extension_slots) + "SLOTS");
-        //return new QuickBitVectorWrapper(bits_per_entry,  init_size);
-        return new HeapBitArray(init_size * bits_per_entry);
-    }
-
-    public int get_fingerprint_length() {
-        return fingerprintLength;
-    }
-
-    QuotientFilter(int power_of_two, int bits_per_entry, BitArray bitmap) {
-        power_of_two_size = power_of_two;
-        bitPerEntry = bits_per_entry;
-        fingerprintLength = bits_per_entry - 3;
-        filter = bitmap;
-    }
-
-    boolean expand() {
-        is_full = true;
-        return false;
-    }
-
-    // measures the number of bits per entry for the filter
-    public double measure_num_bits_per_entry() {
-        return measure_num_bits_per_entry(this, new 
ArrayList<QuotientFilter>());
-    }
-
-    // measures the number of bits per entry for the filter
-    // it takes an array of filters as a parameter since some filter 
implementations here consist of multiple filter objects
-    protected static double measure_num_bits_per_entry(QuotientFilter current, 
ArrayList<QuotientFilter> other_filters) {
-        //System.out.println("--------------------------");
-        //current.print_filter_summary();
-        //System.out.println();
-        double num_entries = current.get_num_entries();
-        for (QuotientFilter q : other_filters) {
-            //q.print_filter_summary();
-            //System.out.println();
-            num_entries += q.get_num_entries();
-        }
-        long init_size = 1L << current.power_of_two_size;
-        long num_bits = current.bitPerEntry * init_size;
-        for (QuotientFilter q : other_filters) {
-            init_size = 1L << q.power_of_two_size;
-            num_bits += q.bitPerEntry * init_size;
-        }
-        //System.out.println("total entries: \t\t" + num_entries);
-        //System.out.println("total bits: \t\t" + num_bits);
-        final double bits_per_entry = num_bits / num_entries;
-        //System.out.println("total bits/entry: \t" + bits_per_entry);
-        //System.out.println();
-        return bits_per_entry;
-    }
-
-    // returns the fraction of occupied slots in the filter
-    public double get_utilization() {
-        long num_logical_slots = 1L << power_of_two_size;
-        double util = num_entries / (double) num_logical_slots;
-        return util;
-    }
-
-    // returns the number of slots in the filter without the extension/buffer 
slots
-    public long get_num_slots() {
-        return 1L << power_of_two_size;
-    }
-
-    long getMask() {
-      return get_num_slots() - 1;
-    }
-
-    // sets the metadata flag bits for a given slot index
-    void modify_slot(boolean is_occupied, boolean is_continuation, boolean 
is_shifted,
-                     long index) {
-        set_occupied(index, is_occupied);
-        set_continuation(index, is_continuation);
-        set_shifted(index, is_shifted);
-    }
-
-    // sets the fingerprint for a given slot index
-    void set_fingerprint(long index, long fingerprint) {
-        filter.setBits(index * bitPerEntry + 3, fingerprintLength, 
fingerprint);
-    }
-
-    // print a nice representation of the filter that can be understood.
-    // if vertical is on, each line will represent a slot
-    public String get_pretty_str(boolean vertical) {
-        StringBuffer sbr = new StringBuffer();
-
-        long numBits = get_num_slots() * bitPerEntry;
-
-        for (long i = 0; i < numBits; i++) {
-            long remainder = i % bitPerEntry;
-            if (remainder == 0) {
-                long slot_num = i/bitPerEntry;
-                sbr.append(" ");
-                if (vertical) {
-                    sbr.append("\n" + String.format("%-10d", slot_num) + "\t");
-                }
-            }
-            if (remainder == 3) {
-                sbr.append(" ");
-            }
-            sbr.append(filter.getBit(i) ? "1" : "0");
-        }
-        sbr.append("\n");
-        return sbr.toString();
-    }
-
-    // print a nice representation of the filter that can be humanly read.
-    public void pretty_print() {
-        System.out.print(get_pretty_str(true));
-    }
-
-    // return a fingerprint in a given slot index
-    long get_fingerprint(long index) {
-        return filter.getBits(index * bitPerEntry + 3, fingerprintLength);
-    }
-
-    // return an entire slot representation, including metadata flags and 
fingerprint
-    long get_slot(long index) {
-        return filter.getBits(index * bitPerEntry, bitPerEntry);
-    }
-
-    // compare a fingerprint input to the fingerprint in some slot index
-    protected boolean compare(long index, long fingerprint) {
-        return get_fingerprint(index) == fingerprint;
-    }
-
-    // modify the flags and fingerprint of a given slot
-    void modify_slot(boolean is_occupied, boolean is_continuation, boolean 
is_shifted,
-                     long index, long fingerprint) {
-        modify_slot(is_occupied, is_continuation, is_shifted, index);
-        set_fingerprint(index, fingerprint);
-    }
-
-    // summarize some statistical measures about the filter
-    public void print_filter_summary() {
-        long slots = get_num_slots();
-        long num_bits = slots * bitPerEntry;
-        System.out.println("slots:\t" + slots);
-        System.out.println("entries:\t" + num_entries);
-        System.out.println("bits\t:" + num_bits);
-        System.out.println("bits/entry\t:" + num_bits / (double)num_entries);
-        System.out.println("FP length:\t" + fingerprintLength);
-        System.out.println("Is full?\t" + is_full);
-        double capacity = num_entries / (double)(slots) ;
-        System.out.println("Capacity\t" + capacity);
-        compute_statistics();
-        //System.out.println("num runs: \t\t" + num_runs);
-        //System.out.println("avg run length: \t" + avg_run_length);
-        //System.out.println("num clusters: \t\t" + num_clusters);
-        //System.out.println("avg cluster length: \t" + avg_cluster_length);
-    }
-
-    /*
-    Returns the number of bits used for the filter
-     */
-    @Override
-    public long get_space_use() {
-        long num_bits = get_num_slots() * bitPerEntry;
-        return num_bits;
-    }
-
-    public int get_bits_per_entry() {
-        return bitPerEntry;
-    }
-
-    boolean is_occupied(long index) {
-        return filter.getBit(index * bitPerEntry);
-    }
-
-    boolean is_continuation(long index) {
-        return filter.getBit(index * bitPerEntry + 1);
-    }
-
-    boolean is_shifted(long index) {
-        return filter.getBit(index * bitPerEntry + 2);
-    }
-
-    void set_occupied(long index, boolean val) {
-        filter.assignBit(index * bitPerEntry, val);
-    }
-
-    void set_continuation(long index, boolean val) {
-        filter.assignBit(index * bitPerEntry + 1, val);
-    }
-
-    void set_shifted(long index, boolean val) {
-        filter.assignBit(index * bitPerEntry + 2, val);
-    }
-
-    boolean is_slot_empty(long index) {
-        return !is_occupied(index) && !is_continuation(index) && 
!is_shifted(index);
-    }
-
-    // scan the cluster leftwards until finding the start of the cluster and 
returning its slot index
-    // used by deletes
-    long find_cluster_start(long index) {
-        long current_index = index;
-        while (is_shifted(current_index)) {
-            current_index = (current_index - 1) & getMask();
-        }
-        return current_index;
-    }
-
-    // given a canonical slot A, finds the actual index B of where the run 
belonging to slot A now resides
-    // since the run might have been shifted to the right due to collisions
-    long find_run_start(long index) {
-      int num_runs_to_skip = 0;
-      while (is_shifted(index)) {
-        index = (index - 1) & getMask();
-        if (is_occupied(index)) {
-          num_runs_to_skip++;
+  int bitPerEntry_;
+  int fingerprintLength_;
+  int powerOfTwoSize_;
+  int numEntries_;
+  BitArray bitArray_;
+
+  double expansionThreshold_;
+  long maxEntriesBeforeExpansion_;
+  boolean expandAutonomously_;
+  boolean isFull_;
+
+  // statistics, computed in the compute_statistics method. method should be 
called before these are used
+  long numRuns_;
+  long numClusters_;
+  public double avgRunLength_;
+  public double avgClusterLength_;
+
+  int originalFingerprintSize_;
+  int numExpansions_;
+
+  public QuotientFilter(final int powerOfTwo, final int bitsPerEntry) {
+    powerOfTwoSize_ = powerOfTwo;
+    bitPerEntry_ = bitsPerEntry;
+    fingerprintLength_ = bitsPerEntry - 3;
+    final long init_size = 1L << powerOfTwo;
+    bitArray_ = makeFilter(init_size, bitsPerEntry);
+
+    expansionThreshold_ = 0.8;
+    maxEntriesBeforeExpansion_ = (int) (init_size * expansionThreshold_);
+    expandAutonomously_ = false;
+    isFull_ = false;
+
+    originalFingerprintSize_ = fingerprintLength_;
+    numExpansions_ = 0;
+    //hash_type = XxHash.hashLong ; //HashType.xxh;
+  }
+
+  public boolean rejuvenate(final long key) {
+    return false;
+  }
+
+  public long getNumEntries() {
+    return numEntries_;
+  }
+
+  public long getMaxEntriesBeforeExpansion() {
+    return maxEntriesBeforeExpansion_;
+  }
+
+  public boolean expandAutonomously() {
+    return expandAutonomously_;
+  }
+
+  public void setExpandAutonomously(final boolean val) {
+    expandAutonomously_ = val;
+  }
+
+  BitArray makeFilter(final long initSize, final int bitsPerEntry) {
+    return new HeapBitArray(initSize * bitsPerEntry);
+  }
+
+  public int getFingerprintLength() {
+    return fingerprintLength_;
+  }
+
+  QuotientFilter(final int powerOfTwo, final int bitsPerEntry, final BitArray 
bitArray) {
+    powerOfTwoSize_ = powerOfTwo;
+    bitPerEntry_ = bitsPerEntry;
+    fingerprintLength_ = bitsPerEntry - 3;
+    bitArray_ = bitArray;
+  }
+
+  boolean expand() {
+    isFull_ = true;
+    return false;
+  }
+
+  // measures the number of bits per entry for the filter
+  public double measureNumBitsPerEntry() {
+    return measureNumBitsPerEntry(this, new ArrayList<QuotientFilter>());
+  }
+
+  // measures the number of bits per entry for the filter
+  // it takes an array of filters as a parameter since some filter 
implementations here consist of multiple filter objects
+  protected static double measureNumBitsPerEntry(final QuotientFilter current, 
final ArrayList<QuotientFilter> otherFilters) {
+    //System.out.println("--------------------------");
+    //current.print_filter_summary();
+    //System.out.println();
+    double numEntries = current.getNumEntries();
+    for (QuotientFilter q : otherFilters) {
+      //q.print_filter_summary();
+      //System.out.println();
+      numEntries += q.getNumEntries();
+    }
+    long init_size = 1L << current.powerOfTwoSize_;
+    long numBits = current.bitPerEntry_ * init_size;
+    for (QuotientFilter q : otherFilters) {
+      init_size = 1L << q.powerOfTwoSize_;
+      numBits += q.bitPerEntry_ * init_size;
+    }
+    //System.out.println("total entries: \t\t" + num_entries);
+    //System.out.println("total bits: \t\t" + num_bits);
+    final double bits_per_entry = numBits / numEntries;
+    //System.out.println("total bits/entry: \t" + bits_per_entry);
+    //System.out.println();
+    return bits_per_entry;
+  }
+
+  // returns the fraction of occupied slots in the filter
+  public double getUtilization() {
+    return numEntries_ / (double) getNumSlots();
+  }
+
+  // returns the number of slots in the filter without the extension/buffer 
slots
+  public long getNumSlots() {
+    return 1L << powerOfTwoSize_;
+  }
+
+  long getMask() {
+    return getNumSlots() - 1;
+  }
+
+  // sets the metadata flag bits for a given slot index
+  void modifySlot(final boolean isOccupied, final boolean isContinuation, 
final boolean isShifted, final long index) {
+    setOccupied(index, isOccupied);
+    setContinuation(index, isContinuation);
+    setShifted(index, isShifted);
+  }
+
+  // sets the fingerprint for a given slot index
+  void setFingerprint(final long index, final long fingerprint) {
+    bitArray_.setBits(index * bitPerEntry_ + 3, fingerprintLength_, 
fingerprint);
+  }
+
+  // print a nice representation of the filter that can be understood.
+  // if vertical is on, each line will represent a slot
+  public String get_pretty_str(final boolean vertical) {
+    final StringBuffer sbr = new StringBuffer();
+    final long numBits = getNumSlots() * bitPerEntry_;
+    for (long i = 0; i < numBits; i++) {
+      final long remainder = i % bitPerEntry_;
+      if (remainder == 0) {
+        final long slot = i / bitPerEntry_;
+        sbr.append(" ");
+        if (vertical) {
+          sbr.append("\n" + String.format("%-10d", slot) + "\t");
         }
       }
-      while (num_runs_to_skip > 0) {
-        index = (index + 1) & getMask();
-        if (!is_continuation(index)) {
-          num_runs_to_skip--;
-        }
+      if (remainder == 3) {
+        sbr.append(" ");
+      }
+      sbr.append(bitArray_.getBit(i) ? "1" : "0");
+    }
+    sbr.append("\n");
+    return sbr.toString();
+  }
+
+  // print a representation of the filter that can be humanly read.
+  public void prettyPrint() {
+    System.out.print(get_pretty_str(true));
+  }
+
+  // return a fingerprint in a given slot index
+  long getFingerprint(final long index) {
+    return bitArray_.getBits(index * bitPerEntry_ + 3, fingerprintLength_);
+  }
+
+  // return an entire slot representation, including metadata flags and 
fingerprint
+  long getSlot(final long index) {
+    return bitArray_.getBits(index * bitPerEntry_, bitPerEntry_);
+  }
+
+  // compare a fingerprint input to the fingerprint in some slot index
+  protected boolean compare(final long index, final long fingerprint) {
+    return getFingerprint(index) == fingerprint;
+  }
+
+  // modify the flags and fingerprint of a given slot
+  void modifySlot(final boolean isOccupied, final boolean isContinuation, 
final boolean isShifted,
+    final long index, final long fingerprint) {
+    modifySlot(isOccupied, isContinuation, isShifted, index);
+    setFingerprint(index, fingerprint);
+  }
+
+  // summarize some statistical measures about the filter
+  public void printFilterSummary() {
+    final long slots = getNumSlots();
+    final long num_bits = slots * bitPerEntry_;
+    System.out.println("slots:\t" + slots);
+    System.out.println("entries:\t" + numEntries_);
+    System.out.println("bits\t:" + num_bits);
+    System.out.println("bits/entry\t:" + num_bits / (double)numEntries_);
+    System.out.println("FP length:\t" + fingerprintLength_);
+    System.out.println("Is full?\t" + isFull_);
+    final double capacity = numEntries_ / (double)(slots) ;
+    System.out.println("Capacity\t" + capacity);
+    computeStatistics();
+    //System.out.println("num runs: \t\t" + num_runs);
+    //System.out.println("avg run length: \t" + avg_run_length);
+    //System.out.println("num clusters: \t\t" + num_clusters);
+    //System.out.println("avg cluster length: \t" + avg_cluster_length);
+  }
+
+  /*
+   * Returns the number of bits used for the filter
+   */
+  @Override
+  public long getSpaceUse() {
+    return getNumSlots() * bitPerEntry_;
+  }
+
+  public int getBitsPerEntry() {
+    return bitPerEntry_;
+  }
+
+  boolean isOccupied(final long index) {
+    return bitArray_.getBit(index * bitPerEntry_);
+  }
+
+  boolean isContinuation(final long index) {
+    return bitArray_.getBit(index * bitPerEntry_ + 1);
+  }
+
+  boolean isShifted(final long index) {
+    return bitArray_.getBit(index * bitPerEntry_ + 2);
+  }
+
+  void setOccupied(final long index, final boolean val) {
+    bitArray_.assignBit(index * bitPerEntry_, val);
+  }
+
+  void setContinuation(final long index, final boolean val) {
+    bitArray_.assignBit(index * bitPerEntry_ + 1, val);
+  }
+
+  void setShifted(final long index, final boolean val) {
+    bitArray_.assignBit(index * bitPerEntry_ + 2, val);
+  }
+
+  boolean isSlotEmpty(final long index) {
+    return !isOccupied(index) && !isContinuation(index) && !isShifted(index);
+  }
+
+  // scan the cluster leftwards until finding the start of the cluster and 
returning its slot index
+  // used by deletes
+  long findClusterStart(long index) {
+    while (isShifted(index)) {
+      index = (index - 1) & getMask();
+    }
+    return index;
+  }
+
+  // given a canonical slot A, finds the actual index B of where the run 
belonging to slot A now resides
+  // since the run might have been shifted to the right due to collisions
+  long findRunStart(long index) {
+    int numRunsToSkip = 0;
+    while (isShifted(index)) {
+      index = (index - 1) & getMask();
+      if (isOccupied(index)) {
+        numRunsToSkip++;
       }
-      return index;
-    }
-
-    // given the start of a run, scan the run and return the index of the 
first matching fingerprint
-    // if not found returns the insertion position as bitwise complement to 
make it negative
-    long find_first_fingerprint_in_run(long index, long fingerprint) {
-      assert(!is_continuation(index));
-      do {
-        final long fingerprintAtIndex = get_fingerprint(index);
-        if (fingerprintAtIndex == fingerprint) {
-          return index;
-        } else if (fingerprintAtIndex > fingerprint) {
-          return ~index; 
-        }
-        index = (index + 1) & getMask();
-      } while (is_continuation(index));
-      return ~index;
     }
-
-    // delete the last matching fingerprint in the run
-    long decide_which_fingerprint_to_delete(long index, long fingerprint) {
-        assert(!is_continuation(index));
-        long matching_fingerprint_index = -1;
-        do {
-            if (compare(index, fingerprint)) {
-                //System.out.println("found matching FP at index " + index);
-                matching_fingerprint_index = index;
-            }
-            index = (index + 1) & getMask();
-        } while (is_continuation(index));
-        return matching_fingerprint_index;
+    while (numRunsToSkip > 0) {
+      index = (index + 1) & getMask();
+      if (!isContinuation(index)) {
+        numRunsToSkip--;
+      }
     }
+    return index;
+  }
 
-    // given the start of a run, find the last slot index that still belongs 
to this run
-    long find_run_end(long index) {
-        while (is_continuation((index + 1) & getMask())) {
-            index = (index + 1) & getMask();
-        }
+  // given the start of a run, scan the run and return the index of the first 
matching fingerprint
+  // if not found returns the insertion position as bitwise complement to make 
it negative
+  long findFirstFingerprintInRun(long index, final long fingerprint) {
+    assert(!isContinuation(index));
+    do {
+      final long fingerprintAtIndex = getFingerprint(index);
+      if (fingerprintAtIndex == fingerprint) {
         return index;
-    }
-
-    // given a canonical index slot and a fingerprint, find the relevant run 
and check if there is a matching fingerprint within it
-    boolean search(long fingerprint, long index) {
-        boolean does_run_exist = is_occupied(index);
-        if (!does_run_exist) {
-            return false;
-        }
-        long run_start_index = find_run_start(index);
-        long found_index = find_first_fingerprint_in_run(run_start_index, 
fingerprint);
-        return found_index >= 0;
-    }
-
-    // Given a canonical slot index, find the corresponding run and return all 
fingerprints in the run.
-    // This method is only used for testing purposes.
-    Set<Long> get_all_fingerprints(long bucket_index) {
-        boolean does_run_exist = is_occupied(bucket_index);
-        HashSet<Long> set = new HashSet<Long>();
-        if (!does_run_exist) {
-            return set;
-        }
-        long run_index = find_run_start(bucket_index);
-        do {
-            set.add(get_fingerprint(run_index));
-            run_index = (run_index + 1) & getMask();
-        } while (is_continuation(run_index));
-        return set;
-    }
-
-    boolean insert(long fingerprint, long index) {
-      if (index >= get_num_slots() || num_entries == get_num_slots()) {
-        return false;
-      }
-      final long run_start = find_run_start(index);
-      if (!is_occupied(index)) {
-        return insert_fingerprint_and_push_all_else(fingerprint, run_start, 
index, true, true);
+      } else if (fingerprintAtIndex > fingerprint) {
+        return ~index; 
       }
-      final long found_index = find_first_fingerprint_in_run(run_start, 
fingerprint);
-      if (found_index >= 0) {
-        return false;
+      index = (index + 1) & getMask();
+    } while (isContinuation(index));
+    return ~index;
+  }
+
+  // delete the last matching fingerprint in the run
+  long decideWhichFingerprintToDelete(long index, final long fingerprint) {
+    assert(!isContinuation(index));
+    long matchingFingerprintIndex = -1;
+    do {
+      if (compare(index, fingerprint)) {
+        //System.out.println("found matching FP at index " + index);
+        matchingFingerprintIndex = index;
       }
-      return insert_fingerprint_and_push_all_else(fingerprint, ~found_index, 
index, false, ~found_index == run_start);
-    }
-
-    boolean insert_fingerprint_and_push_all_else(long fingerprint, long index, 
long canonical, boolean is_new_run, boolean is_run_start) {
-      // in the first shifted entry set is_continuation flag if inserting at 
the start of the existing run
-      // otherwise just shift the existing flag as it is
-      boolean force_continuation = !is_new_run && is_run_start;
-
-      // prepare flags for the current slot
-      boolean is_continuation = !is_run_start;
-      boolean is_shifted = index != canonical;
-
-      // remember the existing entry from the current slot to be shifted to 
the next slot
-      // is_occupied flag belongs to the slot, therefore it is never shifted
-      // is_shifted flag is always true for all shifted entries, no need to 
remember it
-      long existing_fingerprint = get_fingerprint(index);
-      boolean existing_is_continuation = is_continuation(index);
-      boolean existing_is_empty = is_slot_empty(index);
-
-      while (!existing_is_empty) {
-        // set the current slot
-        set_fingerprint(index, fingerprint);
-        set_continuation(index, is_continuation);
-        set_shifted(index, is_shifted);
-
-        // prepare values for the next slot
-        fingerprint = existing_fingerprint;
-        is_continuation = existing_is_continuation | force_continuation;
-        is_shifted = true;
-
-        index = (index + 1) & getMask();
-
-        // save the existing entry to be shifted
-        existing_fingerprint = get_fingerprint(index);
-        existing_is_continuation = is_continuation(index);
-        existing_is_empty = is_slot_empty(index);
-
-        force_continuation = false; // this is needed for the first shift only
+      index = (index + 1) & getMask();
+    } while (isContinuation(index));
+    return matchingFingerprintIndex;
+  }
+
+  // given the start of a run, find the last slot index that still belongs to 
this run
+  long findRunEnd(long index) {
+    while (isContinuation((index + 1) & getMask())) {
+      index = (index + 1) & getMask();
+    }
+    return index;
+  }
+
+  // given a canonical index slot and a fingerprint, find the relevant run and 
check if there is a matching fingerprint within it
+  boolean search(long fingerprint, long index) {
+    final boolean doesRunExist = isOccupied(index);
+    if (!doesRunExist) {
+      return false;
+    }
+    final long runStartIndex = findRunStart(index);
+    final long foundIndex = findFirstFingerprintInRun(runStartIndex, 
fingerprint);
+    return foundIndex >= 0;
+  }
+
+  // Given a canonical slot index, find the corresponding run and return all 
fingerprints in the run.
+  // This method is only used for testing purposes.
+  Set<Long> getAllFingerprints(final long bucketIndex) {
+    final boolean doesRunExist = isOccupied(bucketIndex);
+    final HashSet<Long> set = new HashSet<Long>();
+    if (!doesRunExist) {
+      return set;
+    }
+    long runIndex = findRunStart(bucketIndex);
+    do {
+      set.add(getFingerprint(runIndex));
+      runIndex = (runIndex + 1) & getMask();
+    } while (isContinuation(runIndex));
+    return set;
+  }
+
+  boolean insert(final long fingerprint, final long index) {
+    if (index >= getNumSlots() || numEntries_ == getNumSlots()) {
+      return false;
+    }
+    final long run_start = findRunStart(index);
+    if (!isOccupied(index)) {
+      return insertFingerprintAndPushAllElse(fingerprint, run_start, index, 
true, true);
+    }
+    final long found_index = findFirstFingerprintInRun(run_start, fingerprint);
+    if (found_index >= 0) {
+      return false;
+    }
+    return insertFingerprintAndPushAllElse(fingerprint, ~found_index, index, 
false, ~found_index == run_start);
+  }
+
+  boolean insertFingerprintAndPushAllElse(long fingerprint, long index, final 
long canonical,
+      final boolean isNewRun, final boolean isRunStart) {
+    // in the first shifted entry set isContinuation flag if inserting at the 
start of the existing run
+    // otherwise just shift the existing flag as it is
+    boolean forceContinuation = !isNewRun && isRunStart;
+
+    // prepare flags for the current slot
+    boolean isContinuation = !isRunStart;
+    boolean isShifted = index != canonical;
+
+    // remember the existing entry from the current slot to be shifted to the 
next slot
+    // isOccupied flag belongs to the slot, therefore it is never shifted
+    // isShifted flag is always true for all shifted entries, no need to 
remember it
+    long existingFingerprint = getFingerprint(index);
+    boolean existingIsContinuation = isContinuation(index);
+
+    while (!isSlotEmpty(index)) {
+      // set the current slot
+      setFingerprint(index, fingerprint);
+      setContinuation(index, isContinuation);
+      setShifted(index, isShifted);
+
+      // prepare values for the next slot
+      fingerprint = existingFingerprint;
+      isContinuation = existingIsContinuation | forceContinuation;
+      isShifted = true;
+
+      index = (index + 1) & getMask();
+
+      // remember the existing entry to be shifted
+      existingFingerprint = getFingerprint(index);
+      existingIsContinuation = isContinuation(index);
+
+      forceContinuation = false; // this is needed for the first shift only
+    }
+    // at this point the current slot is empty, so just populate with prepared 
values
+    // either the incoming fingerprint or the last shifted one
+    setFingerprint(index, fingerprint);
+    setContinuation(index, isContinuation);
+    setShifted(index, isShifted);
+
+    if (isNewRun) {
+      setOccupied(canonical, true);
+    }
+    numEntries_++;
+    return true;
+  }
+
+  boolean delete(final long fingerprint, final long canonicalSlot, long 
runStartIndex, long matchingFingerprintIndex) {
+    long runEnd = findRunEnd(matchingFingerprintIndex);
+
+    // the run has only one entry, we need to disable its is_occupied flag
+    // we just remember we need to do this here, and we do it later to not 
interfere with counts
+    boolean turnOffOccupied = runStartIndex == runEnd;
+
+    // First thing to do is move everything else in the run back by one slot
+    for (long i = matchingFingerprintIndex; i != runEnd; i = (i + 1) & 
getMask()) {
+      long f = getFingerprint((i + 1) & getMask());
+      setFingerprint(i, f);
+    }
+
+    // for each slot, we want to know by how much the entry there is shifted
+    // we can do this by counting the number of continuation flags set to true
+    // and the number of occupied flags set to false from the start of the 
cluster to the given cell
+    // and then subtracting: num_shifted_count - num_non_occupied = number of 
slots by which an entry is shifted
+    long clusterStart = findClusterStart(canonicalSlot);
+    long numShiftedCount = 0;
+    long numNonOccupied = 0;
+    for (long i = clusterStart; i != ((runEnd + 1) & getMask()); i = (i + 1) & 
getMask()) {
+      if (isContinuation(i)) {
+        numShiftedCount++;
       }
-      // at this point the current slot is empty, so just populate with 
prepared values
-      // either the incoming fingerprint or the last shifted one
-      set_fingerprint(index, fingerprint);
-      set_continuation(index, is_continuation);
-      set_shifted(index, is_shifted);
-
-      if (is_new_run) {
-        set_occupied(canonical, true);
+      if (!isOccupied(i)) {
+        numNonOccupied++;
       }
-      num_entries++;
-      return true;
     }
 
-    boolean delete(long fingerprint, long canonical_slot, long 
run_start_index, long matching_fingerprint_index) {
-        long run_end = find_run_end(matching_fingerprint_index);
-
-        // the run has only one entry, we need to disable its is_occupied flag
-        // we just remember we need to do this here, and we do it later to not 
interfere with counts
-        boolean turn_off_occupied = run_start_index == run_end;
-
-        // First thing to do is move everything else in the run back by one 
slot
-        for (long i = matching_fingerprint_index; i != run_end; i = (i + 1) & 
getMask()) {
-            long f = get_fingerprint((i + 1) & getMask());
-            set_fingerprint(i, f);
+    setFingerprint(runEnd, 0);
+    setShifted(runEnd, false);
+    setContinuation(runEnd, false);
+
+    // we now have a nested loop. The outer do-while iterates over the 
remaining runs in the cluster.
+    // the inner for loop iterates over cells of particular runs, pushing 
entries one slot back.
+    do {
+      // we first check if the next run actually exists and if it is shifted.
+      // only if both conditions hold, we need to shift it back one slot.
+      //boolean does_next_run_exist = !is_slot_empty(run_end + 1);
+      //boolean is_next_run_shifted = is_shifted(run_end + 1);
+      //if (!does_next_run_exist || !is_next_run_shifted) {
+      if (isSlotEmpty((runEnd + 1) & getMask()) || !isShifted((runEnd + 1) & 
getMask())) {
+        if (turnOffOccupied) {
+          // if we eliminated a run and now need to turn the isOccupied flag 
off, we do it at the end to not interfere in our counts
+          setOccupied(canonicalSlot, false);
         }
+        return true;
+      }
 
-        // for each slot, we want to know by how much the entry there is 
shifted
-        // we can do this by counting the number of continuation flags set to 
true
-        // and the number of occupied flags set to false from the start of the 
cluster to the given cell
-        // and then subtracting: num_shifted_count - num_non_occupied = number 
of slots by which an entry is shifted
-        long cluster_start = find_cluster_start(canonical_slot);
-        long num_shifted_count = 0;
-        long num_non_occupied = 0;
-        for (long i = cluster_start; i != ((run_end + 1) & getMask()); i = (i 
+ 1) & getMask()) {
-            if (is_continuation(i)) {
-                num_shifted_count++;
-            }
-            if (!is_occupied(i)) {
-                num_non_occupied++;
-            }
-        }
+      // we now find the start and end of the next run
+      final long nextRunStart = (runEnd + 1) & getMask();
+      runEnd = findRunEnd(nextRunStart);
 
-        set_fingerprint(run_end, 0);
-        set_shifted(run_end, false);
-        set_continuation(run_end, false);
-
-        // we now have a nested loop. The outer do-while iterates over the 
remaining runs in the cluster.
-        // the inner for loop iterates over cells of particular runs, pushing 
entries one slot back.
-        do {
-            // we first check if the next run actually exists and if it is 
shifted.
-            // only if both conditions hold, we need to shift it back one slot.
-            //boolean does_next_run_exist = !is_slot_empty(run_end + 1);
-            //boolean is_next_run_shifted = is_shifted(run_end + 1);
-            //if (!does_next_run_exist || !is_next_run_shifted) {
-            if (is_slot_empty((run_end + 1) & getMask()) || 
!is_shifted((run_end + 1) & getMask())) {
-                if (turn_off_occupied) {
-                    // if we eliminated a run and now need to turn the 
is_occupied flag off, we do it at the end to not interfere in our counts
-                    set_occupied(canonical_slot, false);
-                }
-                return true;
-            }
-
-            // we now find the start and end of the next run
-            final long next_run_start = (run_end + 1) & getMask();
-            run_end = find_run_end(next_run_start);
-
-            // before we start processing the next run, we check whether the 
previous run we shifted is now back to its canonical slot
-            // The condition num_shifted_count - num_non_occupied == 1 ensures 
that the run was shifted by only 1 slot, meaning it is now back in its proper 
place
-            if (is_occupied((next_run_start - 1) & getMask()) && 
num_shifted_count - num_non_occupied == 1) {
-                set_shifted((next_run_start - 1) & getMask(), false);
-            }
-            else  {
-                set_shifted((next_run_start - 1) & getMask(), true);
-            }
-
-            for (long i = next_run_start; i != ((run_end + 1) & getMask()); i 
= (i + 1) & getMask()) {
-                long f = get_fingerprint(i);
-                set_fingerprint((i - 1) & getMask(), f);
-                if (is_continuation(i)) {
-                    set_continuation((i - 1) & getMask(), true);
-                }
-                if (!is_occupied(i)) {
-                    num_non_occupied++;
-                }
-                if (i != next_run_start) {
-                  num_shifted_count++;
-                }
-            }
-            set_fingerprint(run_end, 0);
-            set_shifted(run_end, false);
-            set_continuation(run_end, false);
-        } while (true);
-    }
+      // before we start processing the next run, we check whether the 
previous run we shifted is now back to its canonical slot
+      // The condition num_shifted_count - num_non_occupied == 1 ensures that 
the run was shifted by only 1 slot, meaning it is now back in its proper place
+      if (isOccupied((nextRunStart - 1) & getMask()) && numShiftedCount - 
numNonOccupied == 1) {
+        setShifted((nextRunStart - 1) & getMask(), false);
+      } else {
+        setShifted((nextRunStart - 1) & getMask(), true);
+      }
 
-    boolean delete(long fingerprint, long canonical_slot) {
-        // if the run doesn't exist, the key can't have possibly been inserted
-        boolean does_run_exist = is_occupied(canonical_slot);
-        if (!does_run_exist) {
-            return false;
+      for (long i = nextRunStart; i != ((runEnd + 1) & getMask()); i = (i + 1) 
& getMask()) {
+        long f = getFingerprint(i);
+        setFingerprint((i - 1) & getMask(), f);
+        if (isContinuation(i)) {
+          setContinuation((i - 1) & getMask(), true);
         }
-        long run_start_index = find_run_start(canonical_slot);
-
-        long matching_fingerprint_index = 
decide_which_fingerprint_to_delete(run_start_index, fingerprint);
-
-        if (matching_fingerprint_index == -1) {
-            // we didn't find a matching fingerprint
-            return false;
+        if (!isOccupied(i)) {
+          numNonOccupied++;
         }
-
-        return delete(fingerprint, canonical_slot, run_start_index, 
matching_fingerprint_index);
-    }
-
-    /*
-    Performs the modular arithmetic of large_hash % bits_per_entry and uses 
this as the slot_index
-     */
-    long get_slot_index(long large_hash) {
-        long slot_index_mask = (1L << power_of_two_size) - 1;
-        long slot_index = large_hash & slot_index_mask;
-        //System.out.format("\n**get_slot_index(): 
[total_hash:index_hash:int_index] --> [%016x:%016x:%016x]\n", large_hash, 
(int)large_hash, slot_index);
-        return slot_index;
-    }
-
-    long gen_fingerprint(long large_hash) {
-        long fingerprint_mask = (1L << fingerprintLength) - 1L;
-        fingerprint_mask = fingerprint_mask << power_of_two_size;
-        long fingerprint = (large_hash & fingerprint_mask) >> 
power_of_two_size;
-        //System.out.format("\n**gen_fingerprint(): 
[total_hash:fingerprint_hash:int_fingerprint] --> [%016x:%016x:%016x]\n", 
large_hash, ((int)(large_hash>>32)), fingerprint);
-        return fingerprint;
-    }
-
-//    void print_key(int input) {
-//        long large_hash = HashFunctions.normal_hash(input);
-//        long slot_index = get_slot_index(large_hash);
-//        long fingerprint = gen_fingerprint(large_hash);
-//
-//        System.out.println("num   :  " + input);
-//        System.out.print("hash  :  ");
-//        print_long_in_binary(large_hash, fingerprintLength + 
power_of_two_size);
-//        //print_int_in_binary(slot_index_mask, 31);
-//        System.out.print("bucket:  ");
-//        print_long_in_binary(slot_index, power_of_two_size);
-//        System.out.print("FP    :  ");
-//        //print_int_in_binary(fingerprint_mask, 31);
-//        print_long_in_binary(fingerprint, fingerprintLength);
-//        System.out.println();
-//
-//    }
-//
-//    void set_expansion_threshold(double thresh) {
-//        expansion_threshold = thresh;
-//        max_entries_before_expansion = (long)(Math.pow(2, power_of_two_size) 
* expansion_threshold);
-//    }
-//
-    /*
-    This is the main insertion function accessed externally.
-    It calls the underlying filter _insert function which hashes the input
-    item internally.
-    Hence, the `large_hash` argument is already a hash key that has been 
generated
-    by the hashing library (eg xxhash).
-     */
-    protected boolean _insert(long large_hash) {
-        //System.out.println("Inserting long hash " + large_hash);
-        if (is_full) {
-            return false;
+        if (i != nextRunStart) {
+          numShiftedCount++;
         }
-        long slot_index = get_slot_index(large_hash);
-        long fingerprint = gen_fingerprint(large_hash);
-
-               /*print_long_in_binary(large_hash, 64);
-               print_long_in_binary(slot_index, 32);
-               print_long_in_binary((int)fingerprint, 64);
-               System.out.println(slot_index + "  " + fingerprint );
-               System.out.println(); */
-
-        boolean success = insert(fingerprint, slot_index);
-               /*if (!success) {
-                       System.out.println("insertion failure");
-                       System.out.println(input + "\t" + slot_index + "\t" + 
get_fingerprint_str(fingerprint, fingerprintLength));
-                       pretty_print();
-                       System.exit(1);
-               }*/
-
-//        if (expand_autonomously && num_entries >= 
max_entries_before_expansion) {
-//            boolean expanded = expand();
-//            if (expanded) {
-//                num_expansions++;
-//            }
-//        }
-        return success;
-    }
-//
-//    protected boolean _delete(long large_hash) {
-//        long slot_index = get_slot_index(large_hash);
-//        long fp_long = gen_fingerprint(large_hash);
-//        boolean success = delete(fp_long, slot_index);
-//        if (success) {
-//            num_entries--;
-//        }
-//        return success;
-//    }
-//
-    protected boolean _search(long large_hash) {
-        long slot_index = get_slot_index(large_hash);
-        long fingerprint = gen_fingerprint(large_hash);
-        return search(fingerprint, slot_index);
-    }
-
-    public boolean get_bit_at_offset(int offset) {
-        return filter.getBit(offset);
+      }
+      setFingerprint(runEnd, 0);
+      setShifted(runEnd, false);
+      setContinuation(runEnd, false);
+    } while (true);
+  }
+
+  boolean delete(final long fingerprint, final long canonicalSlot) {
+    // if the run doesn't exist, the key can't have possibly been inserted
+    boolean doesRunExist = isOccupied(canonicalSlot);
+    if (!doesRunExist) {
+      return false;
+    }
+    long runStartIndex = findRunStart(canonicalSlot);
+    long matchingFingerprintIndex = 
decideWhichFingerprintToDelete(runStartIndex, fingerprint);
+    if (matchingFingerprintIndex == -1) {
+      // we didn't find a matching fingerprint
+      return false;
+    }
+    return delete(fingerprint, canonicalSlot, runStartIndex, 
matchingFingerprintIndex);
+  }
+
+  /*
+   * Performs the modular arithmetic of large_hash % bits_per_entry and uses 
this as the slot_index
+   */
+  long getSlotIndex(final long largeHash) {
+    return largeHash & getMask();
+  }
+
+  long genFingerprint(final long largeHash) {
+    long fingerprintMask = (1L << fingerprintLength_) - 1L;
+    fingerprintMask = fingerprintMask << powerOfTwoSize_;
+    return (largeHash & fingerprintMask) >> powerOfTwoSize_;
+  }
+
+  void setExpansionThreshold(double thresh) {
+    expansionThreshold_ = thresh;
+    maxEntriesBeforeExpansion_ = (long)(Math.pow(2, powerOfTwoSize_) * 
expansionThreshold_);
+  }
+
+  /*
+  This is the main insertion function accessed externally.
+  It calls the underlying filter _insert function which hashes the input
+  item internally.
+  Hence, the `large_hash` argument is already a hash key that has been 
generated
+  by the hashing library (eg xxhash).
+   */
+  protected boolean _insert(long large_hash) {
+    if (isFull_) {
+      return false;
+    }
+    final long slotIndex = getSlotIndex(large_hash);
+    final long fingerprint = genFingerprint(large_hash);
+    boolean success = insert(fingerprint, slotIndex);
+
+    if (expandAutonomously_ && numEntries_ >= maxEntriesBeforeExpansion_) {
+      final boolean expanded = expand();
+      if (expanded) {
+        numExpansions_++;
+      }
     }
-
-    public void compute_statistics() {
-        num_runs = 0;
-        num_clusters = 0;
-        double sum_run_lengths = 0;
-        double sum_cluster_lengths = 0;
-
-        int current_run_length = 0;
-        int current_cluster_length = 0;
-
-        long num_slots = get_num_slots();
-        for (long i = 0; i < num_slots; i++) {
-
-            boolean occupied = is_occupied(i);
-            boolean continuation = is_continuation(i);
-            boolean shifted = is_shifted(i);
-
-            if         ( !occupied && !continuation && !shifted ) { // empty 
slot
-                sum_cluster_lengths += current_cluster_length;
-                current_cluster_length = 0;
-                sum_run_lengths += current_run_length;
-                current_run_length = 0;
-            }
-            else if ( !occupied && !continuation && shifted ) { // start of 
new run
-                num_runs++;
-                sum_run_lengths += current_run_length;
-                current_run_length = 1;
-                current_cluster_length++;
-            }
-            else if ( !occupied && continuation && !shifted ) {
-                // not used
-            }
-            else if ( !occupied && continuation && shifted ) { // continuation 
of run
-                current_cluster_length++;
-                current_run_length++;
-            }
-            else if ( occupied && !continuation && !shifted ) { // start of 
new cluster & run
-                num_runs++;
-                num_clusters++;
-                sum_cluster_lengths += current_cluster_length;
-                sum_run_lengths += current_run_length;
-                current_cluster_length = 1;
-                current_run_length = 1;
-            }
-            else if (occupied && !continuation && shifted ) { // start of new 
run
-                num_runs++;
-                sum_run_lengths += current_run_length;
-                current_run_length = 1;
-                current_cluster_length++;
-            }
-            else if (occupied && continuation && !shifted ) {
-                // not used
-            }
-            else if (occupied && continuation && shifted ) { // continuation 
of run
-                current_cluster_length++;
-                current_run_length++;
-            }
-        }
-        avg_run_length = sum_run_lengths / num_runs;
-        avg_cluster_length = sum_cluster_lengths / num_clusters;
+    return success;
+  }
+
+  protected boolean _delete(final long largeHash) {
+    final long slotIndex = getSlotIndex(largeHash);
+    long fingerprint = genFingerprint(largeHash);
+    boolean success = delete(fingerprint, slotIndex);
+    if (success) {
+      numEntries_--;
+    }
+    return success;
+  }
+
+  protected boolean _search(final long largeHash) {
+    final long slotIndex = getSlotIndex(largeHash);
+    long fingerprint = genFingerprint(largeHash);
+    return search(fingerprint, slotIndex);
+  }
+
+  public boolean getBitAtOffset(final int offset) {
+    return bitArray_.getBit(offset);
+  }
+
+  public void computeStatistics() {
+    numRuns_ = 0;
+    numClusters_ = 0;
+    double sumRunLengths = 0;
+    double sumClusterLengths = 0;
+
+    int currentRunLength = 0;
+    int currentCluster_length = 0;
+
+    final long numSlots = getNumSlots();
+    for (long i = 0; i < numSlots; i++) {
+      final boolean occupied = isOccupied(i);
+      final boolean continuation = isContinuation(i);
+      final boolean shifted = isShifted(i);
+
+      if (!occupied && !continuation && !shifted) { // empty slot
+        sumClusterLengths += currentCluster_length;
+        currentCluster_length = 0;
+        sumRunLengths += currentRunLength;
+        currentRunLength = 0;
+      } else if ( !occupied && !continuation && shifted ) { // start of new run
+        numRuns_++;
+        sumRunLengths += currentRunLength;
+        currentRunLength = 1;
+        currentCluster_length++;
+      } else if ( !occupied && continuation && !shifted ) {
+        // not used
+      } else if ( !occupied && continuation && shifted ) { // continuation of 
run
+        currentCluster_length++;
+        currentRunLength++;
+      } else if ( occupied && !continuation && !shifted ) { // start of new 
cluster & run
+        numRuns_++;
+        numClusters_++;
+        sumClusterLengths += currentCluster_length;
+        sumRunLengths += currentRunLength;
+        currentCluster_length = 1;
+        currentRunLength = 1;
+      } else if (occupied && !continuation && shifted ) { // start of new run
+        numRuns_++;
+        sumRunLengths += currentRunLength;
+        currentRunLength = 1;
+        currentCluster_length++;
+      } else if (occupied && continuation && !shifted ) {
+        // not used
+      } else if (occupied && continuation && shifted ) { // continuation of run
+        currentCluster_length++;
+        currentRunLength++;
+      }
     }
-
-//
-//    void ar_sum1(ArrayList<Integer> ar, int index)
-//    {
-//        int s = ar.size();
-//        if (s <= index)
-//        {
-//            for (int i = s; i<index+1; i++)
-//            {
-//                ar.add(0);
-//            }
-//        }
-//        ar.set(index, ar.get(index)+1);
-//    }
-//
-//    public ArrayList<Integer> measure_cluster_length()
-//    {
-//        ArrayList<Integer> ar = new ArrayList<Integer>();
-//
-//        num_runs = 0;
-//        num_clusters = 0;
-//
-//        int current_run_length = 0;
-//        int current_cluster_length = 0;
-//
-//        int cnt = 0;
-//
-//        for (int i = 0; i < get_logical_num_slots_plus_extensions(); i++) {
-//
-//            boolean occupied = is_occupied(i);
-//            boolean continuation = is_continuation(i);
-//            boolean shifted = is_shifted(i);
-//
-//            if       (!occupied && !continuation && !shifted ) { // empty 
slot
-//                if(current_cluster_length != 0) ar_sum1(ar, 
current_cluster_length-1);
-//                current_cluster_length = 0;
-//                current_run_length = 0;
-//            }
-//            else if (!occupied && !continuation && shifted ) { // start of 
new run
-//                num_runs++;
-//                current_run_length = 1;
-//                current_cluster_length++;
-//            }
-//            else if (!occupied && continuation && shifted ) { // 
continuation of run
-//                current_cluster_length++;
-//                current_run_length++;
-//            }
-//            else if (occupied && !continuation && !shifted ) { // start of 
new cluster & run
-//                if(current_cluster_length != 0) ar_sum1(ar, 
current_cluster_length-1);
-//                num_runs++;
-//                num_clusters++;
-//                //if(current_cluster_length == 0) cnt++;
-//                current_cluster_length = 1;
-//                current_run_length = 1;
-//            }
-//            else if (occupied && !continuation && shifted ) { // start of 
new run
-//                num_runs++;
-//                current_run_length = 1;
-//                current_cluster_length++;
-//            }
-//            else if (occupied && continuation && shifted ) { // continuation 
of run
-//                current_cluster_length++;
-//                current_run_length++;
-//            }
-//        }
-//        if(current_cluster_length != 0) ar_sum1(ar, 
current_cluster_length-1);
-//        //System.out.println("CNT = " + cnt);
-//        return ar;
-//    }
-//
-//    /*
-//    @charlied
-//    Returns the fraction of the filter that is occupied by inserted items.
-//    Extension slots are omitted from the calculation of the load factor 
because they are used to accomodate
-//    items in the filter at the top end of the filter.
-//    Asymptotically, these will make little-to-no difference to the load in 
these calculations as the slots
-//    contributed 2*j / (2^j) --> 0 entries.
-//     */
-//    public double get_load() {
-//        return num_entries / (double) get_logical_num_slots();
-//    }
+    avgRunLength_ = sumRunLengths / numRuns_;
+    avgClusterLength_ = sumClusterLengths / numClusters_;
+  }
 
 }
diff --git 
a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
 
b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
index 4f68cb1f..60db5be8 100644
--- 
a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
+++ 
b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
@@ -62,30 +62,30 @@ public class QuotientFilterTest {
         qf.insert(C, 1);
         qf.insert(D, 2);
         qf.insert(A, 1);
-        assertEquals(qf.get_num_entries(), 6);
+        assertEquals(qf.getNumEntries(), 6);
 
         assertEquals(getState(qf, 0), 0);
-        assertEquals(qf.get_fingerprint(0), 0);
+        assertEquals(qf.getFingerprint(0), 0);
         assertEquals(getState(qf, 1), 0b100);
-        assertEquals(qf.get_fingerprint(1), A);
+        assertEquals(qf.getFingerprint(1), A);
         assertEquals(getState(qf, 2), 0b111);
-        assertEquals(qf.get_fingerprint(2), B);
+        assertEquals(qf.getFingerprint(2), B);
         assertEquals(getState(qf, 3), 0b011);
-        assertEquals(qf.get_fingerprint(3), C);
+        assertEquals(qf.getFingerprint(3), C);
         assertEquals(getState(qf, 4), 0b101);
-        assertEquals(qf.get_fingerprint(4), D);
+        assertEquals(qf.getFingerprint(4), D);
         assertEquals(getState(qf, 5), 0b001);
-        assertEquals(qf.get_fingerprint(5), E);
+        assertEquals(qf.getFingerprint(5), E);
         assertEquals(getState(qf, 6), 0);
-        assertEquals(qf.get_fingerprint(6), 0);
+        assertEquals(qf.getFingerprint(6), 0);
         assertEquals(getState(qf, 7), 0b100);
-        assertEquals(qf.get_fingerprint(7), F);
+        assertEquals(qf.getFingerprint(7), F);
     }
 
     public int getState(QuotientFilter filter, int slot) {
-      return (filter.is_occupied(slot) ? 1 : 0) << 2
-          | (filter.is_continuation(slot) ? 1 : 0) << 1
-          | (filter.is_shifted(slot) ? 1 : 0);
+      return (filter.isOccupied(slot) ? 1 : 0) << 2
+          | (filter.isContinuation(slot) ? 1 : 0) << 1
+          | (filter.isShifted(slot) ? 1 : 0);
     }
 
     /*
@@ -157,13 +157,13 @@ public class QuotientFilterTest {
         final long fp1 = 1;
         final long fp2 = 1 << fingerprint_size - 1;
         qf.insert(fp1, num_entries - 1);
-        assertEquals(qf.get_fingerprint(num_entries - 1), fp1);
+        assertEquals(qf.getFingerprint(num_entries - 1), fp1);
         assertEquals(getState(qf, num_entries - 1), 0b100);
         qf.insert(fp2, num_entries - 1);
-        assertEquals(qf.get_fingerprint(0), fp2);
+        assertEquals(qf.getFingerprint(0), fp2);
         assertEquals(getState(qf, 0), 0b011);
         qf.delete(fp2, num_entries - 1);
-        assertEquals(qf.get_fingerprint(0), 0);
+        assertEquals(qf.getFingerprint(0), 0);
         assertEquals(getState(qf, 0), 0b000);
         final boolean found = qf.search(fp1, num_entries - 1);
         assertTrue(found);
@@ -190,7 +190,7 @@ public class QuotientFilterTest {
         qf.insert(0x1F, 4);
         qf.insert(0x1F, 15); // last slot in the filter
         qf.insert(0x1F, 16); // outside the bounds
-        qf.pretty_print() ;
+//        qf.pretty_print();
 
         Iterator it = new Iterator(qf);
         int[] arr = new int[] {2, 3, 4, 15};
@@ -261,8 +261,8 @@ public class QuotientFilterTest {
 
     static public boolean check_equality(QuotientFilter qf, BitSet bs, boolean 
check_also_fingerprints) {
         for (int i = 0; i < bs.size(); i++) {
-            if (check_also_fingerprints || (i % qf.bitPerEntry == 0 || i % 
qf.bitPerEntry == 1 || i % qf.bitPerEntry == 2)) {
-                if (qf.get_bit_at_offset(i) != bs.get(i)) {
+            if (check_also_fingerprints || (i % qf.getBitsPerEntry() == 0 || i 
% qf.getBitsPerEntry() == 1 || i % qf.getBitsPerEntry() == 2)) {
+                if (qf.getBitAtOffset(i) != bs.get(i)) {
                     return false;
                 }
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to