[uima-uimaj] 01/01: no jira, experiment, robin hood hashing.

schor Wed, 18 Dec 2019 06:05:54 -0800

This is an automated email from the ASF dual-hosted git repository.

schor pushed a commit to branch robin-hood-hash
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git


commit 9a426895dfaa761ff74dd1de3d43ed47c79a834d
Author: Marshall Schor <m...@schor.com>
AuthorDate: Wed Dec 18 09:04:09 2019 -0500

    no jira, experiment, robin hood hashing.
    
    Initial results: worse performance for IntHashSet, might be OK for Obj
    HashSet
---
 .../uima/internal/util/Common_hash_support_rh.java | 561 +++++++++++++++
 .../apache/uima/internal/util/IntHashSetRh.java    | 794 +++++++++++++++++++++
 .../apache/uima/internal/util/ObjHashSetRh.java    | 520 ++++++++++++++
 .../uima/internal/util/IntHashSetPerfTestRh.java   | 219 ++++++
 .../uima/internal/util/IntHashSetTestRh.java       | 250 +++++++
 .../uima/internal/util/ObjHashSetTestRh.java       | 185 +++++
 6 files changed, 2529 insertions(+)

diff --git 
a/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java
 
b/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java
new file mode 100644
index 0000000..01c0edd
--- /dev/null
+++ 
b/uimaj-core/src/main/java/org/apache/uima/internal/util/Common_hash_support_rh.java
@@ -0,0 +1,561 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.function.IntPredicate;
+
+/**
+ * A common superclass for hash maps and hash sets
+ * Uses robin hood with backward shift for deletion 
+ * 
http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+ *
+ * uses linear probing (no delta expansion)
+ * 
+ * extra table (1 byte per slot) holds lower 7 bits of hash 
+ *   and therefore also serves to indicate distance from initial probe =
+ *     probe spot % 128 - lower 7 bits of hash (if negative, add 128, unless 
wrap around?)
+ * 
+ * 8th bit , if 1 , is flag showing empty; no tombstones, because using 
backward-shift technique for removes
+ * 
+ * find: stop early, if # probe-spot > distance from initial probe.  (mod 
wrap-around)
+ * 
+ */
+public abstract class Common_hash_support_rh {
+
+  // set to true to collect statistics for tuning
+  // you have to also put a call to showHistogram() at the end of the run
+  protected static final boolean TUNE = false;
+  
+  protected static final int MIN_SIZE = 10;   // 10 / .66 =15.15
+  protected static final int MIN_CAPACITY = 16;
+  protected static final int MIN_CAPACITY_SHRINK = 64;  // don't shrink below 
this - thrashing
+  
+  private static final byte LHB_EMPTY = (byte) 0x80;
+  private static final int LHB_HASH_MASK =  0x7f;
+  
+  protected final float loadFactor;  
+  
+  protected final int initialCapacity; 
+
+  protected int histogram [];
+  protected int maxProbe = 0;
+
+  protected int sizeWhichTriggersExpansion;
+  private int size = 0; // number of elements in the table  
+
+  protected boolean secondTimeShrinkable = false;
+  
+  protected abstract boolean is_valid_key(int pos);
+  protected abstract int keys_length();
+  protected abstract void newKeysAndValues(int capacity);
+  protected abstract void clearKeysAndValues();
+
+  protected int bitMask;  // = key size -1
+  /**
+   * 7 bits of lower hash map, serves as distance from initial bucket probe 
+   * initialize to "empty" = 0x80
+   * If not "empty", the value is 
+   *   - often the same as the lower 7 bits of the index
+   *   - if not that, it's because there's a collision, and some slot with a 
higher index will have the lower 7 bits of the index
+   *   - so, the distance from the initial probe is the probe addr with the 
matching value - initial probe addr.
+   *   -   which, is the same as the 
+   *         ((probe addr with the matching value) & LBH_HASH_MASK) 
+   *         - matching value
+   */
+  private byte[] lower_hash_bits; 
+  protected int initial_probe;
+  protected byte initial_probe_lhb;
+  private int smaller_of_mask;
+
+  private static Common_hash_support_rh tune_instance;
+  
+  protected abstract void shift(int prev, int pos);
+  protected abstract void setEmpty(int pos);
+  
+  protected abstract void expand_table(); // double table size
+
+  /**
+   * @param initialSizeBeforeExpanding the number of elements the table should 
hold before expanding
+   */
+  public Common_hash_support_rh(int initialSizeBeforeExpanding) {
+    this(initialSizeBeforeExpanding, 0.66F);
+  }
+   
+  public Common_hash_support_rh(int initialSizeBeforeExpanding, float factor) {
+    this.loadFactor = factor;
+    this.initialCapacity = tableSpace(initialSizeBeforeExpanding, factor);
+    lower_hash_bits = init_lhb(this.initialCapacity);
+    bitMask = this.initialCapacity - 1;
+    setSmallerOfMask();
+    if (TUNE) {
+      tune_instance = this;
+      histogram = new int[200];
+      Arrays.fill(histogram, 0);
+      maxProbe = 0;
+    }
+  }
+
+  public Common_hash_support_rh(Common_hash_support_rh orig) {
+    this(orig.initialCapacity);
+    this.sizeWhichTriggersExpansion = orig.sizeWhichTriggersExpansion;
+    this.size = orig.size;
+    this.secondTimeShrinkable = orig.secondTimeShrinkable;
+
+    // copy doesn't do tuning measurements
+    this.histogram = null;
+  }
+  
+  public void clear() {
+    // see if size is less than the 1/2 size that triggers expansion
+    if (size <  (sizeWhichTriggersExpansion >>> 1)) {
+      // if 2nd time then shrink by 50%
+      //   this is done to avoid thrashing around the threshold
+      if (secondTimeShrinkable) {
+        secondTimeShrinkable = false;
+        final int newCapacity = Math.max(initialCapacity, keys_length() >>> 1);
+        if (newCapacity < keys_length()) { 
+          newTable(newCapacity);  // shrink table by 50%
+          size = 0;
+          resetHistogram();
+          if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
+            System.out.println("TRAcE_MODE Common_hash clear 2nd time 
shrinkable, newCapacity=" + newCapacity + ", keys_length: " + keys_length());
+          }
+          return;
+        } else { // don't shrink below minimum
+          clearExisting();
+          if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
+            System.out.println("TRAcE_MODE Common_hash clear 2nd time 
shrinkable but nothing done, below minimum: newCapacity=" + newCapacity + ", 
keys_length: " + keys_length());
+          }
+          return;
+        }
+      } else {
+        if (PositiveIntSet.IS_TRACE_MODE_SWITCH) 
System.out.println("TRACE_MODE Common_hash clear setting 2nd time shrinkable");
+        secondTimeShrinkable = true;
+      }
+    } else {
+      secondTimeShrinkable = false; // reset this to require 2 triggers in a 
row
+    }
+    clearExisting();
+  }
+  
+  private void clearExisting() {
+    clearKeysAndValues();
+    Arrays.fill(lower_hash_bits, LHB_EMPTY);
+    size = 0;
+    resetHistogram();    
+  }
+  
+  /** It gets a ref to the current value of table, and then searches that 
array.
+   * 
+   * speedups: avoid looking in lower_hash_bits array for 90% of the time the 
hit is in the first 2 positions?
+   * 
+   * @param hash     the hash code of the key
+   * @param is_eq    true if the key at the int position is == to the key, or 
is 0
+   * @return the probeAddr in keys array.  
+   *         If not found, the probe value is to empty cell or to some other 
value
+   *         Use isEmpty(probeAddr) to distinguish.
+   */
+  protected int findPosition(
+                   int hash,
+                   IntPredicate is_eq_or_not_present
+                   ) {
+        
+    int probeAddr = initial_probe = hash & bitMask;
+    final byte initialProbeLhb = initial_probe_lhb = (byte) (hash & 
LHB_HASH_MASK);
+ 
+    if (is_eq_or_not_present.test(probeAddr) ) {
+      if (TUNE) {
+        updateHistogram(0);
+      }
+      return probeAddr;
+    }
+    int extraProbes = 0;
+    
+    for (;;) {      
+
+      byte plhb = lower_hash_bits[probeAddr];
+          // not found if hit an empty slot
+      if (plhb == LHB_EMPTY || 
+       // or the distance to original probe for this slot is < number of probes
+       //   (because otherwise, the other item would have replaced if present)
+          extraProbesForEntry(plhb, probeAddr) < extraProbes
+         ) {
+        // not found
+        if (TUNE) {
+          updateHistogram(extraProbes);
+        }
+        return probeAddr;
+      }
+      
+      if (plhb == initialProbeLhb) {
+        if (is_eq_or_not_present.test(probeAddr)) {
+          // found
+          if (TUNE) {
+            updateHistogram(extraProbes);
+          }
+          return probeAddr;
+        }
+        // else was collision, not equal
+      }
+      
+     extraProbes++;
+      
+      probeAddr = incrPos(probeAddr);
+    }
+    
+  } 
+ 
+  /** 
+   * Find the position for a new (guaranteed not in the table) item
+   * 
+   * @param hash     the hash code of the key
+   * @return the probeAddr in keys array.  
+   *         If not found, the probe value is to empty cell or to some other 
value
+   *         Use isEmpty(probeAddr) to distinguish.
+   */
+  protected int findPosition_new(int hash) {
+        
+    int extraProbes = 0;
+    int probeAddr = initial_probe = hash & bitMask;
+    initial_probe_lhb = (byte) (hash & LHB_HASH_MASK);
+    
+    
+    for (;;) {
+
+      byte plhb = lower_hash_bits[probeAddr];
+          // not found if hit an empty slot
+      if (plhb == LHB_EMPTY || 
+       // or the distance to original probe for this slot is < number of probes
+       //   (because otherwise, the other item would have replaced if present)
+          extraProbesForEntry(plhb, probeAddr) < extraProbes
+         ) {
+        // not found
+        if (TUNE) {
+          updateHistogram(extraProbes);
+        }
+        return probeAddr;
+      }
+            
+      extraProbes++;
+      
+      probeAddr = incrPos(probeAddr);
+    }
+  }
+  
+  protected boolean isEmpty(int probeAddr) {
+    return lower_hash_bits[probeAddr] == LHB_EMPTY;
+  }
+  
+  protected byte setLhb(int pos, byte lbh) {
+    byte r = lower_hash_bits[pos];
+    assert lbh >= 0;
+    if (extraProbesForEntry(lbh, pos) >= 20) {
+      System.out.println("debug");
+    }
+    assert extraProbesForEntry(lbh, pos) < 20;
+    lower_hash_bits[pos] = lbh;
+    return r;
+  }
+    
+  protected void remove_common(int position) {
+    int prev = position;
+    int i = incrPos(position);
+    size --;
+    for (;;) {
+      byte lhb = lower_hash_bits[i];
+      if (lhb == LHB_EMPTY || extraProbesForEntry(lhb, i) == 0 ) {
+        lower_hash_bits[prev] = LHB_EMPTY;
+        setEmpty(prev);
+        return;
+      }
+      //debug
+      if (extraProbesForEntry(lhb, prev) > 20) {
+        System.out.println("debug");
+      }
+      lower_hash_bits[prev] = lhb;
+      shift(prev, i);
+      
+      prev = i;
+      i = incrPos(i);
+    }
+  }
+  
+//  /**
+//   * This method calls the subclass's copy_to_new_table method, 
+//   *   passing an inner lambda containing common code for copying old to new.
+//   *   
+//   *   That inner lambda, when invoked by the copy_to_new_table method, 
+//   *     is passed another lambda of one argument (the old index) 
+//   *     which is called to copy each element.  
+//   * @param new_capacity
+//   * @param old_capacity
+//   */
+//  private void copyOld2New(int new_capacity, int old_capacity) {
+//    copy_to_new_table( 
+//      new_capacity,
+//      old_capacity,
+//      
+//        // this code assigned to "commonCopy" arg
+//        (IntConsumer copyToNew, IntPredicate is_valid_old_key) -> {  
+//      newTable(new_capacity);
+//      for (int i = 0; i < old_capacity; i++) {
+//        if (is_valid_old_key.test(i)) {
+//          copyToNew.accept(i);
+//        }
+//      }
+//    });    
+//  }
+
+  /**
+   * advance pos until it points to a non 0 or is 1 past end
+   * If pos is negative, start at 0.
+   * Don't move if pos already has valid key
+   * @param pos -
+   * @return updated pos
+   */
+  protected int moveToNextFilled(int pos) {
+    final int max = keys_length();
+    if (pos < 0) {
+      pos = 0;
+    }
+    while (true) {
+      if (pos >= max) {
+        return pos;
+      }
+      if (is_valid_key(pos)) {
+        return pos;
+      }
+      pos ++;
+    }
+  }
+
+  /**
+   * decrement pos until it points to a non 0 or is -1
+   * If pos is beyond end start at end.
+   * Don't move if pos already has valid key
+   * @param pos -
+   * @return updated pos
+   */
+  protected int moveToPreviousFilled(int pos) {
+    final int max = keys_length();
+    if (pos > max) {
+      pos = max - 1;
+    }
+    
+    while (true) {
+      if (pos < 0) {
+        return pos;
+      }
+      if (is_valid_key(pos)) {
+        return pos;
+      }
+      pos --;
+    }
+  }
+  
+  // called by clear, increase table size, decrease table size
+  protected void newTable(int capacity) {
+    capacity = Math.max(MIN_SIZE, Misc.nextHigherPowerOf2(capacity));
+    newKeysAndValues(capacity);
+    lower_hash_bits = init_lhb(capacity);
+    setSmallerOfMask();
+    bitMask = capacity - 1;
+    sizeWhichTriggersExpansion = (int)(capacity * loadFactor);
+  }
+  
+  protected void incrementSize() {
+    size++;
+    if (size >= sizeWhichTriggersExpansion) {
+      maybeIncreaseTableCapacity();
+    }
+  }
+
+  private void maybeIncreaseTableCapacity() {
+    
+    if (TUNE) {
+      int old_capacity = keys_length();
+      int new_capacity = 2 * old_capacity;
+      System.out.println("Capacity increasing from " + old_capacity + " to " + 
new_capacity + ", size=" + size);
+    }   
+    expand_table();
+  }
+    
+//  /**
+//   * only called if actually found and removed an entry
+//   */
+//  protected void commonRemove() {
+//    size --;
+////    debugValidate();
+//  }
+  
+  public int size() {
+    return size;
+  }
+  
+  protected void resetHistogram() {
+//    if (TUNE) {
+//      histogram = new int[200];
+//      Arrays.fill(histogram, 0);
+//      maxProbe = 0;
+//    }    
+  }
+
+  private void updateHistogram(int extraProbes) {
+    histogram[extraProbes] = 1 + histogram[extraProbes];
+    if (maxProbe < extraProbes) {
+      maxProbe = extraProbes;
+    }
+  }
+
+  public void showHistogram() {
+    if (TUNE) {
+      int sumI = 0;
+      for (int i : histogram) {
+        sumI += i;
+      }
+      
+      System.out.format(
+          "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr 
of find operations at last table size=%,d%n",
+          loadFactor, maxProbe, sumI);
+      for (int i = 0; i <= maxProbe; i++) {
+        if (i == maxProbe && histogram[i] == 0) {
+          System.out.println("huh?");
+        }
+        System.out.println(i + ": " + histogram[i]);
+      }     
+      
+      System.out.println("bytes / entry = " + (float) (keys_length()) * 8 / 
size());
+      System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = 
%,d%n",
+          size(),
+          (int) ((keys_length() >>> 1) * loadFactor),
+          (int) (keys_length() * loadFactor));
+    }
+  }
+  
+  static {
+    if (TUNE) {
+      Runtime.getRuntime().addShutdownHook(new Thread(null, () -> {
+        tune_instance.showHistogram();
+      }));     
+    }
+  }
+
+  // test case use
+  int getCapacity() {
+    return keys_length();
+  }
+  
+  protected abstract class CommonKeyIterator implements IntListIterator {
+
+    protected int curPosition;
+    
+    protected final int firstPosition;
+    
+    protected CommonKeyIterator() {
+        this.curPosition = moveToNextFilled(0);
+        firstPosition = curPosition;
+    }
+    
+    @Override
+    public boolean hasNext() {
+      return curPosition < keys_length() && curPosition >= 0;
+    }
+
+    @Override
+    public boolean hasPrevious() {
+      if (curPosition > keys_length() || curPosition <= 0) {
+        return false;
+      }
+      
+      int test = moveToPreviousFilled(curPosition - 1);
+      return test >= 0;
+    }
+
+    @Override
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToStart()
+     */
+    public void moveToStart() {
+      curPosition = moveToNextFilled(0);
+    }
+
+    @Override
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
+     */
+    public void moveToEnd() {
+      curPosition = moveToPreviousFilled(bitMask);
+    }
+    
+  }
+  
+  /**
+   * 
+   * @param numberOfElements -
+   * @param factor -
+   * @return capacity of the main table (either 2 byte or 4 byte entries)
+   */
+  public static int tableSpace(int numberOfElements, Float factor) {
+    if (numberOfElements < 0) {
+      throw new IllegalArgumentException("must be > 0");
+    }
+    final int capacity = Math.round(numberOfElements / factor);
+    return  Math.max(16, Misc.nextHigherPowerOf2(capacity));
+  }
+
+  protected void debugValidate() {
+    // count non-0, non-removed, compare to size
+    int sum = 0;
+    for (int i = 0; i < keys_length(); i ++) {
+      if (is_valid_key(i)) {
+        sum ++;
+        if (sum > size) {
+          System.out.println("debug");
+        }
+      }
+    }
+  }
+  
+  protected int incrPos(int pos) {
+    return bitMask & (pos + 1);
+  }
+  
+  private byte[] init_lhb(int size) {
+    byte[] lhb = new byte[size];
+    Arrays.fill(lhb,  LHB_EMPTY);
+    return lhb;
+  }
+  
+  /**
+   * Compute extra probes for an entry.
+   * Handle cases where the table size is bigger or smaller than the
+   * size of the lower-hash-bits
+   * @param entry the lower-hash-bits, represents the original probe's last 7 
bits
+   * @param pos the current position
+   * @return the extra distance of this entry from the original probe
+   */
+  private int extraProbesForEntry(byte entry, int pos) {
+    return smaller_of_mask & (pos - entry);
+  }
+  
+  private void setSmallerOfMask () {
+    smaller_of_mask = Math.min(lower_hash_bits.length, 128) - 1; 
+  }
+}
diff --git 
a/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java 
b/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java
new file mode 100644
index 0000000..571be51
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSetRh.java
@@ -0,0 +1,794 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.NoSuchElementException;
+
+import org.apache.uima.util.impl.Constants;
+
+/**
+ * A set of non-zero ints. 
+ *   Can be negative.
+ *   
+ *   0 reserved internally to indicate "not in the map";
+ *   you will get an exception if you try to store 0 as a value.
+ *   
+ *   0 will be returned if the value is missing from the map.
+ *    
+ *   allowed range is Integer.MIN_VALUE to Integer.MAX_VALUE 
+ *     0 is the value for an empty cell
+ *     Integer.MIN_VALUE is the value for a deleted (removed) value
+ *   
+ * based on Int2IntHashMap
+ * This impl is for use in a single thread case only
+ * 
+ * Supports shrinking (reallocating the big table)
+ *
+ * Supports representing ints as "short" 2byte values if possible,
+ *   together with an offset amount.
+ *   Because of the offset, the adjusted key could be == to the offset,
+ *   so we subtract 1 from it to preserve 0 value as being the null / empty.
+ *   For short values, the range is:
+ *        Short.MIN_VALUE+2 to Short.MAX_VALUE after Offset,
+ *        with the "0" value moved down by 1 and 
+ *        the Short.MIN_VALUE used for deleted (removed) items 
+ *   
+ *   Automatically switches to full int representation if needed  
+ */
+public class IntHashSetRh extends Common_hash_support_rh implements 
PositiveIntSet {
+  
+  public static final int SIZE_NEEDING_4_BYTES = 256 * 256 - 2; 
+    
+  // this load factor gives, for array doubling strategy:
+  //   between 1.5 * 4 bytes (1 word for key) = 6 and
+  //           3   * 4                          12 bytes per entry
+  // This compares with 160 bytes/entry for the IntArrayRBT impl
+      
+  // offset only used with keys2.  values stored are key - offset
+  //   intent is to have them fit into short data type.
+  //   If the offset is 100,000, then the keys range from 100000 - 32766 to 
100000 + 32767, includes "0"
+  //                 -32767 == Short.MIN_VALUE + 1,   
+  //                 Numbers 0 and below are stored as n - 1, so "0" itself 
isn't actually stored
+  //                   because it's used to represent an empty slot
+  //   - value after key adjustment of 0 reserved; if value is 0 - means no 
entry
+  //   store values at or below the offset are stored as 1 less to avoid 
mapping to "0".
+  
+  private static final short EMPTY2 = 0;
+  private int offset;
+ 
+  private int [] keys4;
+  private short[] keys2;
+  
+  private boolean isMake4;
+  
+  // these are true values (before any offset adjustment)
+  private int mostPositive = Integer.MIN_VALUE;
+  private int mostNegative = Integer.MAX_VALUE;
+      
+  public IntHashSetRh() {
+    this(10, 0);
+  }
+  
+  public IntHashSetRh(int initialCapacity) {
+    this(initialCapacity, 0);
+  }
+  
+  /**
+   * 
+   * @param initialSizeBeforeExpanding - you can add this many before expansion
+   * @param offset - for values in the short range, the amount to subtract
+   *                 before storing.
+   *                 If == MIN_VALUE, then force 4 byte ints
+   */
+  public IntHashSetRh(int initialSizeBeforeExpanding, int offset) {
+    super(initialSizeBeforeExpanding);
+    isMake4 = offset == Integer.MIN_VALUE;
+    this.offset = isMake4 ? 0 : offset;
+    newTable(this.initialCapacity);
+    resetTable();
+    if (IS_TRACE_MODE_SWITCH) {
+      System.out.println("TRACE_MODE new IntHashSet, sizeBeforeExpanding = " + 
initialSizeBeforeExpanding + ", offset= " + offset);
+    }
+  }
+  
+//  /**
+//   * The number of 32 bit words that are reserved when 
+//   * creating a table to hold the specified number of elements
+//   * 
+//   * The number is a power of 2.
+//   * 
+//   * The number is at least 16.
+//   * 
+//   * The number is such that you could add this many elements without
+//   *   triggering the capacity expansion.
+//   *   
+//   * @param numberOfElements -
+//   * @return -
+//   */
+//  public int tableSpace(int numberOfElements) {
+//    return tableSpace(numberOfElements, loadFactor);
+//  }
+//  
+  
+  public static int tableSpace(int numberOfElements) {
+    return Common_hash_support.tableSpace(numberOfElements, 0.66f);
+  }
+  
+  
+  /**
+   * Method called by handleHashSet in PositiveIntSet
+   * to indicate if adding this many items would cause an expansion
+   * @return true if would not expand
+   */
+  public boolean wontExpand() {
+    return wontExpand(1);
+  }
+  
+  /**
+   * Method called by handleHashSet in PositiveIntSet
+   * to indicate if adding this many items would cause an expansion
+   * @param n the number of items added
+   * @return true if would not expand
+   */
+  public boolean wontExpand(int n) {
+    return (size() + n) < sizeWhichTriggersExpansion;  
+  }
+  
+  public int getSpaceUsedInWords() {
+    return (keys4 != null) ? keys4.length : (keys2.length >> 1);  
+  }
+  
+  public static int getSpaceOverheadInWords() {
+    return 11;
+  }
+    
+  /**
+   * Only call this if using short values with offset
+   * @param adjKey
+   * @return raw key
+   */
+  private int getRawFromAdjKey(int adjKey) {
+    assert (adjKey != Short.MIN_VALUE);
+    return adjKey + offset + ((adjKey < 0) ? 1 : 0); 
+  }
+  
+  
+  private void resetTable() {
+    mostPositive = Integer.MIN_VALUE;
+    mostNegative = Integer.MAX_VALUE;
+//    resetHistogram();
+//    size = 0;
+//    nbrRemoved = 0;    
+  }
+  
+  @Override
+  public void clear() {
+    super.clear();
+    resetTable();
+  }
+
+   
+  // only called when keys are shorts
+  private boolean isAdjKeyOutOfRange(int adjKey) {
+    return (adjKey > Short.MAX_VALUE ||
+           // the minimum adjKey value stored in a short is
+           // Short.MIN_VALUE + 1
+            adjKey <= Short.MIN_VALUE);   
+  }
+  
+  @Override
+  public boolean contains(int rawKey) {
+    int pos = findPosition(rawKey);
+    if (isEmpty(pos)) {
+      return false;
+    }
+    return get(pos) == rawKey;
+  }
+  
+  /** 
+   * This method is part of the PositiveSet API, and
+   * is defined to return an int that could be used with
+   *   iterators to position them.  
+   *   
+   * For this case, it is not used, because 
+   *   the iterators don't support positioning this way
+   *   because they are not sorted.
+   */
+  @Override
+  public int find(int rawKey) {
+    throw new UnsupportedOperationException();
+  }
+  
+
+  /**
+   * @param rawKey the key value to find
+   * @return the position in the table if present, otherwise the position of 
the slot where the 
+   *         key value would be added, unless the new value is at a position 
which would require
+   *         the key2 form to be switched to the key4 form, in which case,
+   *         -1 is returned (means not found, and requires conversion to 4 
byte keys)
+   */
+  private int findPosition(int rawKey) {
+    
+    if (rawKey == 0) {  
+      throw new IllegalArgumentException("0 is an invalid key");
+    }
+    
+    if (keys4 == null) {
+      // special handling for 2 byte keys with offsets
+      // check for keys never stored in short table 
+      //   adjKey of Short.MIN_VALUE which is the removed flag
+      final int adjKey = getAdjKey(rawKey);
+      if (isAdjKeyOutOfRange(adjKey)) {
+        return -1;
+      }
+      
+      
+      return findPositionAdjKey(adjKey);
+      
+    } else {
+      
+      
+      return findPosition4(rawKey);
+    }
+    
+  }
+   
+  private int findPosition4(int rawKey) {
+    
+    return super.findPosition(
+        
+        // key hash
+        Misc.hashInt(rawKey),
+        
+        // is_eq || not present
+        i -> keys4[i] == rawKey || keys4[i] == 0
+        
+      );    
+  }
+  
+  private int findPositionAdjKey(int adjKey) {
+    // special handling for 2 byte keys with offsets
+    // check for keys never stored in short table 
+    //   adjKey of Short.MIN_VALUE which is the removed flag
+        
+    return super.findPosition(
+          
+          // key hash
+          Misc.hashInt(adjKey),
+          
+          // is_eq
+          i -> keys2[i] == 0 || keys2[i] == adjKey
+        );    
+  }
+  
+  
+  /**
+   * return the adjusted key.
+   *   never called for 4 byte form
+   *   for 2 byte key mode, subtract the offset, and adjust by -1 if 0 or less
+   *     Note: returned value can be less than Short.MIN_VALUE 
+   * @param rawKey
+   * @return adjusted key, a range from negative to positive, but never 0
+   */
+  private int getAdjKey(int rawKey) {
+//    if (rawKey == 0 || (rawKey == Integer.MIN_VALUE)) {
+//      throw new ArrayIndexOutOfBoundsException(rawKey);
+//    }
+//    if (keys4 != null) {
+//      return rawKey;
+//    }
+    int adjKey = rawKey - offset;
+    return adjKey - (  (adjKey <= 0) ? 1 : 0);
+  }
+  
+  private void switchTo4byte(int capacity) {
+    // convert to 4 byte because values can't be offset and fit in a short
+    final short[] oldKeys = keys2;
+    isMake4 = true;
+    newTable(capacity);  // make a 4 table. same size
+    for (short adjKey : oldKeys) {
+      if (adjKey != EMPTY2 ) {
+        find4AndAddIfMissing(getRawFromAdjKey(adjKey));
+      }
+    } 
+  }
+   
+  /**
+   * 
+   * @param rawKey -
+   * @return true if this set did not already contain the specified element
+   */
+  @Override
+  public boolean add(int rawKey) {
+    if (rawKey == 0) {
+      throw new IllegalArgumentException("argument must be non-zero");
+    }
+       
+    if (size() == 0) {
+      mostPositive = mostNegative = rawKey;
+    } else {
+      if (rawKey > mostPositive) {
+        mostPositive = rawKey;
+      }
+      if (rawKey < mostNegative) {
+        mostNegative = rawKey;
+      }
+    }
+    
+    if (keys4 != null) {
+      return maybeAdd4(rawKey);      
+      // short keys
+    } else {
+      int adjKey = getAdjKey(rawKey);
+      if (isAdjKeyOutOfRange(adjKey)) {
+        switchTo4byte(getCapacity());
+        return maybeAdd4(rawKey);
+        
+        // key in range
+      } else {
+        return maybeAdd2(adjKey);
+      }
+    }
+  }
+  
+  private boolean maybeAdd2(int adjKey) {
+    boolean wasAdded = find2AndAddIfMissing(adjKey);  
+    if (wasAdded) {
+      incrementSize();
+    }
+    return wasAdded;
+  }
+    
+  private boolean maybeAdd4(int rawKey) {
+    boolean wasAdded = find4AndAddIfMissing(rawKey);
+    if (wasAdded) {
+      incrementSize();
+    }
+    return wasAdded;     
+  }
+  
+  private boolean find4AndAddIfMissing(int rawKey) {
+   
+    int i = findPosition4(rawKey);
+    if (keys4[i] == rawKey) {
+      return false;  // already in table
+    }
+    
+    int c = keys4[i];
+    int saved = c;
+    
+    byte prev_lhb = setLhb(i, initial_probe_lhb);
+    keys4[i] = rawKey;
+    
+    if (saved == 0) {
+      return true;
+    }
+    // robin hood - if stole slot, add that slot back
+    rh_add4(incrPos(i), saved, prev_lhb);   
+    return true;
+  }
+  
+  private void rh_add4(int pos, int v, byte lhb) {
+    while (true) {
+      int c = keys4[pos];
+      int saved = c;  // if empty, is 0
+      
+      byte prev_lhb = setLhb(pos, lhb);
+      keys4[pos] = v;
+      
+      if (saved == 0) {
+        break;
+      }
+      // robin hood - if stole slot, add that slot back
+      v = saved;
+      lhb = prev_lhb;
+      pos = incrPos(pos);
+    }
+  }
+  
+  private boolean find2AndAddIfMissing(int adjKey) {
+    int i = findPositionAdjKey(adjKey);
+ 
+    short c = keys2[i];
+    if (c == (short)adjKey) {
+      return false;  // already in table
+    }
+    
+    short saved = c;  // if empty, is 0
+    
+    byte prev_lhb = setLhb(i, initial_probe_lhb);
+    keys2[i] = (short) adjKey;
+    
+    if (saved == 0) {
+      return true;
+    }
+    // robin hood - if stole slot, add that slot back
+    rh_add2(incrPos(i), saved, prev_lhb);
+    return true;    
+  }
+  
+  private void rh_add2(int pos, short v, byte lhb) {
+    while (true) {
+      short c = keys2[pos];
+      short saved = c;  // if empty, is 0
+      
+      byte prev_lhb = setLhb(pos, lhb);
+      keys2[pos] = v;
+      
+      if (saved == 0) {
+        break;
+      }
+      // robin hood - if stole slot, add that slot back
+      v = saved;
+      lhb = prev_lhb;
+      pos = incrPos(pos);
+    }
+    
+  }
+          
+  @Override
+  protected void shift(int prev, int next) {
+    if (keys4 == null) {
+      keys2[prev] = keys2[next];
+    } else {
+      keys4[prev] = keys4[next];
+    }
+  }
+  
+  @Override
+  protected void setEmpty(int pos) { 
+    if (keys4 == null) {
+      keys2[pos] = 0;
+    } else {
+      keys4[pos] = 0;
+    }
+    
+  }
+  /**
+   * mostPositive and mostNegative are not updated
+   *   for removes.  So these values may be inaccurate,
+   *   but mostPositive is always &gt;= actual most positive,
+   *   and mostNegative is always &lt;= actual most negative.
+   * No conversion from int to short
+   *
+   * Does slot shifting on remove
+   * 
+   * @param rawKey the value to remove
+   * @return true if the key was present
+   */
+  @Override
+  public boolean remove(int rawKey) {
+//    debugValidate();
+    final int pos = findPosition(rawKey);
+    if (isEmpty(pos)) {
+      return false;
+    }
+    
+    if (get(pos) != rawKey) {
+      return false;
+    }
+    remove_common(pos);
+    
+//    //debug
+//    if (size() <= 0) 
+//      System.out.println("debug");
+//    assert size() > 0;
+
+    if (rawKey == mostPositive) {
+      mostPositive --;  // a weak adjustment
+    }
+    if (rawKey == mostNegative) {
+      mostNegative ++;  // a weak adjustment
+    }
+   
+    return true;
+  }    
+    
+  /**
+   * 
+   * @return a value that is &gt;= the actual most positive value in the table.
+   *   it will be == unless a remove operation has removed a most positive 
value
+   */
+  public int getMostPositive() {
+    return mostPositive;
+  }
+  
+  /**
+   * 
+   * @return a value that is &lt;= the actual least positive value in the 
table.
+   *   It will be == unless remove operations has removed a least positive 
value.
+   */
+  public int getMostNegative() {
+    return mostNegative;
+  }
+   
+  public void showHistogram() {
+    if (TUNE) {
+      int sumI = 0;
+      for (int i : histogram) {
+        sumI += i;
+      }
+      
+      System.out.format(
+          "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr 
of find operations at last table size=%,d%n",
+          loadFactor, maxProbe, sumI);
+      for (int i = 0; i <= maxProbe; i++) {
+        if (i == maxProbe && histogram[i] == 0) {
+          System.out.println("huh?");
+        }
+        System.out.println(i + ": " + histogram[i]);
+      }     
+      
+      if (keys4 == null) {
+        System.out.println("bytes / entry = " + (float) (keys2.length) * 2 / 
size());
+        System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = 
%,d%n",
+          size(),
+          (int) ((keys2.length >>> 1) * loadFactor),
+          (int) (keys2.length * loadFactor));
+      } else {
+        System.out.println("bytes / entry = " + (float) (keys4.length) * 4 / 
size());
+        System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = 
%,d%n",
+          size(),
+          (int) ((keys4.length >>> 1) * loadFactor),
+          (int) (keys4.length * loadFactor));        
+      }
+    }
+  }
+  
+  /**
+   * For iterator use, position is a magic number returned by the internal find
+   * For short keys, the value stored for adjKey == 0 is -1, adjKey == -1 is 
-2, etc.
+   */
+  @Override
+  public int get(int pos) {
+    final int adjKey;
+    if (keys4 == null) {
+      adjKey = keys2[pos];
+      if (adjKey == 0 ) {
+        return 0;  // null, not present
+      }
+      return getRawFromAdjKey(adjKey);
+    } else {
+      adjKey = keys4[pos];
+      if (adjKey == 0 ) {
+        return 0;  // null, not present
+      }
+      return adjKey;
+    }
+  }
+  
+
+  private class IntHashSetIterator implements IntListIterator {
+
+    private int curPosition;
+
+    private IntHashSetIterator() {
+      this.curPosition = 0;
+    }
+
+    public final boolean hasNext() {
+      curPosition = moveToNextFilled(curPosition);
+      return curPosition < getCapacity();
+    }
+
+    public final int nextNvc() {
+      curPosition = moveToNextFilled(curPosition);
+      int r = get(curPosition);
+      curPosition = moveToNextFilled(curPosition + 1);
+      return r;
+    }
+    
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#hasPrevious()
+     */
+    public boolean hasPrevious() {
+      int prev = moveToPreviousFilled(curPosition - 1);
+      return (prev >= 0);
+    }
+    
+    @Override
+    public int previous() {
+      curPosition = moveToPreviousFilled(curPosition - 1);
+      if (curPosition < 0) {
+        throw new NoSuchElementException();
+      }
+      return get(curPosition);
+    }
+    
+    @Override
+    public int previousNvc() {
+      curPosition = moveToPreviousFilled(curPosition - 1);
+      return get(curPosition);
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
+     */
+    public void moveToEnd() {
+      curPosition = getCapacity() - 1;
+    }
+
+    /**
+     * @see org.apache.uima.internal.util.IntListIterator#moveToStart()
+     */
+    public void moveToStart() {
+      curPosition = 0;
+    }
+  } 
+  
+  
+  @Override
+  public IntListIterator iterator() {
+    return new IntHashSetIterator();
+  }
+
+  @Override
+  public int moveToFirst() {
+    return (size() == 0) ? -1 : moveToNextFilled(0);
+  }
+
+  @Override
+  public int moveToLast() {
+    return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1);
+  }
+
+  @Override
+  public int moveToNext(int position) {
+    if (position < 0) {
+      return position;
+    }
+    final int n = moveToNextFilled(position + 1); 
+    return (n >= getCapacity()) ? -1 : n;
+  }
+
+  @Override
+  public int moveToPrevious(int position) {
+    if (position >= getCapacity()) {
+      return -1;
+    }
+    return moveToPreviousFilled(position - 1);
+  }
+
+  @Override
+  public boolean isValid(int position) {
+    return (position >= 0) && (position < getCapacity());
+  }
+
+  @Override
+  public void bulkAddTo(IntVector v) {
+    if (null == keys4) {
+      for (int k : keys2) {
+        if (k != 0) {
+          v.add(getRawFromAdjKey(k));
+        }
+      }
+    } else {
+      for (int k : keys4) {
+        if (k != 0) {
+          v.add(k);
+        }
+      }
+    }
+  }
+
+  @Override
+  public int[] toIntArray() {
+    final int s = size();
+    if (s == 0) {
+      return Constants.EMPTY_INT_ARRAY;
+    }
+    final int[] r = new int[size()];
+    int pos = moveToFirst();
+    for (int i = 0; i < r.length; i ++) {
+      r[i] = get(pos);
+      pos = moveToNextFilled(pos + 1);
+    }
+    return r;
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#toString()
+   */
+  @Override
+  public String toString() {
+    return String
+        .format(
+            "IntHashSet [loadFactor=%s, initialCapacity=%s, 
sizeWhichTriggersExpansion=%s, size=%s, offset=%s%n keys4=%s%n keys2=%s%n 
secondTimeShrinkable=%s, mostPositive=%s, mostNegative=%s]",
+            loadFactor, initialCapacity, sizeWhichTriggersExpansion, size(), 
offset,
+            Arrays.toString(keys4), Arrays.toString(keys2), 
secondTimeShrinkable, mostPositive,
+            mostNegative);
+  }
+  
+  // for testing
+  boolean isShortHashSet() {
+    return keys2 != null;
+  }
+  
+  int getOffset() {
+    return offset;
+  }
+
+  @Override
+  protected boolean is_valid_key(int pos) {
+    if (keys4 == null) {
+      return keys2[pos] != 0;
+    }
+    return keys4[pos] != 0;
+  }
+
+  @Override
+  protected int keys_length() {
+    return (keys4 == null) ? keys2.length : keys4.length;
+  }
+
+  @Override
+  protected void newKeysAndValues(int capacity) {
+    if (isMake4) {
+      keys4 = new int[capacity];
+      keys2 = null;
+    } else {
+      keys2 = new short[capacity];
+      keys4 = null;
+    }
+  }
+
+  @Override
+  protected void clearKeysAndValues() {
+    if (keys4 == null) {
+      Arrays.fill(keys2,  (short)0);
+    } else {
+      Arrays.fill(keys4, 0);
+    }  
+    resetTable();
+  }
+
+  @Override 
+  protected void expand_table() {
+    int old_capacity = keys_length();
+    int new_capacity = old_capacity * 2;
+    
+    if (keys4 == null) {
+      if (new_capacity >= 256 * 256) { 
+        // switch to 4
+        if (TUNE) {System.out.println("Switching to 4 byte keys");}
+        switchTo4byte(new_capacity);
+      } else {
+        final short[] oldKeys = keys2;
+        newTable(new_capacity);
+        for (int v :oldKeys) {
+          if (v != 0) {
+            find2AndAddIfMissing(v);
+          }
+        }
+      }      
+    } else {
+      // keys4
+      final int[] oldKeys = keys4;
+      newTable(new_capacity);
+      for (int v :oldKeys) {
+        if (v != 0) {
+          find4AndAddIfMissing(v);
+        }
+      }      
+    }
+  } 
+  
+}
diff --git 
a/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java 
b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java
new file mode 100644
index 0000000..d70c7d3
--- /dev/null
+++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.uima.cas.FeatureStructure;
+
+/**
+ * A set of Objects of type T 
+
+ * This impl is for use in a single thread case only, when table is being 
updated.
+ * Multiple reader threads are OK if there's no writing.
+ * 
+ * Supports shrinking (reallocating the big table)
+ * 
+ * Uses robin hood method, with 
+ *   incrementing collision resolution, and shift backwards on remove, and 1 
byte lower-hash-bits aux table 
+ *   for locality of ref.
+ */
+public class ObjHashSetRh<T> extends Common_hash_support_rh implements Set<T> {
+  
+//  public static final float DEFAULT_LOAD_FACTOR = 0.66F;
+
+  private final Class<T> clazz;  // for reifying the T type, resizing table, 
toArray operations
+   
+//  private final float loadFactor = DEFAULT_LOAD_FACTOR;  
+  
+//  private final int initialCapacity; 
+//
+//  private int histogram [];
+//  private int maxProbe = 0;
+//
+//  private int sizeWhichTriggersExpansion;
+//  private int size; // number of elements in the table
+//  private int nbrRemoved; // number of removed elements (coded as removed)
+  
+  // the actual Object table, operated as a hashtable
+  private T [] keys;
+//  final private T [] emptyKeyArray;  
+
+//  private boolean secondTimeShrinkable = false;
+  
+//  private int modificationCount = 0; // not currently used
+  
+  public ObjHashSetRh(Class<T> clazz) {
+    this(12, clazz);  // default initial size
+  }
+    
+  /**
+   * @param initialCapacity - you can add this many before expansion
+   * @param clazz - a superclass of the stored items
+   * @param removedMarker - a unique value never stored in the table, used to 
mark removed items
+   */
+  public ObjHashSetRh(int initialCapacity, Class<T> clazz) {
+    super(initialCapacity);
+    this.clazz = clazz;
+    newTable(initialCapacity);
+  }
+  
+  /**
+   * Copy constructor
+   * @param ohs -
+   */
+  public ObjHashSetRh(ObjHashSetRh<T> ohs) {
+    super(ohs);
+    this.clazz = ohs.clazz;
+    this.keys = Arrays.copyOf(ohs.keys, ohs.keys.length);
+  }
+  
+
+  public ObjHashSetRh(ObjHashSetRh<T> ohs, boolean readOnly) {
+    this(ohs);
+    if (!readOnly) Misc.internalError();
+  }
+
+
+  /** 
+  * returns a position in the key/value table
+  *   if the key is not found, then the position will be to the
+  *   place where the key would be put upon adding (without reusing REMOVED, 
and the 
+  *   current internal value of keys[position] would be 0.
+  *   
+  *   if the key is found, then keys[position] == key
+  * @param obj the object to find
+  * @return the probeAddr in keys array - might reference a slot holding null, 
or the key value if found
+  */
+  private int findPosition(final T key) {
+    if (key == null) {
+      throw new IllegalArgumentException("null is an invalid key");
+    }
+    
+    return findPosition(
+        
+        // key hash
+        Misc.hashInt(key.hashCode()),
+        
+        // is_eq
+        i -> keys[i].equals(key));
+  }
+  
+
+  @Override
+  public boolean contains(Object obj) {  // arg must be Object to fit 
Collection API
+    return (clazz.isAssignableFrom(obj.getClass())) ? (find((T) obj) != -1) : 
false;
+  }
+  
+  /**
+   * @param obj the object to find in the table (if it is there)
+   * @return the position of obj in the table, or -1 if not in the table
+   */
+  public int find(T obj) {
+    if (obj == null || size() == 0) {
+      return -1;
+    }
+    
+    final int pos = findPosition(obj);
+    return obj.equals(keys[pos]) ? pos : -1;  // keys[pos] can be null
+  }
+      
+  /**
+   * 
+   * @param obj - the object to add
+   * @return true if this set did not already contain the specified element
+   */
+  @Override
+  public boolean add(T obj) {           
+    final int i = findPosition(obj);
+    T c = keys[i];
+    if (obj.equals(c)) { // keys[i] may be null
+      return false;  // false if already present
+    }
+    
+    // i is ref to empty or non-empty (but not equal) slot which should be 
stolen
+    add_new(obj, c, i);
+    incrementSize();
+    return true;
+  }
+  
+  private void add_new(T obj, T saved, int i) {
+    keys[i] = obj;
+    byte prev_lhb = setLhb(i, initial_probe_lhb);
+    
+    if (saved != null) {
+      rh_add(incrPos(i), saved, prev_lhb);    
+    }    
+  }
+
+  private void rh_add(int pos,T v, byte lhb) {
+    while (true) {
+      T c = keys[pos];
+      T saved = c;  // if empty, is 0
+      
+      byte prev_lhb = setLhb(pos, lhb);
+      keys[pos] = v;
+      
+      if (saved == null) {
+        break;
+      }
+      // robin hood - if stole slot, add that slot back
+      v = saved;
+      lhb = prev_lhb;
+      pos = incrPos(pos);
+    } 
+  }
+  
+  @Override
+  protected void shift(int prev, int next) {
+      keys[prev] = keys[next];
+  }
+  
+  @Override
+  protected void setEmpty(int pos) { 
+    keys[pos] = null;
+  }
+    
+  
+  /**
+   * Using robin hood shifting for removed item
+   * @param rawKey the value to remove
+   * @return true if the key was present
+   */
+  @Override
+  public boolean remove(Object rawKey) {
+    if (rawKey == null) {
+      return false;
+    }
+    
+    final int pos = findPosition((T) rawKey);  // null or equal or collision 
obj
+    T c = keys[pos];
+    if (c == null || ! c.equals(rawKey)) {
+      return false;
+    }
+    
+    remove_common(pos);
+    return true;
+  } 
+  
+  private void removeAtPosition(int pos) {
+    remove_common(pos);
+  }
+  
+
+//  /**
+//   * @see Set#size()
+//   */
+//  @Override
+//  public int size() {
+//    return size;
+//  }
+  
+//  public void showHistogram() {
+//    if (TUNE) {
+//      int sumI = 0;
+//      for (int i : histogram) {
+//        sumI += i;
+//      }
+//      
+//      System.out.format(
+//          "Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d 
nbr of find operations at last table size=%,d%n",
+//          loadFactor, maxProbe, sumI);
+//      for (int i = 0; i <= maxProbe; i++) {
+//        if (i == maxProbe && histogram[i] == 0) {
+//          System.out.println("huh?");
+//        }
+//        System.out.println(i + ": " + histogram[i]);
+//      }     
+//      
+//      System.out.println("bytes / entry = " + (float) (keys.length) * 4 / 
size());
+//      System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = 
%,d%n",
+//        size(),
+//        (int) ((keys.length >>> 1) * loadFactor),
+//        (int) (keys.length * loadFactor));        
+//    }
+//  }
+  
+  /**
+   */
+  /**
+   * For iterator use
+   * @param index - a magic number returned by the internal find
+   * @return the T at that spot, or null if nothing there 
+   */
+  public T get(int index) {
+    T obj = keys[index];
+    if (obj == null) {
+      return null;  // null, not present
+    }
+    return obj;
+  }
+  
+////  /**
+////   * advance pos until it points to a non 0 or is 1 past end
+////   * @param pos -
+////   * @return updated pos
+////   */
+////  public int moveToNextFilled(int pos) {
+//////    if (pos < 0) {
+//////      pos = 0;
+//////    }
+////    
+////    final int max = getCapacity();
+////    while (true) {
+////      if (pos >= max) {
+////        return pos;
+////      }
+////      T v = keys[pos];
+////      if (v != null && v != removedMarker) {
+////        return pos;
+////      }
+////      pos ++;
+////    }
+////  }
+//   
+//  /**
+//   * decrement pos until it points to a non 0 or is -1
+//   * @param pos -
+//   * @return updated pos
+//   */
+//  public int moveToPreviousFilled(int pos) {
+//    final int max = getCapacity();
+//    if (pos > max) {
+//      pos = max - 1;
+//    }
+//    
+//    while (true) {
+//      if (pos < 0) {
+//        return pos;
+//      }
+//      T v = keys[pos];
+//      if (v != null && v != removedMarker) {
+//        return pos;
+//      }
+//      pos --;
+//    }
+//  }
+
+  private class ObjHashSetIterator implements Iterator<T> {
+
+    /**
+     * Keep this always pointing to a non-0 entry, or
+     * if not valid, outside the range
+     */
+    protected int curPosition;
+
+    private ObjHashSetIterator() {
+      this.curPosition = moveToNextFilled(0);
+    }
+
+    @Override
+    public final boolean hasNext() {
+      return curPosition < getCapacity();
+    }
+
+    @Override
+    public final T next() {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      T r = get(curPosition);
+      curPosition = moveToNextFilled(curPosition + 1);
+      return r;
+    }
+
+    @Override
+    public void remove() {
+      int pos = moveToPreviousFilled(curPosition - 1);
+      if (pos >= 0) {
+        removeAtPosition(pos);
+      }
+    }   
+  }
+  
+    
+  @Override
+  public Iterator<T> iterator() {
+    return new ObjHashSetIterator();
+  }
+  
+  private int moveToFirst() {
+    return (size() == 0) ? -1 : moveToNextFilled(0);
+  }
+
+//  private int moveToLast() {
+//    return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1);
+//  }
+
+//  private int moveToNext(int position) {
+//    if (position < 0) {
+//      return position;
+//    }
+//    final int n = moveToNextFilled(position + 1); 
+//    return (n >= getCapacity()) ? -1 : n;
+//  }
+//
+//  private int moveToPrevious(int position) {
+//    if (position >= getCapacity()) {
+//      return -1;
+//    }
+//    return moveToPreviousFilled(position - 1);
+//  }
+
+//  public boolean isValid(int position) {
+//    return (position >= 0) && (position < getCapacity());
+//  }
+  
+  /**
+   * if the fs is in the set, the iterator should return it.
+   * if not, return -1 (makes iterator invalid)
+   * @param fs position to this fs
+   * @return the index if present, otherwise -1;
+   */
+  public int moveTo(FeatureStructure fs) {
+    if (clazz.isAssignableFrom(fs.getClass())) {
+      int pos = find((T)fs);
+      if (pos >= 0) {
+        return pos;
+      }
+    }
+    return -1; 
+  }
+
+  @Override
+  public <T2> T2[] toArray(T2[] a) {
+    final int s = size();
+    if (s == 0) {
+      if (a.length >= 1) { 
+        a[0] = null;  // part of the contract of toArray, where the array a 
size is > 
+      }
+      return a;
+    }
+    
+    final T2[] r = (a.length >= s)? a : (T2[]) Array.newInstance(a.getClass(), 
s);
+    int pos = moveToFirst();
+    for (int i = 0; i < s; i ++) {
+      r[i] = (T2) get(pos);
+      pos = moveToNextFilled(pos + 1);
+    }
+    if (a.length > s) {
+      r[s] = null; // part of the contract of toArray, where the array a size 
is > 
+    }
+    return r;
+  }
+
+  @Override
+  public T[] toArray() {
+    return toArray((T[]) Array.newInstance(clazz, size()));
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#toString()
+   */
+  @Override
+  public String toString() {
+    return String
+        .format(
+            "%s [loadFactor=%s, initialCapacity=%s, 
sizeWhichTriggersExpansion=%s, size=%s, secondTimeShrinkable=%s%n keys=%s]",
+            this.getClass().getName(), loadFactor, initialCapacity, 
sizeWhichTriggersExpansion, size(), secondTimeShrinkable, 
+            Arrays.toString(keys));
+  }
+
+  // Collection methods
+  @Override
+  public boolean isEmpty() {
+    return size() == 0;
+  }
+
+  @Override
+  public boolean containsAll(Collection<?> c) {
+    c.stream().allMatch(item -> contains(item));
+    return false;
+  }
+
+  @Override
+  public boolean addAll(Collection<? extends T> c) {
+    boolean[] anyChanged = {false};
+    c.stream().forEach(item -> anyChanged[0] |= add(item));
+    return anyChanged[0];
+  }
+
+  @Override
+  public boolean removeAll(Collection<?> c) {
+    boolean[] anyChanged = {false};
+    c.stream().forEach(item -> anyChanged[0] |= remove(item));
+    return anyChanged[0];
+  }
+
+  @Override
+  public boolean retainAll(Collection<?> c) {
+    boolean anyChanged = false;
+    Iterator<T> it = iterator();
+    while (it.hasNext()) {
+      T v = it.next();
+      if (!c.contains(v)) {
+        anyChanged = true;
+        it.remove();
+      }
+    }
+    return anyChanged;
+  }
+
+  @Override
+  protected boolean is_valid_key(int pos) {
+    return keys[pos] != null;
+  }
+
+  @Override
+  protected int keys_length() {
+    return keys.length;
+  }
+
+  @Override
+  protected void newKeysAndValues(int capacity) {
+    keys = (T[]) Array.newInstance(clazz, capacity);
+  }
+
+  @Override
+  protected void clearKeysAndValues() {
+    Arrays.fill(keys, null);   
+  }
+
+  @Override
+  protected void expand_table() {
+    int old_capacity = keys_length();
+    int new_capacity = old_capacity * 2;
+    
+    final T[] oldKeys = keys;
+    newTable(new_capacity);
+    for (T v : oldKeys) {
+      if (v != null) { 
+        int pos = findPosition_new( Misc.hashInt(v.hashCode()));
+        add_new(v, keys[pos], pos);
+      }
+    }      
+  }
+
+  
+}
diff --git 
a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
 
b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
new file mode 100644
index 0000000..5bb5286
--- /dev/null
+++ 
b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Random;
+
+import org.apache.uima.internal.util.rb_trees.IntArrayRBT;
+
+import junit.framework.TestCase;
+
+public class IntHashSetPerfTestRh extends TestCase {
+  /**
+   * Set to false to run the performance test
+   * 
+   * Tests both IntHashSet and IntBitSet
+   */
+  final boolean SKIP = false;
+  
+  static int cacheLoadSize;
+  
+  static long seed = 3737463135938899369L;
+//      new Random().nextLong();
+  static {
+    System.out.println("Random seed for IntHashSetPerfTest: "  + seed);
+  }
+  Random r = new Random(seed);
+
+//  Set<Integer> keys = new HashSet<Integer>(1000);
+  
+  int dmv = 0;
+  
+  IntHashSetRh m2;
+  IntArrayRBT m1;
+  IntBitSet m3;
+  
+  final int[] keys10000 = new int[511111];
+  int k10ki = 0;
+  
+  
+  public void testPerf() {
+    if (SKIP) return;
+    m1 = new IntArrayRBT(16);
+    m2 = new IntHashSetRh(16);
+    m3 = new IntBitSet(16);
+     
+    for (int i = 0; i < keys10000.length; i++) {
+      int k = r.nextInt(511110);
+     
+      keys10000[i] = k + 1;
+    }
+
+    System.out.format("%n%n W A R M U P %n%n");
+    cacheLoadSize = 0;
+    warmup(m1);
+    warmup(m2);
+    
+    System.out.format("%n%n Time 100 %n%n");
+    timelp(100);
+    System.out.format("%n%n Time 1000 %n%n");
+    timelp(1000);
+    System.out.format("%n%n Time 10000 %n%n");
+    timelp(10000);
+    System.out.format("%n%n Time 100000 %n%n");
+    timelp(100000);
+    cacheLoadSize = 0; // 1 * 256 * 1;
+    System.out.format("%n%n Time 100000 %n%n");
+    timelp(100000);
+    
+    System.out.format("%n%n Time 500000 %n%n");
+    timelp(500000);
+
+    System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+    timelp(500000);
+    System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+    timelp(500000);
+    System.out.format("%n%n For Yourkit: Time 500000 %n%n");
+    timelp(500000);
+
+    
+    System.out.println(dmv);
+  }
+  private void time2(int n) {
+//    float f1 = time(m1, n);
+    float f2 = time(m2, n);
+    float f3 = time(m3, n);
+    System.out.format(" ratio "
+//        + "RBT/hash = %.3f   RTB/bitset = %.3f  "
+        + "hash/bitset = %.3f%n", 
+//        f1/f2,  f1/f3, 
+        f2/f3);   
+  }
+  
+  private void timelp(int n) {
+    time2(n);
+    time2(n);
+    time2(n);
+  }
+
+  private void warmup(Object m) {
+    for (int i = 0; i < 500; i++) {
+      inner(m,true, 1000) ; // warm up
+    }
+  }
+  
+  private float time(Object m, int ss) {
+    long start = System.nanoTime();
+    for (int i = 0; i < 500; i++) {
+      inner(m,false, ss);
+    }
+    float t = (System.nanoTime() - start) / 1000000.0F;
+    System.out.format("time for %,d:  %s is %.3f milliseconds %n", ss,
+        m.getClass().getSimpleName(),
+        t);
+    return t;
+  }
+  
+  private int nextKey() {
+    int r = keys10000[k10ki++];
+    if (k10ki >= keys10000.length) {
+      k10ki = 0;
+    }
+    return r;
+  }
+  
+  private void inner(Object m, boolean check, int ss) {
+    CS cs = new CS(m);     
+
+    for (int i = 0; i < ss; i++) {
+      
+      int k = keys10000[i];
+//      System.out.print(" " + k);
+//      if (i %100 == 0) System.out.println("");
+//      keys.add(k);
+      cs.add(k);
+      cacheLoad(i);
+      if (check) {
+        assertTrue(cs.contains(k));
+      }
+    }
+    for (int i = 0; i < ss; i++) {
+      boolean v = cs.contains(keys10000[i]); 
+      if (!v) {
+        throw new RuntimeException("never happen");
+      }
+      dmv += 1;
+      cacheLoad(i);
+    }
+    cs.clear();
+    
+
+//    for (int k : keys) {     
+//      assertEquals(10000 + k, (m instanceof IntHashSetRh) ? 
+//          ((IntHashSetRh)m).get(k) :
+//          ((IntArrayRBT)m).getMostlyClose(k));
+//    }
+
+  }
+  
+  static class CS {
+    final Object set;
+    
+    CS(Object set) {
+      this.set = set;
+    }
+    
+    boolean contains(int i) {
+      return (set instanceof IntArrayRBT) ? ((IntArrayRBT)set).contains(i) :
+             (set instanceof IntHashSetRh)  ? ((IntHashSetRh) set).contains(i) 
:
+                                            ((IntBitSet)  set).contains(i);
+    }
+    
+    void add(int i) {
+      if (set instanceof IntArrayRBT) {
+        ((IntArrayRBT)set).add(i);
+      } else if (set instanceof IntHashSetRh) {
+        ((IntHashSetRh)set).add(i);
+      } else {
+        ((IntBitSet)set).add(i);
+      }    
+    }
+    
+    void clear() {
+      if (set instanceof IntArrayRBT) {
+        ((IntArrayRBT)set).clear();
+      } else if (set instanceof IntHashSetRh) {
+        ((IntHashSetRh)set).clear();
+      } else {
+        ((IntBitSet)set).clear();
+      }    
+    }
+
+  }
+  
+  void cacheLoad(int i) {
+    if (cacheLoadSize > 0) {
+      int[] cl = new int[cacheLoadSize];
+      if (i != 100000) {
+        cl = null;
+      }
+    }
+  }
+}
diff --git 
a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java 
b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java
new file mode 100644
index 0000000..09d3365
--- /dev/null
+++ 
b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTestRh.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+
+public class IntHashSetTestRh extends TestCase {
+  
+  IntHashSetRh ihs;
+  
+  Random random;
+  
+  public void setUp() {
+    ihs = new IntHashSetRh();
+  }
+  
+  public void testBasic() {
+    
+    ihs.add(15);
+    ihs.add(188);
+    int[] sv = getSortedValues(ihs);
+    assertEquals(2, sv.length);
+    assertEquals(15, sv[0]);
+    assertEquals(188, sv[1]);
+    assertEquals(15, ihs.getMostNegative());
+    assertEquals(188, ihs.getMostPositive());
+    
+    // test most positive / negative
+    ihs.clear();
+    ihs.add(189);
+    assertEquals(189, ihs.getMostNegative());
+    assertEquals(189, ihs.getMostPositive());
+    ihs.add(1000);
+    ihs.add(-1000);
+    assertEquals(1000, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.add(500);
+    ihs.add(-500);
+    assertEquals(1000, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.remove(1000);
+    assertEquals(999, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.add(1001);    
+    assertEquals(1001, ihs.getMostPositive());
+    sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{-1000, -500, 189, 500, 1001}));
+  }
+  
+  public void testSwitching224() {
+    final int OS = 100000;
+    ihs = new IntHashSetRh(16, OS);
+    ihs.add(OS - 1);
+    ihs.add(OS);
+    ihs.add(OS + 1);
+    int[] sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{99999, 100000, 100001 }));
+    ihs.add(OS - 32767);
+    sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{OS - 32767, 99999, 100000, 
100001}));
+    ihs.add(OS - 32768);
+    sv = getSortedValues(ihs);
+    assertTrue(Arrays.equals(sv, new int[]{OS - 32768, OS - 32767, 99999, 
100000, 100001}));
+    
+  }
+  
+  private int[] getSortedValues(IntHashSetRh s) {
+    IntListIterator it = s.iterator();
+    int[] r = new int[s.size()];
+    int i = 0;
+    while (it.hasNext()) {
+      r[i++] = it.nextNvc();
+    }
+    Arrays.sort(r);
+    return r;
+  }
+  
+  public void testContains() {
+    ihs.add(1188);
+    ihs.add(1040);
+    assertTrue(ihs.contains(1188));
+    assertTrue(ihs.contains(1040));
+    assertFalse(ihs.contains(1));
+    assertFalse(ihs.contains(99));  
+  }
+  
+//  public void testTableSpace() {
+//    assertEquals(32, IntHashSetRh.tableSpace(19, 0.6f));    // 19 / .6 = 
31.xxx, round to 32
+//    assertEquals(64, IntHashSetRh.tableSpace(21, 0.6f));
+//    assertEquals(32, ihs.tableSpace(21));
+//  }
+  
+  public void testWontExpand() {
+    ihs = new IntHashSetRh(21);
+    assertEquals(16, ihs.getSpaceUsedInWords());
+    assertTrue(ihs.wontExpand(20));
+    assertFalse(ihs.wontExpand(21));
+  }
+  
+  public void testExpandNpe() {
+    ihs.add(15);
+    ihs.add(150000);  // makes 4 byte table entries
+    
+    for (int i = 1; i < 256; i++) {  // 0 is invalid key
+      ihs.add(i);  // causes resize, check no NPE etc thrown.
+    }
+  }
+  
+  public void testAddIntoRemovedSlot() {
+    long seed = // -5748775656590176364L;
+        new Random().nextLong();
+    System.out.println("Random seed for testAddIntoRemovedSlot in " + 
this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+
+    for (int i = 1; i < 100; i++) {
+      ihs.add(i);
+      assertEquals(i, ihs.size());
+    }
+    
+    assertEquals(99, ihs.size());
+    
+    /** Test with 2 byte numbers */
+    checkRemovedReuse(true);
+    
+    ihs = new IntHashSetRh();
+    for (int i = 1; i < 99; i++) {
+      ihs.add(i);
+    }
+    ihs.add(100000);  // force 4 byte
+    checkRemovedReuse(false);
+  }
+  
+  private void checkRemovedReuse(boolean is2) {
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+    for (int i = 0; i < 100000; i++) {
+//      if (i % 25000 == 0) System.out.println(i);
+      int v = 1 + random.nextInt(100 + (i % 30000)); // random between 1 and 
30,101
+      int sz = ihs.size();
+      boolean wasRemoved = ihs.remove(v);
+      assertEquals(sz - (wasRemoved ? 1 : 0), ihs.size());
+      // debug
+      if (ihs.contains(v)) {
+        System.out.println("debug");
+      }
+      assertTrue(!(ihs.contains(v)));
+      v = 1 + random.nextInt(100 + (i % 30000));
+      sz = ihs.size();
+      boolean wasAdded = ihs.add(v);
+      assertEquals(sz + (wasAdded ? 1 : 0), ihs.size());
+      assertTrue(ihs.contains(v));
+    }
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 16384 : 32768) );
+    
+    //  32,768, 16,384, 8,192, 4096, 2048, 1024, 512, 256
+    // for 2 byte storage, is2 = true, and expected is: i / 2
+    // for 4 byte storage, is2 = false, and expected is i
+    
+    ihs.clear(); // doesn't set 2nd time because size + removed > 1/2 the 
capacity
+    
+    for (int i = 32768; i > 128; i = i / 2) {
+      ihs.clear(); // sets 2nd time shrinkable
+      assertTrue(ihs.getSpaceUsedInWords() == (is2 ? i/2 : i));
+      ihs.clear();  // shrinks
+      assertTrue(ihs.getSpaceUsedInWords() == (is2 ? i/4 : i/2));
+    }
+//    ihs.clear();
+//    
+    assertTrue(ihs.getSpaceUsedInWords() == (is2 ? 64: 128));
+
+    // table size should be 128, adding 100 items should cause expansion (84 
== .66 * 128)
+    for (int i = 1; i < ((is2) ? 100 : 99); i++) {
+      ihs.add(i);
+    }
+    if (!is2) {
+      ihs.add(100000);
+    }
+    
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+    for (int i = 0; i < 1000; i++) {
+      int v = 1 + random.nextInt(100);
+      ihs.remove(v);
+      assertTrue(!(ihs.contains(v)));
+      ihs.add(v);
+      assertTrue(ihs.contains(v));
+    }
+    
+    assertTrue(ihs.getSpaceUsedInWords() == ((is2) ? 128 : 256));
+    
+  }
+ 
+  public void testRandom() {
+    int countAdd = 0;
+    int dupsA = 0;
+    int notPres = 0;
+    int countRmv = 0;
+    
+    long seed = 
+        new Random().nextLong();
+    System.out.println("Random seed for testRandom in " + 
this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+    
+    for (int i = 1; i < 1024 * 1024; i++) {
+      int k = i & (1024 * 256) - 1;
+      if (k == 0) continue;
+      if (random.nextInt(3) > 0) {
+        int sz = ihs.size();
+        if (ihs.add(k)) {
+          countAdd ++;
+          assertEquals(sz + 1, ihs.size());
+        } else {
+          dupsA ++;
+        }
+        
+      } else {
+        int sz = ihs.size();
+        if (ihs.remove(k)) {
+          countRmv ++;
+          assertEquals(sz - 1, ihs.size());
+        } else {
+          notPres ++;
+        }
+        
+      }
+    }
+    
+    System.out.format("added: %,d dups: %,d rmvd: %,d notPres: %,d, size: 
%d%n", countAdd, dupsA, countRmv, notPres, ihs.size());
+    assertEquals(countAdd - countRmv, ihs.size() );
+  }
+}
diff --git 
a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java 
b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java
new file mode 100644
index 0000000..e8aefc2
--- /dev/null
+++ 
b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetTestRh.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+
+public class ObjHashSetTestRh extends TestCase {
+  
+  ObjHashSetRh<Integer> ihs;
+  
+  Random random;
+  
+  public void setUp() {
+    ihs = new ObjHashSetRh<>(Integer.class);
+  }
+  
+  public void testBasic() {
+    
+    ihs.add(15);
+    ihs.add(188);
+    Integer[] sv = ihs.toArray();
+    assertEquals(2, sv.length);
+    assertEquals(15 + 188, sv[0] + sv[1]);
+    
+    // test most positive / negative
+    ihs.clear();
+    ihs.add(189);
+    ihs.add(1000);
+    ihs.add(-1000);
+    ihs.add(500);
+    ihs.add(-500);
+    ihs.remove(1000);
+    ihs.add(1001);    
+  }
+  
+  public void testContains() {
+    ihs.add(1188);
+    ihs.add(1040);
+    assertTrue(ihs.contains(1188));
+    assertTrue(ihs.contains(1040));
+    assertFalse(ihs.contains(1));
+    assertFalse(ihs.contains(99));  
+  }
+  
+//  public void testTableSpace() {
+//    assertEquals(32, ObjHashSetRh.tableSpace(19, 0.6f));    // 19 / .6 = 
31.xxx, round to 32
+//    assertEquals(64, ObjHashSetRh.tableSpace(21, 0.6f));
+//    assertEquals(32, ihs.tableSpace(21));
+//  }
+    
+  public void testExpandNpe() {
+    ihs.add(15);
+    ihs.add(150000);  // makes 4 byte table entries
+    
+    for (int i = 1; i < 256; i++) {  // 0 is invalid key
+      ihs.add(i);  // causes resize, check no NPE etc thrown.
+    }
+  }
+  
+  public void testAddIntoRemovedSlot() {
+    long seed = // -6401324561844760524L;
+        new Random().nextLong();
+    System.out.println("Random seed for testAddIntoRemovedSlot in " + 
this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+
+    for (int i = 1; i < 100; i++) {
+      ihs.add(i);
+      assertEquals(i, ihs.size());
+    }
+    
+    assertEquals(99, ihs.size());
+    
+    /** Test with 2 byte numbers */
+    checkRemovedReuse(true);
+    
+    ihs = new ObjHashSetRh<>(Integer.class);
+    for (int i = 1; i < 99; i++) {
+      ihs.add(i);
+    }
+    ihs.add(100000);  // force 4 byte
+    checkRemovedReuse(false);
+  }
+  
+  private void checkRemovedReuse(boolean is2) {
+    for (int i = 0; i < 100000; i++) {
+      int v = 1 + random.nextInt(100 + (i % 30000)); // random between 1 and 
30,101
+      int sz = ihs.size();
+      boolean wasRemoved = ihs.remove(v);
+      assertEquals(sz - (wasRemoved ? 1 : 0), ihs.size());
+      assertTrue(!(ihs.contains(v)));
+      v = 1 + random.nextInt(100 + (i % 30000));
+      sz = ihs.size();
+      boolean wasAdded = ihs.add(v);
+      assertEquals(sz + (wasAdded ? 1 : 0), ihs.size());
+      assertTrue(ihs.contains(v));
+    }
+    
+    ihs.clear(); // doesn't set 2nd time because size + removed > 1/2 the 
capacity
+    
+    for (int i = 32768; i > 128; i = i / 2) {
+      ihs.clear(); // sets 2nd time shrinkable
+      assertEquals(i, ihs.getCapacity());
+      ihs.clear();  // shrinks
+      assertEquals(i/2, ihs.getCapacity());
+    }
+//    ihs.clear();
+//    
+
+    // table size should be 128, adding 100 items should cause expansion (84 
== .66 * 128)
+    for (int i = 1; i < 100; i++) {
+      ihs.add(i);
+    }
+    
+    assertEquals(256, ihs.getCapacity());
+    for (int i = 0; i < 1000; i++) {
+      int v = 1 + random.nextInt(100);
+      ihs.remove(v);
+      assertTrue(!(ihs.contains(v)));
+      ihs.add(v);
+      assertTrue(ihs.contains(v));
+    }
+    
+    assertEquals(256, ihs.getCapacity());
+    
+  }
+ 
+  public void testRandom() {
+    int countAdd = 0;
+    int dupsA = 0;
+    int notPres = 0;
+    int countRmv = 0;
+    
+    long seed = 
+        new Random().nextLong();
+    System.out.println("Random seed for testRandom in " + 
this.getClass().getName() + ": "  + seed);
+    random = new Random(seed);
+    
+    for (int i = 1; i < 1024 * 1024; i++) {
+      int k = i & (1024 * 256) - 1;
+      if (k == 0) continue;
+      if (random.nextInt(3) > 0) {
+        int sz = ihs.size();
+        if (ihs.add(k)) {
+          countAdd ++;
+          assertEquals(sz + 1, ihs.size());
+        } else {
+          dupsA ++;
+        }
+        
+      } else {
+        int sz = ihs.size();
+        if (ihs.remove(k)) {
+          countRmv ++;
+          assertEquals(sz - 1, ihs.size());
+        } else {
+          notPres ++;
+        }
+        
+      }
+    }
+    
+    System.out.format("added: %,d dups: %,d rmvd: %,d notPres: %,d, size: 
%d%n", countAdd, dupsA, countRmv, notPres, ihs.size());
+    assertEquals(countAdd - countRmv, ihs.size() );
+  }
+}

[uima-uimaj] 01/01: no jira, experiment, robin hood hashing.

Reply via email to