This is an automated email from the ASF dual-hosted git repository. rec pushed a commit to branch x-archive/robin-hood-hash in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git
commit 1613bd0b50a5ccb68dc3e07873ef84c1eabf1edf Author: Marshall Schor <m...@schor.com> AuthorDate: Thu Dec 19 15:37:11 2019 -0500 experiment: robin hood hash with obj hash - is slower by 1/3 to 1/2 --- .../apache/uima/internal/util/ObjHashSetRh.java | 4 +- .../uima/internal/util/ObjHashSetPerfTest.java | 192 ++++++++++++++++++++ .../uima/internal/util/ObjHashSetPerfTestRh.java | 194 +++++++++++++++++++++ 3 files changed, 388 insertions(+), 2 deletions(-) diff --git a/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java index d70c7d3dc..b1a6e174a 100644 --- a/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java +++ b/uimaj-core/src/main/java/org/apache/uima/internal/util/ObjHashSetRh.java @@ -117,8 +117,8 @@ public class ObjHashSetRh<T> extends Common_hash_support_rh implements Set<T> { // key hash Misc.hashInt(key.hashCode()), - // is_eq - i -> keys[i].equals(key)); + // is_eq_or_not_present + i -> keys[i] == null || keys[i].equals(key)); // keys[i] can be null } diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java new file mode 100644 index 000000000..aab13fbea --- /dev/null +++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.uima.internal.util; + +import java.util.Random; + +import junit.framework.TestCase; + +public class ObjHashSetPerfTest extends TestCase { + /** + * Set to false to run the performance test + * + * Tests both IntHashSet and IntBitSet + */ + final boolean SKIP = false; + + static int cacheLoadSize; + + static long seed = 3737463135938899369L; +// new Random().nextLong(); + static { + System.out.println("Random seed for IntHashSetPerfTest: " + seed); + } + Random r = new Random(seed); + +// Set<Integer> keys = new HashSet<Integer>(1000); + + int dmv = 0; + + ObjHashSet<Integer> m1; + + final int[] keys10000 = new int[511111]; + int k10ki = 0; + + + public void testPerf() { + if (SKIP) return; + m1 = new ObjHashSet<>(16, Integer.class, Integer.MIN_VALUE); + + for (int i = 0; i < keys10000.length; i++) { + int k = r.nextInt(511110); + + keys10000[i] = k + 1; + } + + System.out.format("%n%n W A R M U P %n%n"); + cacheLoadSize = 0; + warmup(m1); + + System.out.format("%n%n Time 100 %n%n"); + timelp(100); + System.out.format("%n%n Time 1000 %n%n"); + timelp(1000); + System.out.format("%n%n Time 10000 %n%n"); + timelp(10000); + System.out.format("%n%n Time 100000 %n%n"); + timelp(100000); + cacheLoadSize = 0; // 1 * 256 * 1; + System.out.format("%n%n Time 100000 %n%n"); + timelp(100000); + + System.out.format("%n%n Time 500000 %n%n"); + timelp(500000); + + System.out.format("%n%n For Yourkit: Time 500000 %n%n"); + timelp(500000); +// System.out.format("%n%n For Yourkit: Time 500000 %n%n"); +// timelp(500000); +// System.out.format("%n%n For Yourkit: Time 500000 %n%n"); +// timelp(500000); + + + System.out.println(dmv); + } + + private void time2(int n) { + float f1 = time(m1, n); + } + + private void timelp(int n) { + time2(n); + time2(n); + time2(n); + } + + private void warmup(ObjHashSet<Integer> m) { + for (int i = 0; i < 500; i++) { + inner(m,true, 1000) ; // warm up + } + } + + private float time(ObjHashSet<Integer> m, int ss) { + long start = System.nanoTime(); + for (int i = 0; i < 500; i++) { + inner(m,false, ss); + } + float t = (System.nanoTime() - start) / 1000000.0F; + System.out.format("time for %,d: %s is %.3f milliseconds %n", ss, + m.getClass().getSimpleName(), + t); + return t; + } + + private int nextKey() { + int r = keys10000[k10ki++]; + if (k10ki >= keys10000.length) { + k10ki = 0; + } + return r; + } + + private void inner(ObjHashSet<Integer> m, boolean check, int ss) { + CS cs = new CS(m); + + for (int i = 0; i < ss; i++) { + + int k = keys10000[i]; +// System.out.print(" " + k); +// if (i %100 == 0) System.out.println(""); +// keys.add(k); + cs.add(k); + cacheLoad(i); + if (check) { + assertTrue(cs.contains(k)); + } + } + for (int i = 0; i < ss; i++) { + boolean v = cs.contains(keys10000[i]); + if (!v) { + throw new RuntimeException("never happen"); + } + dmv += 1; + cacheLoad(i); + } + cs.clear(); + + +// for (int k : keys) { +// assertEquals(10000 + k, (m instanceof IntHashSet) ? +// ((IntHashSet)m).get(k) : +// ((IntArrayRBT)m).getMostlyClose(k)); +// } + + } + + static class CS { + final ObjHashSet<Integer> set; + + CS(ObjHashSet<Integer> set) { + this.set = set; + } + + boolean contains(Integer i) { + return set.contains(i); + } + + void add(int i) { + set.add(i); + } + + void clear() { + set.clear(); + } + + } + + void cacheLoad(int i) { + if (cacheLoadSize > 0) { + int[] cl = new int[cacheLoadSize]; + if (i != 100000) { + cl = null; + } + } + } +} diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java new file mode 100644 index 000000000..24ec76948 --- /dev/null +++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.uima.internal.util; + +import java.util.Random; + +import org.apache.uima.internal.util.rb_trees.IntArrayRBT; + +import junit.framework.TestCase; + +public class ObjHashSetPerfTestRh extends TestCase { + /** + * Set to false to run the performance test + * + * Tests both IntHashSet and IntBitSet + */ + final boolean SKIP = false; + + static int cacheLoadSize; + + static long seed = 3737463135938899369L; +// new Random().nextLong(); + static { + System.out.println("Random seed for IntHashSetPerfTest: " + seed); + } + Random r = new Random(seed); + +// Set<Integer> keys = new HashSet<Integer>(1000); + + int dmv = 0; + + ObjHashSetRh<Integer> m1; + + final int[] keys10000 = new int[511111]; + int k10ki = 0; + + + public void testPerf() { + if (SKIP) return; + m1 = new ObjHashSetRh<>(16, Integer.class); + + for (int i = 0; i < keys10000.length; i++) { + int k = r.nextInt(511110); + + keys10000[i] = k + 1; + } + + System.out.format("%n%n W A R M U P %n%n"); + cacheLoadSize = 0; + warmup(m1); + + System.out.format("%n%n Time 100 %n%n"); + timelp(100); + System.out.format("%n%n Time 1000 %n%n"); + timelp(1000); + System.out.format("%n%n Time 10000 %n%n"); + timelp(10000); + System.out.format("%n%n Time 100000 %n%n"); + timelp(100000); + cacheLoadSize = 0; // 1 * 256 * 1; + System.out.format("%n%n Time 100000 %n%n"); + timelp(100000); + + System.out.format("%n%n Time 500000 %n%n"); + timelp(500000); + + System.out.format("%n%n For Yourkit: Time 500000 %n%n"); + timelp(500000); +// System.out.format("%n%n For Yourkit: Time 500000 %n%n"); +// timelp(500000); +// System.out.format("%n%n For Yourkit: Time 500000 %n%n"); +// timelp(500000); + + + System.out.println(dmv); + } + + private void time2(int n) { + float f1 = time(m1, n); + } + + private void timelp(int n) { + time2(n); + time2(n); + time2(n); + } + + private void warmup(ObjHashSetRh<Integer> m) { + for (int i = 0; i < 500; i++) { + inner(m,true, 1000) ; // warm up + } + } + + private float time(ObjHashSetRh<Integer> m, int ss) { + long start = System.nanoTime(); + for (int i = 0; i < 500; i++) { + inner(m,false, ss); + } + float t = (System.nanoTime() - start) / 1000000.0F; + System.out.format("time for %,d: %s is %.3f milliseconds %n", ss, + m.getClass().getSimpleName(), + t); + return t; + } + + private int nextKey() { + int r = keys10000[k10ki++]; + if (k10ki >= keys10000.length) { + k10ki = 0; + } + return r; + } + + private void inner(ObjHashSetRh<Integer> m, boolean check, int ss) { + CS cs = new CS(m); + + for (int i = 0; i < ss; i++) { + + int k = keys10000[i]; +// System.out.print(" " + k); +// if (i %100 == 0) System.out.println(""); +// keys.add(k); + cs.add(k); + cacheLoad(i); + if (check) { + assertTrue(cs.contains(k)); + } + } + for (int i = 0; i < ss; i++) { + boolean v = cs.contains(keys10000[i]); + if (!v) { + throw new RuntimeException("never happen"); + } + dmv += 1; + cacheLoad(i); + } + cs.clear(); + + +// for (int k : keys) { +// assertEquals(10000 + k, (m instanceof IntHashSetRh) ? +// ((IntHashSetRh)m).get(k) : +// ((IntArrayRBT)m).getMostlyClose(k)); +// } + + } + + static class CS { + final ObjHashSetRh<Integer> set; + + CS(ObjHashSetRh<Integer> set) { + this.set = set; + } + + boolean contains(Integer i) { + return set.contains(i); + } + + void add(int i) { + set.add(i); + } + + void clear() { + set.clear(); + } + + } + + void cacheLoad(int i) { + if (cacheLoadSize > 0) { + int[] cl = new int[cacheLoadSize]; + if (i != 100000) { + cl = null; + } + } + } +}