Author: tdunning
Date: Thu Oct 11 22:42:38 2012
New Revision: 1397344
URL: http://svn.apache.org/viewvc?rev=1397344&view=rev
Log:
MAHOUT-1096 - Implement hashCode for generated classes
Added:
mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
Modified:
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t
Modified:
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
(original)
+++
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
Thu Oct 11 22:42:38 2012
@@ -28,9 +28,12 @@ package org.apache.mahout.math.map;
import java.util.ArrayList;
import java.util.List;
+import java.nio.IntBuffer;
+import java.util.Arrays;
import org.apache.mahout.math.Sorting;
import org.apache.mahout.math.Swapper;
+import org.apache.mahout.math.set.HashUtils;
import org.apache.mahout.math.function.IntComparator;
import org.apache.mahout.math.function.${keyTypeCap}ObjectProcedure;
@@ -145,6 +148,25 @@ public abstract class Abstract${keyTypeC
);
}
+ public int hashCode() {
+ final int[] buf = new int[size()];
+ forEachPair(
+ new ${keyTypeCap}ObjectProcedure() {
+ int i = 0;
+
+ @Override
+ public boolean apply(${keyType} key, Object value) {
+ buf[i++] = HashUtils.hash(key) ^ value.hashCode();
+ return true;
+ }
+ }
+ );
+ Arrays.sort(buf);
+ return IntBuffer.wrap(buf).hashCode();
+ }
+
+
+
/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates
over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key".
All methods which <i>can</i> be expressed
Modified:
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
(original)
+++
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
Thu Oct 11 22:42:38 2012
@@ -27,8 +27,12 @@ It is provided "as is" without expressed
*/
package org.apache.mahout.math.map;
+import java.nio.IntBuffer;
+import java.util.Arrays;
+
import org.apache.mahout.math.Sorting;
import org.apache.mahout.math.Swapper;
+import org.apache.mahout.math.set.HashUtils;
import org.apache.mahout.math.function.${keyTypeCap}${valueTypeCap}Procedure;
import org.apache.mahout.math.function.${keyTypeCap}Procedure;
import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
@@ -147,6 +151,23 @@ public abstract class Abstract${keyTypeC
);
}
+ public int hashCode() {
+ final int[] buf = new int[size()];
+ forEachPair(
+ new ${keyTypeCap}${valueTypeCap}Procedure() {
+ int i = 0;
+
+ @Override
+ public boolean apply(${keyType} key, ${valueType} value) {
+ buf[i++] = HashUtils.hash(key) ^ HashUtils.hash(value);
+ return true;
+ }
+ }
+ );
+ Arrays.sort(buf);
+ return IntBuffer.wrap(buf).hashCode();
+ }
+
/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates
over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key".
All methods which <i>can</i> be expressed
Modified:
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
(original)
+++
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
Thu Oct 11 22:42:38 2012
@@ -30,7 +30,10 @@ package org.apache.mahout.math.map;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.nio.IntBuffer;
+import java.util.Arrays;
+import org.apache.mahout.math.set.HashUtils;
import org.apache.mahout.math.Sorting;
import org.apache.mahout.math.Swapper;
import org.apache.mahout.math.function.Object${valueTypeCap}Procedure;
@@ -149,6 +152,23 @@ public abstract class AbstractObject${va
);
}
+ public int hashCode() {
+ final int[] buf = new int[size()];
+ forEachPair(
+ new Object${valueTypeCap}Procedure() {
+ int i = 0;
+
+ @Override
+ public boolean apply(Object key, ${valueType} value) {
+ buf[i++] = key.hashCode() ^ HashUtils.hash(value);
+ return true;
+ }
+ }
+ );
+ Arrays.sort(buf);
+ return IntBuffer.wrap(buf).hashCode();
+ }
+
/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates
over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key".
All methods which <i>can</i> be expressed
Modified:
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
(original)
+++
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
Thu Oct 11 22:42:38 2012
@@ -21,6 +21,9 @@ package org.apache.mahout.math.set;
import org.apache.mahout.math.function.${keyTypeCap}Procedure;
import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
+import java.util.Arrays;
+import java.nio.IntBuffer;
+import org.apache.mahout.math.set.HashUtils;
public abstract class Abstract${keyTypeCap}Set extends AbstractSet {
@@ -73,6 +76,23 @@ public abstract class Abstract${keyTypeC
);
}
+ public int hashCode() {
+ final int[] buf = new int[size()];
+ forEachKey(
+ new ${keyTypeCap}Procedure() {
+ int i = 0;
+
+ @Override
+ public boolean apply(${keyType} iterKey) {
+ buf[i++] = HashUtils.hash(iterKey);
+ return true;
+ }
+ }
+ );
+ Arrays.sort(buf);
+ return IntBuffer.wrap(buf).hashCode();
+ }
+
/**
* Applies a procedure to each key of the receiver, if any. Note: Iterates
over the keys in no particular order.
* Subclasses can define a particular order, for example, "sorted by key".
All methods which <i>can</i> be expressed
Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java?rev=1397344&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
(added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
Thu Oct 11 22:42:38 2012
@@ -0,0 +1,35 @@
+package org.apache.mahout.math.set;
+
+/**
+ * Computes hashes of primitive values. Providing these as statics allows the
templated code
+ * to compute hashes of sets.
+ */
+public class HashUtils {
+ public static int hash(byte x) {
+ return x;
+ }
+
+ public static int hash(short x) {
+ return x;
+ }
+
+ public static int hash(char x) {
+ return x;
+ }
+
+ public static int hash(int x) {
+ return x;
+ }
+
+ public static int hash(float x) {
+ return Float.floatToIntBits(x) >>> 3 + Float.floatToIntBits((float)
(Math.PI * x));
+ }
+
+ public static int hash(double x) {
+ return hash(17 * Double.doubleToLongBits(x));
+ }
+
+ public static int hash(long x) {
+ return (int) ((x * 11) >>> 32 ^ x);
+ }
+}
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
(original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
Thu Oct 11 22:42:38 2012
@@ -19,6 +19,7 @@
package org.apache.mahout.math.set;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -26,6 +27,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Set;
+import org.apache.mahout.math.MurmurHash;
import org.apache.mahout.math.function.ObjectProcedure;
import org.apache.mahout.math.map.PrimeFinder;
@@ -457,7 +459,19 @@ public class OpenHashSet<T> extends Abst
}
);
}
-
+
+ @Override
+ public int hashCode() {
+ ByteBuffer buf = ByteBuffer.allocate(size());
+ for (int i = 0; i < table.length; i++) {
+ Object v = table[i];
+ if (state[i] == FULL) {
+ buf.putInt(v.hashCode());
+ }
+ }
+ return MurmurHash.hash(buf, this.getClass().getName().hashCode());
+ }
+
/**
* Implement the standard Java Collections iterator. Note that 'remove' is
silently
* ineffectual here. This method is provided for convenience, only.
Modified:
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
---
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t
(original)
+++
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t
Thu Oct 11 22:42:38 2012
@@ -166,11 +166,14 @@ public class Open${keyTypeCap}HashSetTes
map.remove(($keyType) 13);
Open${keyTypeCap}HashSet map2 = (Open${keyTypeCap}HashSet) map.copy();
assertTrue(map.equals(map2));
+ assertTrue(map.hashCode() == map2.hashCode());
assertTrue(map2.equals(map));
+ assertTrue(map.hashCode() == map2.hashCode());
assertFalse("Hello Sailor".equals(map));
assertFalse(map.equals("hello sailor"));
map2.remove(($keyType) 11);
assertFalse(map.equals(map2));
assertFalse(map2.equals(map));
+ assertFalse(map.hashCode() == map2.hashCode());
}
}
Added:
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java?rev=1397344&view=auto
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
(added)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
Thu Oct 11 22:42:38 2012
@@ -0,0 +1,73 @@
+package org.apache.mahout.math.set;
+
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multiset;
+import junit.framework.TestCase;
+import org.apache.mahout.common.RandomUtils;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+public class HashUtilsTest extends TestCase {
+ public void testHashFloat() {
+ Multiset<Integer> violations = HashMultiset.create();
+ for (int k = 0; k < 1000; k++) {
+ List<Float> original = Lists.newArrayList();
+
+ Random gen = RandomUtils.getRandom();
+ for (int i = 0; i < 10000; i++) {
+ float x = (float) gen.nextDouble();
+ original.add(x);
+ }
+
+ violations.add(checkCounts(original) <= 12 ? 0 : 1);
+ }
+ // the hashes for floats don't really have 32 bits of entropy so the test
+ // only succeeds at better than about 99% rate.
+ assertTrue(violations.count(0) >= 985);
+ }
+
+ public void testHashDouble() {
+ List<Double> original = Lists.newArrayList();
+
+ for (int k = 0; k < 10; k++) {
+ Random gen = RandomUtils.getRandom();
+ for (int i = 0; i < 10000; i++) {
+ double x = gen.nextDouble();
+ original.add(x);
+ }
+
+ checkCounts(original);
+ }
+ }
+
+ public void testHashLong() {
+ List<Long> original = Lists.newArrayList();
+
+ for (int k = 0; k < 10; k++) {
+ Random gen = RandomUtils.getRandom();
+ for (int i = 0; i < 10000; i++) {
+ long x = gen.nextLong();
+ original.add(x);
+ }
+
+ checkCounts(original);
+ }
+ }
+
+ private <T> int checkCounts(Collection<T> original) {
+ Multiset<T> hashCounts = HashMultiset.create();
+ for (T v : original) {
+ hashCounts.add(v);
+ }
+
+ Multiset<Integer> countCounts = HashMultiset.create();
+ for (T hash : hashCounts) {
+ countCounts.add(hashCounts.count(hash));
+ }
+
+ return original.size() - countCounts.count(1);
+ }
+}