Author: tdunning
Date: Thu Oct 11 22:42:38 2012
New Revision: 1397344

URL: http://svn.apache.org/viewvc?rev=1397344&view=rev
Log:
MAHOUT-1096 - Implement hashCode for generated classes

Added:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/set/
    
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
Modified:
    
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
    
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
    
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
    
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
    mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
    
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t

Modified: 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
--- 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
 (original)
+++ 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeObjectMap.java.t
 Thu Oct 11 22:42:38 2012
@@ -28,9 +28,12 @@ package org.apache.mahout.math.map;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.nio.IntBuffer;
+import java.util.Arrays;
 
 import org.apache.mahout.math.Sorting;
 import org.apache.mahout.math.Swapper;
+import org.apache.mahout.math.set.HashUtils;
 import org.apache.mahout.math.function.IntComparator;
 
 import org.apache.mahout.math.function.${keyTypeCap}ObjectProcedure;
@@ -145,6 +148,25 @@ public abstract class Abstract${keyTypeC
             );
   }
 
+  public int hashCode() {
+    final int[] buf = new int[size()];
+    forEachPair(
+      new ${keyTypeCap}ObjectProcedure() {
+        int i = 0;
+
+        @Override
+        public boolean apply(${keyType} key, Object value) {
+          buf[i++] = HashUtils.hash(key) ^ value.hashCode();
+          return true;
+        }
+      }
+    );
+    Arrays.sort(buf);
+    return IntBuffer.wrap(buf).hashCode();
+  }
+
+
+
   /**
    * Applies a procedure to each key of the receiver, if any. Note: Iterates 
over the keys in no particular order.
    * Subclasses can define a particular order, for example, "sorted by key". 
All methods which <i>can</i> be expressed

Modified: 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
--- 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
 (original)
+++ 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractKeyTypeValueTypeMap.java.t
 Thu Oct 11 22:42:38 2012
@@ -27,8 +27,12 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.map;
 
+import java.nio.IntBuffer;
+import java.util.Arrays;
+
 import org.apache.mahout.math.Sorting;
 import org.apache.mahout.math.Swapper;
+import org.apache.mahout.math.set.HashUtils;
 import org.apache.mahout.math.function.${keyTypeCap}${valueTypeCap}Procedure;
 import org.apache.mahout.math.function.${keyTypeCap}Procedure;
 import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
@@ -147,6 +151,23 @@ public abstract class Abstract${keyTypeC
             );
   }
 
+  public int hashCode() {
+    final int[] buf = new int[size()];
+    forEachPair(
+      new ${keyTypeCap}${valueTypeCap}Procedure() {
+        int i = 0;
+
+        @Override
+        public boolean apply(${keyType} key, ${valueType} value) {
+          buf[i++] = HashUtils.hash(key) ^ HashUtils.hash(value);
+          return true;
+        }
+      }
+    );
+    Arrays.sort(buf);
+    return IntBuffer.wrap(buf).hashCode();
+  }
+
   /**
    * Applies a procedure to each key of the receiver, if any. Note: Iterates 
over the keys in no particular order.
    * Subclasses can define a particular order, for example, "sorted by key". 
All methods which <i>can</i> be expressed

Modified: 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
--- 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
 (original)
+++ 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/AbstractObjectValueTypeMap.java.t
 Thu Oct 11 22:42:38 2012
@@ -30,7 +30,10 @@ package org.apache.mahout.math.map;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.nio.IntBuffer;
+import java.util.Arrays;
 
+import org.apache.mahout.math.set.HashUtils;
 import org.apache.mahout.math.Sorting;
 import org.apache.mahout.math.Swapper;
 import org.apache.mahout.math.function.Object${valueTypeCap}Procedure;
@@ -149,6 +152,23 @@ public abstract class AbstractObject${va
             );
   }
 
+  public int hashCode() {
+    final int[] buf = new int[size()];
+    forEachPair(
+      new Object${valueTypeCap}Procedure() {
+        int i = 0;
+
+        @Override
+        public boolean apply(Object key, ${valueType} value) {
+          buf[i++] = key.hashCode() ^ HashUtils.hash(value);
+          return true;
+        }
+      }
+    );
+    Arrays.sort(buf);
+    return IntBuffer.wrap(buf).hashCode();
+  }
+
   /**
    * Applies a procedure to each key of the receiver, if any. Note: Iterates 
over the keys in no particular order.
    * Subclasses can define a particular order, for example, "sorted by key". 
All methods which <i>can</i> be expressed

Modified: 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
--- 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
 (original)
+++ 
mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/set/AbstractKeyTypeSet.java.t
 Thu Oct 11 22:42:38 2012
@@ -21,6 +21,9 @@ package org.apache.mahout.math.set;
 
 import org.apache.mahout.math.function.${keyTypeCap}Procedure;
 import org.apache.mahout.math.list.${keyTypeCap}ArrayList;
+import java.util.Arrays;
+import java.nio.IntBuffer;
+import org.apache.mahout.math.set.HashUtils;
 
 public abstract class Abstract${keyTypeCap}Set extends AbstractSet {
 
@@ -73,6 +76,23 @@ public abstract class Abstract${keyTypeC
         );
   }
 
+  public int hashCode() {
+    final int[] buf = new int[size()];
+    forEachKey(
+      new ${keyTypeCap}Procedure() {
+        int i = 0;
+
+        @Override
+        public boolean apply(${keyType} iterKey) {
+          buf[i++] = HashUtils.hash(iterKey);
+          return true;
+        }
+      }
+    );
+    Arrays.sort(buf);
+    return IntBuffer.wrap(buf).hashCode();
+  }
+
   /**
    * Applies a procedure to each key of the receiver, if any. Note: Iterates 
over the keys in no particular order.
    * Subclasses can define a particular order, for example, "sorted by key". 
All methods which <i>can</i> be expressed

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java?rev=1397344&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java 
(added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/set/HashUtils.java 
Thu Oct 11 22:42:38 2012
@@ -0,0 +1,35 @@
+package org.apache.mahout.math.set;
+
+/**
+ * Computes hashes of primitive values.  Providing these as statics allows the 
templated code
+ * to compute hashes of sets.
+ */
+public class HashUtils {
+  public static int hash(byte x) {
+    return x;
+  }
+
+  public static int hash(short x) {
+    return x;
+  }
+
+  public static int hash(char x) {
+    return x;
+  }
+
+  public static int hash(int x) {
+    return x;
+  }
+
+  public static int hash(float x) {
+    return Float.floatToIntBits(x) >>> 3 + Float.floatToIntBits((float) 
(Math.PI * x));
+  }
+
+  public static int hash(double x) {
+    return hash(17 * Double.doubleToLongBits(x));
+  }
+
+  public static int hash(long x) {
+    return (int) ((x * 11) >>> 32 ^ x);
+  }
+}

Modified: 
mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java 
(original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java 
Thu Oct 11 22:42:38 2012
@@ -19,6 +19,7 @@
 
 package org.apache.mahout.math.set;
 
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -26,6 +27,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.mahout.math.MurmurHash;
 import org.apache.mahout.math.function.ObjectProcedure;
 import org.apache.mahout.math.map.PrimeFinder;
 
@@ -457,7 +459,19 @@ public class OpenHashSet<T> extends Abst
             }
         );
   }
-  
+
+  @Override
+  public int hashCode() {
+    ByteBuffer buf = ByteBuffer.allocate(size());
+    for (int i = 0; i < table.length; i++) {
+      Object v = table[i];
+      if (state[i] == FULL) {
+        buf.putInt(v.hashCode());
+      }
+    }
+    return MurmurHash.hash(buf, this.getClass().getName().hashCode());
+  }
+
   /**
    * Implement the standard Java Collections iterator. Note that 'remove' is 
silently
    * ineffectual here. This method is provided for convenience, only.

Modified: 
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t?rev=1397344&r1=1397343&r2=1397344&view=diff
==============================================================================
--- 
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t
 (original)
+++ 
mahout/trunk/math/src/test/java-templates/org/apache/mahout/math/set/OpenKeyTypeHashSetTest.java.t
 Thu Oct 11 22:42:38 2012
@@ -166,11 +166,14 @@ public class Open${keyTypeCap}HashSetTes
     map.remove(($keyType) 13);
     Open${keyTypeCap}HashSet map2 = (Open${keyTypeCap}HashSet) map.copy();
     assertTrue(map.equals(map2));
+    assertTrue(map.hashCode() == map2.hashCode());
     assertTrue(map2.equals(map));
+    assertTrue(map.hashCode() == map2.hashCode());
     assertFalse("Hello Sailor".equals(map));
     assertFalse(map.equals("hello sailor"));
     map2.remove(($keyType) 11);
     assertFalse(map.equals(map2));
     assertFalse(map2.equals(map));
+    assertFalse(map.hashCode() == map2.hashCode());
   }
  }

Added: 
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java?rev=1397344&view=auto
==============================================================================
--- 
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java 
(added)
+++ 
mahout/trunk/math/src/test/java/org/apache/mahout/math/set/HashUtilsTest.java 
Thu Oct 11 22:42:38 2012
@@ -0,0 +1,73 @@
+package org.apache.mahout.math.set;
+
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multiset;
+import junit.framework.TestCase;
+import org.apache.mahout.common.RandomUtils;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+public class HashUtilsTest extends TestCase {
+  public void testHashFloat() {
+    Multiset<Integer> violations = HashMultiset.create();
+    for (int k = 0; k < 1000; k++) {
+      List<Float> original = Lists.newArrayList();
+
+      Random gen = RandomUtils.getRandom();
+      for (int i = 0; i < 10000; i++) {
+        float x = (float) gen.nextDouble();
+        original.add(x);
+      }
+
+      violations.add(checkCounts(original) <= 12 ? 0 : 1);
+    }
+    // the hashes for floats don't really have 32 bits of entropy so the test
+    // only succeeds at better than about 99% rate.
+    assertTrue(violations.count(0) >= 985);
+  }
+
+  public void testHashDouble() {
+    List<Double> original = Lists.newArrayList();
+
+    for (int k = 0; k < 10; k++) {
+      Random gen = RandomUtils.getRandom();
+      for (int i = 0; i < 10000; i++) {
+        double x = gen.nextDouble();
+        original.add(x);
+      }
+
+      checkCounts(original);
+    }
+  }
+
+  public void testHashLong() {
+    List<Long> original = Lists.newArrayList();
+
+    for (int k = 0; k < 10; k++) {
+      Random gen = RandomUtils.getRandom();
+      for (int i = 0; i < 10000; i++) {
+        long x = gen.nextLong();
+        original.add(x);
+      }
+
+      checkCounts(original);
+    }
+  }
+
+  private <T> int checkCounts(Collection<T> original) {
+    Multiset<T> hashCounts = HashMultiset.create();
+    for (T v : original) {
+      hashCounts.add(v);
+    }
+
+    Multiset<Integer> countCounts = HashMultiset.create();
+    for (T hash : hashCounts) {
+      countCounts.add(hashCounts.count(hash));
+    }
+
+    return original.size() - countCounts.count(1);
+  }
+}


Reply via email to