Author: tdunning
Date: Mon Aug 30 19:34:13 2010
New Revision: 990914

URL: http://svn.apache.org/viewvc?rev=990914&view=rev
Log:
MAHOUT-492 - modified InteractionValueEncoder to allow interactions between 
TextValueEncoder and other encoders

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/StaticWordValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/WordValueEncoder.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java?rev=990914&r1=990913&r2=990914&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java Mon Aug 30 19:34:13 2010
@@ -21,10 +21,7 @@ import com.google.common.collect.Sets;
 import org.apache.mahout.math.Vector;
 
 import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 
 /**
  * General interface for objects that record features into a feature vector.
@@ -76,13 +73,10 @@ public abstract class FeatureVectorEncod
   protected abstract int hashForProbe(String originalForm, Vector data, String name, int i);
 
   protected Iterable<Integer> hashesForProbe(String originalForm, Vector data, String name, int i){
-    List<Integer> hashes = new ArrayList<Integer>();
-    hashes.add(hashForProbe(originalForm,data,name,i));
-  return hashes;
+    return Collections.singletonList(hashForProbe(originalForm,data,name,i));
   }
 
-  
-  protected double getWeight(String originalFor, double w){
+  protected double getWeight(String originalForm, double w){
     return 1.0;
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java?rev=990914&r1=990913&r2=990914&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java Mon Aug 30 19:34:13 2010
@@ -17,9 +17,10 @@
 
 package org.apache.mahout.vectors;
 
+import org.apache.mahout.common.iterator.ArrayIterator;
 import org.apache.mahout.math.Vector;
 
-import java.util.Locale;
+import java.util.ArrayList;
 
 public class InteractionValueEncoder extends FeatureVectorEncoder {
 
@@ -52,24 +53,33 @@ public class InteractionValueEncoder ext
    * @param data          The vector to which the value should be added.
    */
   public void addInteractionToVector(String originalForm1, String originalForm2, double weight, Vector data) {
-    int probes = getProbes();
     String name = getName();
     double w = getWeight(originalForm1, originalForm2, weight);
-    for (int i = 0; i < probes; i++) {
-      int h1 = firstEncoder.hashForProbe(originalForm1, data, name, i);
-      int h2 = secondEncoder.hashForProbe(originalForm1, data, name, i);
-      int j =  firstEncoder.hashForProbe(originalForm2, data, name, i);
-      int n = (h1 + (j+1)*h2) % data.size();
-      if(n < 0){
-        n = n+data.size();
+    for (int i = 0; i < probes(); i++) {
+      for(Integer k : firstEncoder.hashesForProbe(originalForm1, data, name, i)){
+        for(Integer j : secondEncoder.hashesForProbe(originalForm2, data, name, i)){
+          int n = linearDoubleHash(hash1(k,name,i,data),hash2(k,name,i,data),j,data.size());
+          trace(String.format("%s:%s", originalForm1, originalForm2), n);
+          data.set(n, data.get(n) + w);
+        }
       }
-      trace(String.format("%s:%s", originalForm1, originalForm2), n);
-      data.set(n, data.get(n) + w);
     }
   }
 
+  private int probes() {
+    return getProbes();
+  }
+
   protected double getWeight(String originalForm1, String originalForm2, double w) {
-    return firstEncoder.getWeight(originalForm1, 1.0) * secondEncoder.getWeight(originalForm2,1.0) * w;
+    return firstEncoder.getWeight(originalForm1, 1.0) * secondEncoder.getWeight(originalForm2, 1.0) * w;
+  }
+
+  private int linearDoubleHash(int h1, int h2, int j, int modulus){
+   int n = (h1 + (j+1)*h2) % modulus;
+   if(n < 0){
+    n = n+modulus;
+   }
+   return n;
   }
 
   /**
@@ -90,12 +100,13 @@ public class InteractionValueEncoder ext
     return hash(name, i, data.size());
   }
 
-  protected int hash1(String term1, String term2, int probe, int numFeatures) {
-    return hash(term1, term2, probe+INTERACTION_VALUE_HASH_SEED_1,numFeatures);
+  protected int hash1(int value, String name, int i, Vector data){
+    return hash(name, i+value+INTERACTION_VALUE_HASH_SEED_1, data.size());
   }
 
-  protected int hash2(String term1, String term2, int probe, int numFeatures) {
-    return hash(term1, term2, probe+INTERACTION_VALUE_HASH_SEED_2,numFeatures);
+  protected int hash2(int value, String name, int i, Vector data){
+    return hash(name, i+value+INTERACTION_VALUE_HASH_SEED_2, data.size());
   }
 }
 
+

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/StaticWordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/StaticWordValueEncoder.java?rev=990914&r1=990913&r2=990914&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/StaticWordValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/StaticWordValueEncoder.java Mon Aug 30 19:34:13 2010
@@ -41,7 +41,7 @@ public class StaticWordValueEncoder exte
 
   @Override
   protected int hashForProbe(String originalForm, Vector data, String name, int i) {
-    return hash(name, i, data.size());
+    return hash(name, originalForm, WORD_LIKE_VALUE_HASH_SEED + i, data.size());
   }
 
    /**

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java?rev=990914&r1=990913&r2=990914&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java Mon Aug 30 19:34:13 2010
@@ -52,21 +52,21 @@ public class TextValueEncoder extends Fe
     }
   }
 
-    @Override
-    protected int hashForProbe(String originalForm, Vector data, String name, int i) {
-        return 0;
-    }
+  @Override
+  protected int hashForProbe(String originalForm, Vector data, String name, int i) {
+    return 0;
+  }
 
-    protected Iterable<Integer> hashesForProbe(String originalForm, Vector data, String name, int i){
-        List<Integer> hashes = new ArrayList<Integer>();
-        for (String word : tokenize(originalForm)){
-            hashes.add(hashForProbe(word,data,name,i));
-        }
-        return hashes;
+  protected Iterable<Integer> hashesForProbe(String originalForm, Vector data, String name, int i){
+    List<Integer> hashes = new ArrayList<Integer>();
+    for (String word : tokenize(originalForm)){
+      hashes.add(hashForProbe(word,data,name,i));
     }
+    return hashes;
+  }
 
 
-    private Iterable<String> tokenize(CharSequence originalForm) {
+  private Iterable<String> tokenize(CharSequence originalForm) {
     return onNonWord.split(originalForm);
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/WordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/WordValueEncoder.java?rev=990914&r1=990913&r2=990914&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/WordValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/WordValueEncoder.java Mon Aug 30 19:34:13 2010
@@ -49,6 +49,7 @@ public abstract class WordValueEncoder e
     }
   }
 
+
   @Override
   protected double getWeight(String originalForm, double w) {
     return w*weight(originalForm);    

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java?rev=990914&r1=990913&r2=990914&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java Mon Aug 30 19:34:13 2010
@@ -55,7 +55,7 @@ public class InteractionValueEncoderTest
   }
 
   @Test
-  public void testaddToVectorUsesProductOfWeights(){
+  public void testAddToVectorUsesProductOfWeights(){
     WordValueEncoder wv = new StaticWordValueEncoder("word");
     ContinuousValueEncoder cv = new ContinuousValueEncoder("cont");
     InteractionValueEncoder enc = new InteractionValueEncoder("interactions", wv, cv);
@@ -66,4 +66,17 @@ public class InteractionValueEncoderTest
     Assert.assertEquals((float) k*0.5*0.9, v1.norm(1), 0);
     Assert.assertEquals(0.5*0.9, v1.maxValue(), 0);
   }
+
+  @Test
+  public void testAddToVectorWithTextValueEncoder(){
+    WordValueEncoder wv = new StaticWordValueEncoder("word");
+    TextValueEncoder tv = new TextValueEncoder("text");
+    InteractionValueEncoder enc = new InteractionValueEncoder("interactions", wv, tv);
+    Vector v1 = new DenseVector(200);
+    enc.addInteractionToVector("a","some text here",1.0, v1);
+    int k = enc.getProbes();
+    // should interact "a" with each of "some","text" and "here"
+    Assert.assertEquals((float) k*3, v1.norm(1), 0);    
+  }
+
 }


Reply via email to