Author: ssc
Date: Wed Feb 16 19:28:29 2011
New Revision: 1071369

URL: http://svn.apache.org/viewvc?rev=1071369&view=rev
Log:
MAHOUT-610 Not all Cooccurrences provided to SimilarityReducer

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1071369&r1=1071368&r2=1071369&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
 Wed Feb 16 19:28:29 2011
@@ -82,6 +82,10 @@ public class RowSimilarityJob extends Ab
 
   private static final int DEFAULT_MAX_SIMILARITIES_PER_ROW = 100;
 
+  public static enum Counter {
+    COOCCURRENCES, SIMILAR_ROWS
+  }
+
   public static void main(String[] args) throws Exception {
     ToolRunner.run(new RowSimilarityJob(), args);
   }
@@ -254,6 +258,7 @@ public class RowSimilarityJob extends Ab
       WeightedRowPair rowPair = new WeightedRowPair();
       Cooccurrence coocurrence = new Cooccurrence();
 
+      int numPairs = 0;
       for (int n = 0; n < weightedOccurrences.length; n++) {
         int rowA = weightedOccurrences[n].getRow();
         double weightA = weightedOccurrences[n].getWeight();
@@ -262,11 +267,17 @@ public class RowSimilarityJob extends Ab
           int rowB = weightedOccurrences[m].getRow();
           double weightB = weightedOccurrences[m].getWeight();
           double valueB = weightedOccurrences[m].getValue();
-          rowPair.set(rowA, rowB, weightA, weightB);
+          if(rowA <= rowB){
+                 rowPair.set(rowA, rowB, weightA, weightB);
+          } else {
+                 rowPair.set(rowB, rowA, weightB, weightA);
+          }
           coocurrence.set(column.get(), valueA, valueB);
           ctx.write(rowPair, coocurrence);
+          numPairs++;
         }
       }
+      ctx.getCounter(Counter.COOCCURRENCES).increment(numPairs);
     }
   }
 
@@ -299,6 +310,7 @@ public class RowSimilarityJob extends Ab
           rowPair.getWeightB(), numberOfColumns);
 
       if (!Double.isNaN(similarityValue)) {
+        ctx.getCounter(Counter.SIMILAR_ROWS).increment(1);
         SimilarityMatrixEntryKey key = new SimilarityMatrixEntryKey();
         MatrixEntryWritable entry = new MatrixEntryWritable();
         entry.setVal(similarityValue);

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java?rev=1071369&r1=1071368&r2=1071369&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
 Wed Feb 16 19:28:29 2011
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.common.MahoutTestCase;
@@ -133,36 +134,63 @@ public final class TestRowSimilarityJob 
   public void testCooccurrencesMapper() throws Exception {
     
Mapper<VarIntWritable,WeightedOccurrenceArray,WeightedRowPair,Cooccurrence>.Context
 context =
       EasyMock.createMock(Mapper.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
 
     context.write(new WeightedRowPair(34, 34, 1.0, 1.0), new Cooccurrence(12, 
0.5, 0.5));
     context.write(new WeightedRowPair(34, 56, 1.0, 3.0), new Cooccurrence(12, 
0.5, 1.0));
     context.write(new WeightedRowPair(56, 56, 3.0, 3.0), new Cooccurrence(12, 
1.0, 1.0));
+    
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(counter);
+    counter.increment(3);
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, counter);
 
     WeightedOccurrenceArray weightedOccurrences = new 
WeightedOccurrenceArray(new WeightedOccurrence[] {
         new WeightedOccurrence(34, 0.5, 1.0), new WeightedOccurrence(56, 1.0, 
3.0) });
 
     new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12), 
weightedOccurrences, context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, counter);
   }
 
+  public void testCooccurrencesMapperOrdering() throws Exception {
+    
Mapper<VarIntWritable,WeightedOccurrenceArray,WeightedRowPair,Cooccurrence>.Context
 context =
+      EasyMock.createMock(Mapper.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
+
+    context.write(new WeightedRowPair(34, 34, 1.0, 1.0), new Cooccurrence(12, 
0.5, 0.5));
+    context.write(new WeightedRowPair(34, 56, 1.0, 3.0), new Cooccurrence(12, 
0.5, 1.0));
+    context.write(new WeightedRowPair(56, 56, 3.0, 3.0), new Cooccurrence(12, 
1.0, 1.0));
+    
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(counter);
+    counter.increment(3);
+
+    EasyMock.replay(context, counter);
+
+    WeightedOccurrenceArray weightedOccurrences = new 
WeightedOccurrenceArray(new WeightedOccurrence[] {
+        new WeightedOccurrence(56, 1.0, 3.0), new WeightedOccurrence(34, 0.5, 
1.0) });
+
+    new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12), 
weightedOccurrences, context);
+
+    EasyMock.verify(context, counter);
+  }
+
+
   /**
    * Tests {@link SimilarityReducer}
    */
   @Test
   public void testSimilarityReducer() throws Exception {
-
     
Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,MatrixEntryWritable>.Context
 context =
       EasyMock.createMock(Reducer.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
 
     context.write(EasyMock.eq(new SimilarityMatrixEntryKey(12, 0.5)),
         MathHelper.matrixEntryMatches(12, 34, 0.5));
     context.write(EasyMock.eq(new SimilarityMatrixEntryKey(34, 0.5)),
         MathHelper.matrixEntryMatches(34, 12, 0.5));
+    
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(counter);
+    counter.increment(1);
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, counter);
 
     SimilarityReducer reducer = new SimilarityReducer();
     setField(reducer, "similarity", new 
DistributedTanimotoCoefficientVectorSimilarity());
@@ -170,7 +198,7 @@ public final class TestRowSimilarityJob 
     reducer.reduce(new WeightedRowPair(12, 34, 3.0, 3.0), Arrays.asList(new 
Cooccurrence(56, 1.0, 2.0),
         new Cooccurrence(78, 3.0, 6.0)), context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, counter);
   }
 
   /**
@@ -179,13 +207,15 @@ public final class TestRowSimilarityJob 
    */
   @Test
   public void testSimilarityReducerSelfSimilarity() throws Exception {
-
     
Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,MatrixEntryWritable>.Context
 context =
       EasyMock.createMock(Reducer.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
 
     context.write(EasyMock.eq(new SimilarityMatrixEntryKey(90, 1.0)), 
MathHelper.matrixEntryMatches(90, 90, 1.0));
+    
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(counter);
+    counter.increment(1);
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, counter);
 
     SimilarityReducer reducer = new SimilarityReducer();
     setField(reducer, "similarity", new 
DistributedTanimotoCoefficientVectorSimilarity());
@@ -193,7 +223,7 @@ public final class TestRowSimilarityJob 
     reducer.reduce(new WeightedRowPair(90, 90, 2.0, 2.0), Arrays.asList(new 
Cooccurrence(56, 1.0, 2.0),
         new Cooccurrence(78, 3.0, 6.0)), context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, counter);
   }
 
   /**


Reply via email to