Author: ssc
Date: Wed Feb 16 19:28:29 2011
New Revision: 1071369
URL: http://svn.apache.org/viewvc?rev=1071369&view=rev
Log:
MAHOUT-610 Not all Cooccurrences provided to SimilarityReducer
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1071369&r1=1071368&r2=1071369&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
Wed Feb 16 19:28:29 2011
@@ -82,6 +82,10 @@ public class RowSimilarityJob extends Ab
private static final int DEFAULT_MAX_SIMILARITIES_PER_ROW = 100;
+ public static enum Counter {
+ COOCCURRENCES, SIMILAR_ROWS
+ }
+
public static void main(String[] args) throws Exception {
ToolRunner.run(new RowSimilarityJob(), args);
}
@@ -254,6 +258,7 @@ public class RowSimilarityJob extends Ab
WeightedRowPair rowPair = new WeightedRowPair();
Cooccurrence coocurrence = new Cooccurrence();
+ int numPairs = 0;
for (int n = 0; n < weightedOccurrences.length; n++) {
int rowA = weightedOccurrences[n].getRow();
double weightA = weightedOccurrences[n].getWeight();
@@ -262,11 +267,17 @@ public class RowSimilarityJob extends Ab
int rowB = weightedOccurrences[m].getRow();
double weightB = weightedOccurrences[m].getWeight();
double valueB = weightedOccurrences[m].getValue();
- rowPair.set(rowA, rowB, weightA, weightB);
+ if(rowA <= rowB){
+ rowPair.set(rowA, rowB, weightA, weightB);
+ } else {
+ rowPair.set(rowB, rowA, weightB, weightA);
+ }
coocurrence.set(column.get(), valueA, valueB);
ctx.write(rowPair, coocurrence);
+ numPairs++;
}
}
+ ctx.getCounter(Counter.COOCCURRENCES).increment(numPairs);
}
}
@@ -299,6 +310,7 @@ public class RowSimilarityJob extends Ab
rowPair.getWeightB(), numberOfColumns);
if (!Double.isNaN(similarityValue)) {
+ ctx.getCounter(Counter.SIMILAR_ROWS).increment(1);
SimilarityMatrixEntryKey key = new SimilarityMatrixEntryKey();
MatrixEntryWritable entry = new MatrixEntryWritable();
entry.setVal(similarityValue);
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java?rev=1071369&r1=1071368&r2=1071369&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
Wed Feb 16 19:28:29 2011
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.MahoutTestCase;
@@ -133,36 +134,63 @@ public final class TestRowSimilarityJob
public void testCooccurrencesMapper() throws Exception {
Mapper<VarIntWritable,WeightedOccurrenceArray,WeightedRowPair,Cooccurrence>.Context
context =
EasyMock.createMock(Mapper.Context.class);
+ Counter counter = EasyMock.createMock(Counter.class);
context.write(new WeightedRowPair(34, 34, 1.0, 1.0), new Cooccurrence(12,
0.5, 0.5));
context.write(new WeightedRowPair(34, 56, 1.0, 3.0), new Cooccurrence(12,
0.5, 1.0));
context.write(new WeightedRowPair(56, 56, 3.0, 3.0), new Cooccurrence(12,
1.0, 1.0));
+
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(counter);
+ counter.increment(3);
- EasyMock.replay(context);
+ EasyMock.replay(context, counter);
WeightedOccurrenceArray weightedOccurrences = new
WeightedOccurrenceArray(new WeightedOccurrence[] {
new WeightedOccurrence(34, 0.5, 1.0), new WeightedOccurrence(56, 1.0,
3.0) });
new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12),
weightedOccurrences, context);
- EasyMock.verify(context);
+ EasyMock.verify(context, counter);
}
+ public void testCooccurrencesMapperOrdering() throws Exception {
+
Mapper<VarIntWritable,WeightedOccurrenceArray,WeightedRowPair,Cooccurrence>.Context
context =
+ EasyMock.createMock(Mapper.Context.class);
+ Counter counter = EasyMock.createMock(Counter.class);
+
+ context.write(new WeightedRowPair(34, 34, 1.0, 1.0), new Cooccurrence(12,
0.5, 0.5));
+ context.write(new WeightedRowPair(34, 56, 1.0, 3.0), new Cooccurrence(12,
0.5, 1.0));
+ context.write(new WeightedRowPair(56, 56, 3.0, 3.0), new Cooccurrence(12,
1.0, 1.0));
+
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(counter);
+ counter.increment(3);
+
+ EasyMock.replay(context, counter);
+
+ WeightedOccurrenceArray weightedOccurrences = new
WeightedOccurrenceArray(new WeightedOccurrence[] {
+ new WeightedOccurrence(56, 1.0, 3.0), new WeightedOccurrence(34, 0.5,
1.0) });
+
+ new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12),
weightedOccurrences, context);
+
+ EasyMock.verify(context, counter);
+ }
+
+
/**
* Tests {@link SimilarityReducer}
*/
@Test
public void testSimilarityReducer() throws Exception {
-
Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,MatrixEntryWritable>.Context
context =
EasyMock.createMock(Reducer.Context.class);
+ Counter counter = EasyMock.createMock(Counter.class);
context.write(EasyMock.eq(new SimilarityMatrixEntryKey(12, 0.5)),
MathHelper.matrixEntryMatches(12, 34, 0.5));
context.write(EasyMock.eq(new SimilarityMatrixEntryKey(34, 0.5)),
MathHelper.matrixEntryMatches(34, 12, 0.5));
+
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(counter);
+ counter.increment(1);
- EasyMock.replay(context);
+ EasyMock.replay(context, counter);
SimilarityReducer reducer = new SimilarityReducer();
setField(reducer, "similarity", new
DistributedTanimotoCoefficientVectorSimilarity());
@@ -170,7 +198,7 @@ public final class TestRowSimilarityJob
reducer.reduce(new WeightedRowPair(12, 34, 3.0, 3.0), Arrays.asList(new
Cooccurrence(56, 1.0, 2.0),
new Cooccurrence(78, 3.0, 6.0)), context);
- EasyMock.verify(context);
+ EasyMock.verify(context, counter);
}
/**
@@ -179,13 +207,15 @@ public final class TestRowSimilarityJob
*/
@Test
public void testSimilarityReducerSelfSimilarity() throws Exception {
-
Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,MatrixEntryWritable>.Context
context =
EasyMock.createMock(Reducer.Context.class);
+ Counter counter = EasyMock.createMock(Counter.class);
context.write(EasyMock.eq(new SimilarityMatrixEntryKey(90, 1.0)),
MathHelper.matrixEntryMatches(90, 90, 1.0));
+
EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(counter);
+ counter.increment(1);
- EasyMock.replay(context);
+ EasyMock.replay(context, counter);
SimilarityReducer reducer = new SimilarityReducer();
setField(reducer, "similarity", new
DistributedTanimotoCoefficientVectorSimilarity());
@@ -193,7 +223,7 @@ public final class TestRowSimilarityJob
reducer.reduce(new WeightedRowPair(90, 90, 2.0, 2.0), Arrays.asList(new
Cooccurrence(56, 1.0, 2.0),
new Cooccurrence(78, 3.0, 6.0)), context);
- EasyMock.verify(context);
+ EasyMock.verify(context, counter);
}
/**