Author: drew
Date: Mon Aug 23 14:09:59 2010
New Revision: 988126
URL: http://svn.apache.org/viewvc?rev=988126&view=rev
Log:
MAHOUT-486: Null Pointer Exception running DictionaryVectorizer with ngram=2 on
Reuters dataset
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocCombiner.java
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocCombiner.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocCombiner.java?rev=988126&r1=988125&r2=988126&view=diff
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocCombiner.java
(original)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocCombiner.java
Mon Aug 23 14:09:59 2010
@@ -18,6 +18,7 @@
package org.apache.mahout.utils.nlp.collocations.llr;
import java.io.IOException;
+import java.util.Iterator;
import org.apache.hadoop.mapreduce.Reducer;
@@ -28,16 +29,19 @@ public class CollocCombiner extends Redu
protected void reduce(GramKey key, Iterable<Gram> values, Context context)
throws IOException, InterruptedException {
int freq = 0;
-
- // accumulate frequencies from values.
- for (Gram gramValue : values) {
- freq += gramValue.getFrequency();
+ Gram value = null;
+
+ // accumulate frequencies from values and keep the last value seen
+ // so that it can be written to the context.
+ for (Iterator<Gram> it = values.iterator(); it.hasNext(); ) {
+ value = it.next();
+ freq += value.getFrequency();
}
- Gram sum = new Gram();
- sum.setFrequency(freq);
-
- context.write(key, sum);
+ if (value != null) {
+ value.setFrequency(freq);
+ context.write(key, value);
+ }
}
}