m...

srowen Thu, 27 May 2010 11:02:56 -0700

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/text/TextParagraphSplittingJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/text/TextParagraphSplittingJob.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/text/TextParagraphSplittingJob.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/text/TextParagraphSplittingJob.java
 Thu May 27 18:02:20 2010
@@ -53,13 +53,13 @@ public class TextParagraphSplittingJob e
   public static class SplitMap extends Mapper<Text,Text,Text,Text> {
 
     @Override
-    public void map(Text key, Text text, Context context) throws IOException, 
InterruptedException {
+    protected void map(Text key, Text text, Context context) throws 
IOException, InterruptedException {
       Text outText = new Text();
       int loc = 0;
-      while(loc >= 0 && loc < text.getLength()) {
-        int nextLoc = text.find("\n\n", loc+1);
+      while (loc >= 0 && loc < text.getLength()) {
+        int nextLoc = text.find("\n\n", loc + 1);
         if (nextLoc > 0) {
-          outText.set(text.getBytes(), loc, (nextLoc - loc));
+          outText.set(text.getBytes(), loc, nextLoc - loc);
           context.write(key, outText);
         }
         loc = nextLoc;


Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
 Thu May 27 18:02:20 2010
@@ -41,7 +41,7 @@ import org.apache.hadoop.mapred.jobcontr
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class SequenceFileDumper {
+public final class SequenceFileDumper {
   
   private static final Logger log = 
LoggerFactory.getLogger(SequenceFileDumper.class);
   

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
 Thu May 27 18:02:20 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.utils.clusteri
 
 import java.io.File;
 import java.io.FileWriter;
-import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
@@ -30,7 +29,6 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.SortedMap;
 import java.util.TreeMap;
 
 import org.apache.commons.cli2.CommandLine;
@@ -48,7 +46,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
 Thu May 27 18:02:20 2010
@@ -45,13 +45,12 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.text.DefaultAnalyzer;
-import org.apache.mahout.utils.vectors.text.DictionaryVectorizer;
 import org.apache.mahout.utils.vectors.text.DocumentProcessor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /** Driver for LLR Collocation discovery mapreduce job */
-public class CollocDriver extends Configured implements Tool {
+public final class CollocDriver extends Configured implements Tool {
   public static final String DEFAULT_OUTPUT_DIRECTORY = "output";
   public static final String SUBGRAM_OUTPUT_DIRECTORY = "subgrams";
   public static final String NGRAM_OUTPUT_DIRECTORY = "ngrams";
@@ -214,7 +213,7 @@ public class CollocDriver extends Config
   }
   
   /**
-   * Generate all ngrams for the {@link DictionaryVectorizer} job
+   * Generate all ngrams for the {@link 
org.apache.mahout.utils.vectors.text.DictionaryVectorizer} job
    * 
    * @param input
    *          input path containing tokenized documents

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java
 Thu May 27 18:02:20 2010
@@ -96,12 +96,10 @@ public class CollocReducer extends MapRe
     if (keyType == Gram.Type.UNIGRAM) {
       // sum frequencies for unigrams.
       processUnigram(key, values, output, reporter);
-    }
-    else if (keyType == Gram.Type.HEAD || keyType == Gram.Type.TAIL) {
+    } else if (keyType == Gram.Type.HEAD || keyType == Gram.Type.TAIL) {
       // sum frequencies for subgrams, ngram and collect for each ngram.
       processSubgram(key, values, output, reporter);
-    }
-    else {
+    } else {
       reporter.incrCounter(Skipped.MALFORMED_TYPES, 1);
     }
   }
@@ -153,27 +151,23 @@ public class CollocReducer extends MapRe
         // collect frequency for subgrams.
         if (subgram == null) {
           subgram = new Gram(value);
-        }
-        else {
+        } else {
           subgram.incrementFrequency(value.getFrequency());
         }
-      }
-      else if (!value.equals(currentNgram)) {
+      } else if (!value.equals(currentNgram)) {
         // we've collected frequency for all subgrams and we've encountered a 
new ngram. 
         // collect the old ngram if there was one and we have sufficient 
support and
         // create the new ngram.
         if (currentNgram != null) {
           if (currentNgram.getFrequency() < minSupport) {
             reporter.incrCounter(Skipped.LESS_THAN_MIN_SUPPORT, 1);
-          }
-          else {
+          } else {
             output.collect(currentNgram, subgram);
           }
         }
 
         currentNgram = new Gram(value);
-      }
-      else {
+      } else {
         currentNgram.incrementFrequency(value.getFrequency());
       }
     }

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java
 Thu May 27 18:02:20 2010
@@ -140,7 +140,7 @@ public class Gram extends BinaryComparab
    */
   public String getString() {
     try {
-      return Text.decode(bytes, 1, length-1);
+      return Text.decode(bytes, 1, length - 1);
     } catch (CharacterCodingException e) {
       throw new IllegalStateException("Should not have happened " + 
e.toString()); 
     }
@@ -194,7 +194,7 @@ public class Gram extends BinaryComparab
    * @param keepData should the old data be kept
    */
   private void setCapacity(int len, boolean keepData) {
-    len+=1; // extra byte to hold type
+    len++; // extra byte to hold type
     if (bytes == null || bytes.length < len) {
       byte[] newBytes = new byte[len];
       if (bytes != null && keepData) {

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java
 Thu May 27 18:02:20 2010
@@ -121,7 +121,7 @@ public class GramKey extends BinaryCompa
 
   public String getPrimaryString() {
     try {
-      return Text.decode(bytes, 1, primaryLength-1);
+      return Text.decode(bytes, 1, primaryLength - 1);
     } catch (CharacterCodingException e) {
       throw new IllegalStateException(e);
     }

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java
 Thu May 27 18:02:20 2010
@@ -17,13 +17,15 @@
 
 package org.apache.mahout.utils.nlp.collocations.llr;
 
+import java.io.Serializable;
+
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
 
 /** Group GramKeys based on their Gram, ignoring the secondary sort key, so 
that all keys with the same Gram are sent
  *  to the same call of the reduce method, sorted in natural order (for 
GramKeys).
  */
-public class GramKeyGroupComparator extends WritableComparator {
+public class GramKeyGroupComparator extends WritableComparator implements 
Serializable {
 
   protected GramKeyGroupComparator() {
     super(GramKey.class, true);
@@ -35,7 +37,8 @@ public class GramKeyGroupComparator exte
     GramKey gka = (GramKey) a;
     GramKey gkb = (GramKey) b;
 
-    return WritableComparator.compareBytes(gka.getBytes(), 0, 
gka.getPrimaryLength(), gkb.getBytes(), 0, gkb.getPrimaryLength());
+    return WritableComparator.compareBytes(gka.getBytes(), 0, 
gka.getPrimaryLength(),
+                                           gkb.getBytes(), 0, 
gkb.getPrimaryLength());
   }
 
 }

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
 Thu May 27 18:02:20 2010
@@ -108,8 +108,7 @@ public class LLRReducer extends MapReduc
                      OutputCollector<Text,DoubleWritable> output,
                      Reporter reporter) throws IOException {
     
-    int[] gramFreq = new int[2];
-    gramFreq[0] = gramFreq[1] = -1;
+    int[] gramFreq = {-1, -1};
     
     if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
       DoubleWritable dd = new DoubleWritable(ngram.getFrequency());

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java 
(original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java 
Thu May 27 18:02:20 2010
@@ -56,9 +56,9 @@ public class RowIdJob extends AbstractJo
     VectorWritable v = new VectorWritable();
 
     int i = 0;
-    for(FileStatus status : fs.listStatus(inputPath)) {
+    for (FileStatus status : fs.listStatus(inputPath)) {
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, 
status.getPath(), conf);
-      while(reader.next(inputKey, v)) {
+      while (reader.next(inputKey, v)) {
         docId.set(i);
         indexWriter.append(docId, inputKey);
         matrixWriter.append(docId, v);

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
 Thu May 27 18:02:20 2010
@@ -56,7 +56,7 @@ public class SequenceFileVectorIterable 
     }
   }
   
-  public class SeqFileIterator implements Iterator<Vector> {
+  public final class SeqFileIterator implements Iterator<Vector> {
     private final Writable key;
     private final Writable value;
     

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
 Thu May 27 18:02:20 2010
@@ -150,7 +150,7 @@ public final class VectorDumper {
         if (cmdLine.hasOption(outputOpt)) {
           writer.close();
         }
-        System.err.println("Dumped " + i + " Vectors");
+        System.out.println("Dumped " + i + " Vectors");
       }
       
     } catch (OptionException e) {

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
 Thu May 27 18:02:20 2010
@@ -148,7 +148,7 @@ public class ARFFVectorIterable implemen
     return new ARFFIterator();
   }
   
-  private class ARFFIterator implements Iterator<Vector> {
+  private final class ARFFIterator implements Iterator<Vector> {
     
     private String line;
     

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
 Thu May 27 18:02:20 2010
@@ -21,11 +21,8 @@ import java.io.IOException;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobClient;
@@ -37,7 +34,8 @@ import org.apache.mahout.common.HadoopUt
 import org.apache.mahout.math.VectorWritable;
 
 /**
- * This class groups a set of input vectors. The Sequence file input should 
have a {@link WritableComparable}
+ * This class groups a set of input vectors. The Sequence file input should 
have a
+ * {@link org.apache.hadoop.io.WritableComparable}
  * key containing document id and a {@link VectorWritable} value containing 
the term frequency vector. This
  * class also does normalization of the vector.
  * 
@@ -64,7 +62,7 @@ public final class PartialVectorMerger {
    * {@link org.apache.mahout.math.RandomAccessSparseVector}
    * 
    * @param partialVectorPaths
-   *          input directory of the vectors in {@link SequenceFile} format
+   *          input directory of the vectors in {@link 
org.apache.hadoop.io.SequenceFile} format
    * @param output
    *          output directory were the partial vectors have to be created
    * @param normPower

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
 Thu May 27 18:02:20 2010
@@ -29,7 +29,6 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.Map.Entry;
 
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
@@ -42,7 +41,6 @@ import org.apache.commons.cli2.commandli
 import org.apache.hadoop.fs.Path;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.SetBasedFieldSelector;
-import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
@@ -116,7 +114,7 @@ public class ClusterLabels {
 
   private String idField;
 
-  private Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints = null;
+  private Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints;
 
   private String output;
 
@@ -124,8 +122,12 @@ public class ClusterLabels {
 
   private int maxLabels = DEFAULT_MAX_LABELS;
 
-  public ClusterLabels(Path seqFileDir, Path pointsDir, String indexDir, 
String contentField, int minNumIds, int maxLabels)
-      throws IOException {
+  public ClusterLabels(Path seqFileDir,
+                       Path pointsDir,
+                       String indexDir,
+                       String contentField,
+                       int minNumIds,
+                       int maxLabels) throws IOException {
     this.seqFileDir = seqFileDir;
     this.pointsDir = pointsDir;
     this.indexDir = indexDir;
@@ -149,7 +151,7 @@ public class ClusterLabels {
       writer = new OutputStreamWriter(System.out);
     }
 
-    for (Entry<Integer, List<WeightedVectorWritable>> integerListEntry : 
clusterIdToPoints.entrySet()) {
+    for (Map.Entry<Integer, List<WeightedVectorWritable>> integerListEntry : 
clusterIdToPoints.entrySet()) {
       List<WeightedVectorWritable> wvws = integerListEntry.getValue();
       List<TermInfoClusterInOut> termInfos = 
getClusterLabels(integerListEntry.getKey(), wvws);
       if (termInfos != null) {
@@ -173,14 +175,9 @@ public class ClusterLabels {
 
   /**
    * Get the list of labels, sorted by best score.
-   * 
-   * @param integer
-   * @param wvws
-   * @return
-   * @throws CorruptIndexException
-   * @throws IOException
    */
-  protected List<TermInfoClusterInOut> getClusterLabels(Integer integer, 
List<WeightedVectorWritable> wvws) throws IOException {
+  protected List<TermInfoClusterInOut> getClusterLabels(Integer integer, 
List<WeightedVectorWritable> wvws)
+      throws IOException {
 
     if (wvws.size() < minNumIds) {
       log.info("Skipping small cluster {} with size: {}", integer, 
wvws.size());
@@ -266,12 +263,14 @@ public class ClusterLabels {
     return clusteredTermInfo.subList(0, Math.min(clusteredTermInfo.size(), 
maxLabels));
   }
 
-  private static OpenBitSet getClusterDocBitset(IndexReader reader, 
Set<String> idSet, String idField) throws IOException {
+  private static OpenBitSet getClusterDocBitset(IndexReader reader, 
Set<String> idSet, String idField)
+      throws IOException {
     int numDocs = reader.numDocs();
 
     OpenBitSet bitset = new OpenBitSet(numDocs);
 
-    FieldSelector idFieldSelector = new 
SetBasedFieldSelector(Collections.singleton(idField), 
Collections.<String>emptySet());
+    FieldSelector idFieldSelector =
+        new SetBasedFieldSelector(Collections.singleton(idField), 
Collections.<String>emptySet());
 
     for (int i = 0; i < numDocs; i++) {
       String id = null;
@@ -319,16 +318,16 @@ public class ClusterLabels {
     GroupBuilder gbuilder = new GroupBuilder();
 
     Option indexOpt = 
obuilder.withLongName("dir").withRequired(true).withArgument(
-        
abuilder.withName("dir").withMinimum(1).withMaximum(1).create()).withDescription("The
 Lucene index directory")
-        .withShortName("d").create();
+        abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
+        .withDescription("The Lucene index 
directory").withShortName("d").create();
 
     Option outputOpt = 
obuilder.withLongName("output").withRequired(false).withArgument(
         
abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
         "The output file. If not specified, the result is printed on 
console.").withShortName("o").create();
 
     Option fieldOpt = 
obuilder.withLongName("field").withRequired(true).withArgument(
-        
abuilder.withName("field").withMinimum(1).withMaximum(1).create()).withDescription("The
 content field in the index")
-        .withShortName("f").create();
+        abuilder.withName("field").withMinimum(1).withMaximum(1).create())
+        .withDescription("The content field in the 
index").withShortName("f").create();
 
     Option idFieldOpt = 
obuilder.withLongName("idField").withRequired(false).withArgument(
         
abuilder.withName("idField").withMinimum(1).withMaximum(1).create()).withDescription(
@@ -341,7 +340,8 @@ public class ClusterLabels {
 
     Option pointsOpt = 
obuilder.withLongName("pointsDir").withRequired(true).withArgument(
         
abuilder.withName("pointsDir").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The directory containing points sequence files mapping input vectors 
to their cluster.  ").withShortName("p").create();
+        "The directory containing points sequence files mapping input vectors 
to their cluster.  ")
+        .withShortName("p").create();
     Option minClusterSizeOpt = 
obuilder.withLongName("minClusterSize").withRequired(false).withArgument(
         
abuilder.withName("minClusterSize").withMinimum(1).withMaximum(1).create()).withDescription(
         "The minimum number of points required in a cluster to print the 
labels for").withShortName("m").create();
@@ -350,9 +350,9 @@ public class ClusterLabels {
         "The maximum number of labels to print per 
cluster").withShortName("x").create();
     Option helpOpt = obuilder.withLongName("help").withDescription("Print out 
help").withShortName("h").create();
 
-    Group group = 
gbuilder.withName("Options").withOption(indexOpt).withOption(idFieldOpt).withOption(outputOpt).withOption(
-        
fieldOpt).withOption(seqOpt).withOption(pointsOpt).withOption(helpOpt).withOption(maxLabelsOpt).withOption(
-        minClusterSizeOpt).create();
+    Group group = 
gbuilder.withName("Options").withOption(indexOpt).withOption(idFieldOpt).withOption(outputOpt)
+        
.withOption(fieldOpt).withOption(seqOpt).withOption(pointsOpt).withOption(helpOpt)
+        .withOption(maxLabelsOpt).withOption(minClusterSizeOpt).create();
 
     try {
       Parser parser = new Parser();

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
 Thu May 27 18:02:20 2010
@@ -54,7 +54,7 @@ import org.apache.mahout.utils.vectors.i
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class Driver {
+public final class Driver {
   private static final Logger log = LoggerFactory.getLogger(Driver.class);
   
   private Driver() { }

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
 Thu May 27 18:02:20 2010
@@ -32,17 +32,17 @@ import org.apache.mahout.math.Vector;
  * {@link Vector}. The Field used to create the Vector currently must have 
Term Vectors stored for it.
  */
 public class LuceneIterable implements Iterable<Vector> {
-  
+
+  public static final double NO_NORMALIZING = -1.0;
+
   private final IndexReader indexReader;
   private final String field;
-  private final String idField;
-  private final FieldSelector idFieldSelector;
+  //private final String idField;
+  //private final FieldSelector idFieldSelector;
   
   private final VectorMapper mapper;
   private double normPower = NO_NORMALIZING;
-  
-  public static final double NO_NORMALIZING = -1.0;
-  
+
   public LuceneIterable(IndexReader reader, String idField, String field, 
VectorMapper mapper) {
     this(reader, idField, field, mapper, NO_NORMALIZING);
   }
@@ -70,9 +70,9 @@ public class LuceneIterable implements I
     if (normPower != NO_NORMALIZING && normPower < 0) {
       throw new IllegalArgumentException("normPower must either be -1 or >= 
0");
     }
-    idFieldSelector = new 
SetBasedFieldSelector(Collections.singleton(idField), 
Collections.<String>emptySet());
+    //idFieldSelector = new 
SetBasedFieldSelector(Collections.singleton(idField), 
Collections.<String>emptySet());
     this.indexReader = reader;
-    this.idField = idField;
+    //this.idField = idField;
     this.field = field;
     this.mapper = mapper;
     this.normPower = normPower;

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DocumentProcessor.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DocumentProcessor.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DocumentProcessor.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DocumentProcessor.java
 Thu May 27 18:02:20 2010
@@ -21,9 +21,7 @@ import java.io.IOException;
 import java.nio.charset.Charset;
 
 import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
@@ -38,7 +36,8 @@ import org.apache.mahout.utils.vectors.t
 
 /**
  * This class converts a set of input documents in the sequence file format of 
{@link StringTuple}s.The
- * {@link SequenceFile} input should have a {@link Text} key containing 
the unique document identifier and a
+ * {@link org.apache.hadoop.io.SequenceFile} input should have a {@link 
Text} key
+ * containing the unique document identifier and a
  * {@link Text} value containing the whole document. The document should be 
stored in UTF-8 encoding which is
  * recognizable by hadoop. It uses the given {@link Analyzer} to process 
the document into
  * {@link org.apache.lucene.analysis.Token}s.
@@ -60,10 +59,10 @@ public final class DocumentProcessor {
   
   /**
    * Convert the input documents into token array using the {@link 
StringTuple} The input documents has to be
-   * in the {@link SequenceFile} format
+   * in the {@link org.apache.hadoop.io.SequenceFile} format
    * 
    * @param input
-   *          input directory of the documents in {@link SequenceFile} 
format
+   *          input directory of the documents in {@link 
org.apache.hadoop.io.SequenceFile} format
    * @param output
    *          output directory were the {@link StringTuple} token array of 
each document has to be created
    * @param analyzerClass

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/document/SequenceFileTokenizerMapper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/document/SequenceFileTokenizerMapper.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/document/SequenceFileTokenizerMapper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/document/SequenceFileTokenizerMapper.java
 Thu May 27 18:02:20 2010
@@ -44,7 +44,7 @@ public class SequenceFileTokenizerMapper
   public void map(Text key, Text value,
                   OutputCollector<Text,StringTuple> output, Reporter reporter) 
throws IOException {
     TokenStream stream = analyzer.tokenStream(key.toString(), new 
StringReader(value.toString()));
-    TermAttribute termAtt = (TermAttribute) 
stream.addAttribute(TermAttribute.class);
+    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
     StringTuple document = new StringTuple();
     while (stream.incrementToken()) {
       if (termAtt.termLength() > 0) {

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java?rev=948935&r1=948934&r2=948935&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
 Thu May 27 18:02:20 2010
@@ -32,7 +32,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobClient;
@@ -49,7 +48,8 @@ import org.apache.mahout.utils.vectors.t
 
 /**
  * This class converts a set of input vectors with term frequencies to TfIdf 
vectors. The Sequence file input
- * should have a {@link WritableComparable} key containing and a {@link 
VectorWritable} value containing the
+ * should have a {@link org.apache.hadoop.io.WritableComparable} key 
containing and a
+ * {@link VectorWritable} value containing the
  * term frequency vector. This is conversion class uses multiple map/reduces 
to convert the vectors to TfIdf
  * format
  *

svn commit: r948935 [3/3] - in /mahout/trunk: buildtools/src/main/resources/ core/src/main/java/org/apache/mahout/cf/taste/eval/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/m...

Reply via email to