Author: drew
Date: Mon May 31 02:04:01 2010
New Revision: 949649
URL: http://svn.apache.org/viewvc?rev=949649&view=rev
Log:
MAHOUT-398: Eliminated the separate tfidf directory for TF-IDF vector output; the tfidf-vectors folder is now written directly under the output directory.
Modified:
mahout/trunk/examples/bin/build-reuters.sh
mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
Modified: mahout/trunk/examples/bin/build-reuters.sh
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=949649&r1=949648&r2=949649&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Mon May 31 02:04:01 2010
@@ -41,9 +41,9 @@ cd ../..
./bin/mahout seqdirectory -i ./examples/bin/work/reuters-out/ -o
./examples/bin/work/reuters-out-seqdir -c UTF-8 -chunk 5
# to use k-Means clustering, uncomment the next three lines
-#./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o
./examples/bin/work/reuters-out-seqdir-sparse
-#./bin/mahout kmeans -i
./examples/bin/work/reuters-out-seqdir-sparse/tfidf/tfidf-vectors/ -c
./examples/bin/work/clusters -o ./examples/bin/work/reuters-kmeans -x 10 -k 20
-ow
-#./bin/mahout clusterdump -s examples/bin/work/reuters-kmeans/clusters-10 -d
examples/bin/work/reuters-out-seqdir-sparse/dictionary.file-0 -dt sequencefile
-b 100 -n 20
+./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o
./examples/bin/work/reuters-out-seqdir-sparse
+./bin/mahout kmeans -i
./examples/bin/work/reuters-out-seqdir-sparse/tfidf-vectors/ -c
./examples/bin/work/clusters -o ./examples/bin/work/reuters-kmeans -x 10 -k 20
-ow
+./bin/mahout clusterdump -s examples/bin/work/reuters-kmeans/clusters-10 -d
examples/bin/work/reuters-out-seqdir-sparse/dictionary.file-0 -dt sequencefile
-b 100 -n 20
# to use LDA clustering, uncomment the next three lines
#./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o
./examples/bin/work/reuters-out-seqdir-sparse -wt tf -seq -nr 3
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java?rev=949649&r1=949648&r2=949649&view=diff
==============================================================================
---
mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
(original)
+++
mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
Mon May 31 02:04:01 2010
@@ -223,7 +223,7 @@ public final class SparseVectorsFromSequ
if (processIdf) {
TFIDFConverter.processTfIdf(
new Path(outputDir,
DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER),
- new Path(outputDir, TFIDFConverter.TFIDF_OUTPUT_FOLDER), chunkSize,
minDf, maxDFPercent, norm,
+ outputDir, chunkSize, minDf, maxDFPercent, norm,
sequentialAccessOutput, reduceTasks);
}
} catch (OptionException e) {