Author: gsingers
Date: Wed Nov  2 04:07:31 2011
New Revision: 1196448

URL: http://svn.apache.org/viewvc?rev=1196448&view=rev
Log:
MAHOUT-854: add minhash example

Modified:
    mahout/trunk/examples/bin/build-reuters.sh

Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=1196448&r1=1196447&r2=1196448&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Wed Nov  2 04:07:31 2011
@@ -37,13 +37,14 @@ fi
 if [ "$1" = "-ni" ]; then
   clustertype=kmeans
 else
-  algorithm=( kmeans fuzzykmeans lda dirichlet)
+  algorithm=( kmeans fuzzykmeans lda dirichlet minhash)
  
   echo "Please select a number to choose the corresponding clustering algorithm"
   echo "1. ${algorithm[0]} clustering"
   echo "2. ${algorithm[1]} clustering"
   echo "3. ${algorithm[2]} clustering"
   echo "4. ${algorithm[3]} clustering"
+  echo "5. ${algorithm[4]} clustering"
   read -p "Enter your choice : " choice
 
   echo "ok. You chose $choice and we'll use ${algorithm[$choice-1]} Clustering"
@@ -151,6 +152,14 @@ elif [ "x$clustertype" == "xdirichlet" ]
     -s ${WORK_DIR}/reuters-dirichlet/clusters-*-final \
     -d ${WORK_DIR}/reuters-out-seqdir-sparse-dirichlet/dictionary.file-0 \
     -dt sequencefile -b 100 -n 20
+elif [ "x$clustertype" == "xminhash" ]; then
+  $MAHOUT seq2sparse \
+    -i ${WORK_DIR}/reuters-out-seqdir/ \
+    -o ${WORK_DIR}/reuters-out-seqdir-sparse-minhash \
+  && \
+  $MAHOUT org.apache.mahout.clustering.minhash.MinHashDriver \
+    -i ${WORK_DIR}/reuters-out-seqdir-sparse-minhash/tfidf-vectors \
+    -o ${WORK_DIR}/reuters-minhash
 else 
   echo "unknown cluster type: $clustertype"
 fi 


Reply via email to