Author: srowen
Date: Fri Sep 16 15:47:19 2011
New Revision: 1171636

URL: http://svn.apache.org/viewvc?rev=1171636&view=rev
Log:
MAHOUT-811 fix working directory issue with WORK_DIR and extracting archive

Modified:
    mahout/trunk/examples/bin/build-reuters.sh

Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=1171636&r1=1171635&r2=1171636&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Fri Sep 16 15:47:19 2011
@@ -54,46 +54,37 @@ echo "creating work directory at ${WORK_
 mkdir -p ${WORK_DIR}
 
 if [ ! -e ${WORK_DIR}/reuters-out-seqdir ]; then
-    if [ ! -e ${WORK_DIR}/reuters-out ]; then
-       if [ ! -e ${WORK_DIR}/reuters-sgm ]; then
-           if [ ! -f ${WORK_DIR}/reuters21578.tar.gz ]; then
-               echo "Downloading Reuters-21578"
-               curl http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.tar.gz \
-                     -o ${WORK_DIR}/reuters21578.tar.gz
-           fi
-           mkdir -p ${WORK_DIR}/reuters-sgm
-           echo "Extracting..."
-           cd ${WORK_DIR}/reuters-sgm && tar xzf ../reuters21578.tar.gz && cd .. && cd ..
-       fi
-       
-       $MAHOUT org.apache.lucene.benchmark.utils.ExtractReuters \
-           ${WORK_DIR}/reuters-sgm \
-           ${WORK_DIR}/reuters-out 
+  if [ ! -e ${WORK_DIR}/reuters-out ]; then
+    if [ ! -e ${WORK_DIR}/reuters-sgm ]; then
+      if [ ! -f ${WORK_DIR}/reuters21578.tar.gz ]; then
+        echo "Downloading Reuters-21578"
+        curl http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.tar.gz -o ${WORK_DIR}/reuters21578.tar.gz
+      fi
+      mkdir -p ${WORK_DIR}/reuters-sgm
+      echo "Extracting..."
+      tar xzf ${WORK_DIR}/reuters21578.tar.gz -C ${WORK_DIR}/reuters-sgm
     fi
+       
+    $MAHOUT org.apache.lucene.benchmark.utils.ExtractReuters ${WORK_DIR}/reuters-sgm ${WORK_DIR}/reuters-out
+  fi
 
-    MAHOUT_LOCAL=true $MAHOUT seqdirectory \
-        -i ${WORK_DIR}/reuters-out \
-        -o ${WORK_DIR}/reuters-out-seqdir \
-        -c UTF-8 -chunk 5
+  MAHOUT_LOCAL=true $MAHOUT seqdirectory -i ${WORK_DIR}/reuters-out -o ${WORK_DIR}/reuters-out-seqdir -c UTF-8 -chunk 5
 fi
 
 # we know reuters-out-seqdir exists on a local disk at
 # this point, if we're running in clustered mode, 
 # copy it up to hdfs
 if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then
-    HADOOP="$HADOOP_HOME/bin/hadoop"
-    if [ ! -e $HADOOP ]; then 
-       echo "Can't find hadoop in $HADOOP, exiting"
-       exit 1
-    fi
-
-    set +e
-    $HADOOP dfs -rmr \
-        ${WORK_DIR}/reuters-out-seqdir
-    set -e
-    $HADOOP dfs -put \
-        ${WORK_DIR}/reuters-out-seqdir \
-        ${WORK_DIR}/reuters-out-seqdir
+  HADOOP="$HADOOP_HOME/bin/hadoop"
+  if [ ! -e $HADOOP ]; then
+    echo "Can't find hadoop in $HADOOP, exiting"
+    exit 1
+  fi
+
+  set +e
+  $HADOOP dfs -rmr ${WORK_DIR}/reuters-out-seqdir
+  set -e
+  $HADOOP dfs -put ${WORK_DIR}/reuters-out-seqdir ${WORK_DIR}/reuters-out-seqdir
 fi
 
 if [ "x$clustertype" == "xkmeans" ]; then


Reply via email to