Author: srowen
Date: Fri Sep 16 15:47:19 2011
New Revision: 1171636
URL: http://svn.apache.org/viewvc?rev=1171636&view=rev
Log:
MAHOUT-811 fix working directory issue with WORK_DIR and extracting archive
Modified:
mahout/trunk/examples/bin/build-reuters.sh
Modified: mahout/trunk/examples/bin/build-reuters.sh
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=1171636&r1=1171635&r2=1171636&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Fri Sep 16 15:47:19 2011
@@ -54,46 +54,37 @@ echo "creating work directory at ${WORK_
mkdir -p ${WORK_DIR}
if [ ! -e ${WORK_DIR}/reuters-out-seqdir ]; then
- if [ ! -e ${WORK_DIR}/reuters-out ]; then
- if [ ! -e ${WORK_DIR}/reuters-sgm ]; then
- if [ ! -f ${WORK_DIR}/reuters21578.tar.gz ]; then
- echo "Downloading Reuters-21578"
- curl
http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.tar.gz \
- -o ${WORK_DIR}/reuters21578.tar.gz
- fi
- mkdir -p ${WORK_DIR}/reuters-sgm
- echo "Extracting..."
- cd ${WORK_DIR}/reuters-sgm && tar xzf ../reuters21578.tar.gz && cd
.. && cd ..
- fi
-
- $MAHOUT org.apache.lucene.benchmark.utils.ExtractReuters \
- ${WORK_DIR}/reuters-sgm \
- ${WORK_DIR}/reuters-out
+ if [ ! -e ${WORK_DIR}/reuters-out ]; then
+ if [ ! -e ${WORK_DIR}/reuters-sgm ]; then
+ if [ ! -f ${WORK_DIR}/reuters21578.tar.gz ]; then
+ echo "Downloading Reuters-21578"
+ curl http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.tar.gz
-o ${WORK_DIR}/reuters21578.tar.gz
+ fi
+ mkdir -p ${WORK_DIR}/reuters-sgm
+ echo "Extracting..."
+ tar xzf ${WORK_DIR}/reuters21578.tar.gz -C ${WORK_DIR}/reuters-sgm
fi
+
+ $MAHOUT org.apache.lucene.benchmark.utils.ExtractReuters
${WORK_DIR}/reuters-sgm ${WORK_DIR}/reuters-out
+ fi
- MAHOUT_LOCAL=true $MAHOUT seqdirectory \
- -i ${WORK_DIR}/reuters-out \
- -o ${WORK_DIR}/reuters-out-seqdir \
- -c UTF-8 -chunk 5
+ MAHOUT_LOCAL=true $MAHOUT seqdirectory -i ${WORK_DIR}/reuters-out -o
${WORK_DIR}/reuters-out-seqdir -c UTF-8 -chunk 5
fi
# we know reuters-out-seqdir exists on a local disk at
# this point, if we're running in clustered mode,
# copy it up to hdfs
if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then
- HADOOP="$HADOOP_HOME/bin/hadoop"
- if [ ! -e $HADOOP ]; then
- echo "Can't find hadoop in $HADOOP, exiting"
- exit 1
- fi
-
- set +e
- $HADOOP dfs -rmr \
- ${WORK_DIR}/reuters-out-seqdir
- set -e
- $HADOOP dfs -put \
- ${WORK_DIR}/reuters-out-seqdir \
- ${WORK_DIR}/reuters-out-seqdir
+ HADOOP="$HADOOP_HOME/bin/hadoop"
+ if [ ! -e $HADOOP ]; then
+ echo "Can't find hadoop in $HADOOP, exiting"
+ exit 1
+ fi
+
+ set +e
+ $HADOOP dfs -rmr ${WORK_DIR}/reuters-out-seqdir
+ set -e
+ $HADOOP dfs -put ${WORK_DIR}/reuters-out-seqdir
${WORK_DIR}/reuters-out-seqdir
fi
if [ "x$clustertype" == "xkmeans" ]; then