Author: tcp
Date: Mon Mar 12 18:25:45 2012
New Revision: 1299770
URL: http://svn.apache.org/viewvc?rev=1299770&view=rev
Log:
MAHOUT-822: Make Mahout compatible with Hadoop 0.23.1.
Modified:
mahout/trunk/core/pom.xml
mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
mahout/trunk/pom.xml
Modified: mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/pom.xml (original)
+++ mahout/trunk/core/pom.xml Mon Mar 12 18:25:45 2012
@@ -140,10 +140,6 @@
<!-- Third Party -->
<dependency>
-<groupId>org.apache.hadoop</groupId>
-<artifactId>hadoop-core</artifactId>
-</dependency>
-<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
</dependency>
@@ -211,4 +207,43 @@
</dependency>
</dependencies>
+
+<profiles>
+<profile>
+<id>hadoop-0.20</id>
+<activation>
+<property>
+<name>!hadoop.version</name>
+</property>
+</activation>
+<dependencies>
+<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-core</artifactId>
+</dependency>
+</dependencies>
+</profile>
+<profile>
+<id>hadoop-0.23</id>
+<activation>
+<property>
+<name>hadoop.version</name>
+</property>
+</activation>
+<dependencies>
+<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-common</artifactId>
+</dependency>
+<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-mapreduce-client-common</artifactId>
+</dependency>
+<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-mapreduce-client-core</artifactId>
+</dependency>
+</dependencies>
+</profile>
+</profiles>
</project>
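
The two profiles above key the Hadoop dependency set off the presence of the hadoop.version property: when it is not defined, the hadoop-0.20 profile keeps the classic monolithic hadoop-core artifact; when it is defined, the hadoop-0.23 profile pulls in the split hadoop-common and mapreduce client artifacts instead. A hedged example of selecting each profile from the command line (the 0.23.1 version string is illustrative and assumes a matching Hadoop artifact is resolvable from the configured repositories):

  # default: hadoop.version is unset, so the hadoop-0.20 profile and hadoop-core apply
  mvn clean install
  # defining hadoop.version activates the hadoop-0.23 profile and its split artifacts
  mvn clean install -Dhadoop.version=0.23.1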
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Mon Mar 12 18:25:45 2012
@@ -17,6 +17,7 @@
package org.apache.mahout.common;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
@@ -229,9 +230,9 @@ public final class HadoopUtil {
FileStatus[] statuses;
FileSystem fs = path.getFileSystem(conf);
if (filter == null) {
- statuses = pathType == PathType.GLOB ? fs.globStatus(path) : fs.listStatus(path);
+ statuses = pathType == PathType.GLOB ? fs.globStatus(path) : listStatus(fs, path);
} else {
- statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : fs.listStatus(path, filter);
+ statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : listStatus(fs, path, filter);
}
if (ordering != null) {
Arrays.sort(statuses, ordering);
@@ -239,6 +240,22 @@ public final class HadoopUtil {
return statuses;
}
+ public static FileStatus[] listStatus(FileSystem fs, Path path) throws IOException {
+ try {
+ return fs.listStatus(path);
+ } catch (FileNotFoundException e) {
+ return new FileStatus[0];
+ }
+ }
+
+ public static FileStatus[] listStatus(FileSystem fs, Path path, PathFilter filter) throws IOException {
+ try {
+ return fs.listStatus(path, filter);
+ } catch (FileNotFoundException e) {
+ return new FileStatus[0];
+ }
+ }
+
public static void cacheFiles(Path fileToCache, Configuration conf) {
DistributedCache.setCacheFiles(new URI[]{fileToCache.toUri()}, conf);
}
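
The listStatus wrappers above absorb a behavioral difference between the two Hadoop lines: on 0.23, FileSystem.listStatus throws FileNotFoundException for a path that does not exist, where the 0.20 API returned null or an empty result, so callers probing possibly-missing directories now get an empty array on both versions. A minimal caller sketch (the path and configuration are illustrative, not part of this commit):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.mahout.common.HadoopUtil;

  public class ListStatusExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      Path maybeMissing = new Path("/tmp/mahout-output"); // hypothetical path
      FileSystem fs = maybeMissing.getFileSystem(conf);
      // Empty array instead of a FileNotFoundException when the path is absent.
      FileStatus[] statuses = HadoopUtil.listStatus(fs, maybeMissing);
      System.out.println("entries: " + statuses.length);
    }
  }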
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/MockContext.java Mon Mar 12 18:25:45 2012
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.df.mapreduce.partial;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
-
-/**
- * Special implementation that collects the output of the mappers
- */
-final class MockContext extends Context {
-
- private final TreeID[] keys;
- private final MapredOutput[] values;
- private int index;
-
- MockContext(Mapper<?,?,?,?> mapper, Configuration conf, TaskAttemptID taskid, int nbTrees)
- throws IOException, InterruptedException {
- mapper.super(conf, taskid, null, null, null, null, null);
-
- keys = new TreeID[nbTrees];
- values = new MapredOutput[nbTrees];
- }
-
- @Override
- public void write(Object key, Object value) throws IOException {
- if (index == keys.length) {
- throw new IOException("Received more output than expected : " + index);
- }
-
- keys[index] = ((TreeID) key).clone();
- values[index] = ((MapredOutput) value).clone();
-
- index++;
- }
-
- /**
- * @return number of outputs collected
- */
- public int nbOutputs() {
- return index;
- }
-
- public TreeID[] getKeys() {
- return keys;
- }
-
- public MapredOutput[] getValues() {
- return values;
- }
-}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialSequentialBuilder.java Mon Mar 12 18:25:45 2012
@@ -1,176 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.classifier.df.mapreduce.partial;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.mahout.classifier.df.DFUtils;
-import org.apache.mahout.classifier.df.DecisionForest;
-import org.apache.mahout.classifier.df.builder.TreeBuilder;
-import org.apache.mahout.classifier.df.data.Dataset;
-import org.apache.mahout.classifier.df.mapreduce.Builder;
-import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
-import org.apache.mahout.classifier.df.node.Node;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Lists;
-
-/**
- * Simulates the Partial mapreduce implementation in a sequential manner. Must
- * receive a seed
- */
-public class PartialSequentialBuilder extends PartialBuilder {
-
- private static final Logger log = LoggerFactory.getLogger(PartialSequentialBuilder.class);
-
- private MockContext firstOutput;
-
- private final Dataset dataset;
-
- public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
- Dataset dataset, long seed, Configuration conf) {
- super(treeBuilder, dataPath, new Path("notUsed"), seed, conf);
- this.dataset = dataset;
- }
-
- public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
- Dataset dataset, long seed) {
- this(treeBuilder, dataPath, dataset, seed, new Configuration());
- }
-
- @Override
- protected void configureJob(Job job)
- throws IOException {
- Configuration conf = job.getConfiguration();
-
- int num = conf.getInt("mapred.map.tasks", -1);
-
- super.configureJob(job);
-
- // PartialBuilder sets the number of maps to 1 if we are running in 'local'
- conf.setInt("mapred.map.tasks", num);
- }
-
- @Override
- protected boolean runJob(Job job) throws IOException, InterruptedException {
- Configuration conf = job.getConfiguration();
-
- // retrieve the splits
- TextInputFormat input = new TextInputFormat();
- List<InputSplit> splits = input.getSplits(job);
-
- int nbSplits = splits.size();
- log.debug("Nb splits : {}", nbSplits);
-
- InputSplit[] sorted = new InputSplit[nbSplits];
- splits.toArray(sorted);
- Builder.sortSplits(sorted);
-
- int numTrees = Builder.getNbTrees(conf); // total number of trees
-
- TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());
-
- firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);
-
- /* first instance id in hadoop's order */
- //int[] firstIds = new int[nbSplits];
- /* partitions' sizes in hadoop order */
- int[] sizes = new int[nbSplits];
-
- // to compute firstIds, process the splits in file order
- long slowest = 0; // duration of slowest map
- int firstId = 0;
- for (InputSplit split : splits) {
- int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition
-
- RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
- reader.initialize(split, task);
-
- Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(),
- hp, nbSplits, numTrees);
-
- long time = System.currentTimeMillis();
-
- //firstIds[hp] = firstId;
-
- while (reader.nextKeyValue()) {
- mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
- firstId++;
- sizes[hp]++;
- }
-
- mapper.cleanup(firstOutput);
-
- time = System.currentTimeMillis() - time;
- log.info("Duration : {}", DFUtils.elapsedTime(time));
-
- if (time > slowest) {
- slowest = time;
- }
- }
-
- log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
- return true;
- }
-
- @Override
- protected DecisionForest parseOutput(Job job) throws IOException {
- return processOutput(firstOutput.getKeys(), firstOutput.getValues());
- }
-
- /**
- * extract the decision forest
- */
- protected static DecisionForest processOutput(TreeID[] keys, MapredOutput[] values) {
- List<Node> trees = Lists.newArrayList();
-
- for (int index = 0; index < keys.length; index++) {
- MapredOutput value = values[index];
- trees.add(value.getTree());
- }
-
- return new DecisionForest(trees);
- }
-
- /**
- * Special Step1Mapper that can be configured without using a Configuration
- *
- */
- private static class MockStep1Mapper extends Step1Mapper {
- protected MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
- int partition, int numMapTasks, int numTrees) {
- configure(false, treeBuilder, dataset);
- configure(seed, partition, numMapTasks, numTrees);
- }
-
- }
-
-}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java Mon Mar 12 18:25:45 2012
@@ -17,21 +17,30 @@
package org.apache.mahout.classifier.df.mapreduce.partial;
+import static org.easymock.EasyMock.anyObject;
+import static org.easymock.EasyMock.capture;
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expectLastCall;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.verify;
+
import java.util.Random;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.mahout.common.MahoutTestCase;
+import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.classifier.df.builder.TreeBuilder;
import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.DataLoader;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.data.Utils;
+import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
import org.apache.mahout.classifier.df.node.Leaf;
import org.apache.mahout.classifier.df.node.Node;
+import org.apache.mahout.common.MahoutTestCase;
+import org.easymock.Capture;
+import org.easymock.CaptureType;
import org.junit.Test;
public final class Step1MapperTest extends MahoutTestCase {
@@ -71,6 +80,17 @@ public final class Step1MapperTest exten
}
}
+ private static class TreeIDCapture extends Capture<TreeID> {
+
+ public TreeIDCapture() {
+ super(CaptureType.ALL);
+ }
+
+ public void setValue(final TreeID value) {
+ super.setValue(value.clone());
+ }
+ }
+
/** nb attributes per generated data instance */
static final int NUM_ATTRIBUTES = 4;
@@ -83,6 +103,7 @@ public final class Step1MapperTest exten
/** nb mappers to use */
static final int NUM_MAPPERS = 2;
+ @SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testMapper() throws Exception {
Long seed = null;
@@ -109,8 +130,13 @@ public final class Step1MapperTest exten
// expected number of trees that this mapper will build
int mapNbTrees = Step1Mapper.nbTrees(NUM_MAPPERS, NUM_TREES, partition);
- MockContext context = new MockContext(new Step1Mapper(),
- new Configuration(), new TaskAttemptID(), mapNbTrees);
+ Mapper.Context context =
+ createMock(Mapper.Context.class);
+ Capture<TreeID> capturedKeys = new TreeIDCapture();
+ context.write(capture(capturedKeys), anyObject());
+ expectLastCall().anyTimes();
+
+ replay(context);
MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed,
partition, NUM_MAPPERS, NUM_TREES);
@@ -125,12 +151,13 @@ public final class Step1MapperTest exten
}
mapper.cleanup(context);
+ verify(context);
// make sure the mapper built all its trees
- assertEquals(mapNbTrees, context.nbOutputs());
+ assertEquals(mapNbTrees, capturedKeys.getValues().size());
// check the returned keys
- for (TreeID k : context.getKeys()) {
+ for (TreeID k : capturedKeys.getValues()) {
assertEquals(partition, k.partition());
assertEquals(treeIndex, k.treeId());
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Mon Mar 12 18:25:45 2012
@@ -34,6 +34,7 @@ import org.apache.mahout.clustering.Clus
import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -126,8 +127,8 @@ public final class TestCanopyCreation ex
int[] expectedNumPoints = { 4, 4, 3 };
double[][] expectedCentroids = { { 1.5, 1.5 }, { 4.0, 4.0 },
{ 4.666666666666667, 4.6666666666666667 } };
- assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
- testCanopy.getNumObservations());
+ assertEquals("canopy points " + canopyIx,
testCanopy.getNumObservations(),
+ expectedNumPoints[canopyIx]);
double[] refCentroid = expectedCentroids[canopyIx];
Vector testCentroid = testCanopy.computeCentroid();
for (int pointIx = 0; pointIx < refCentroid.length; pointIx++) {
@@ -151,8 +152,8 @@ public final class TestCanopyCreation ex
{ 4.666666666666667, 4.666666666666667 } };
for (int canopyIx = 0; canopyIx < referenceEuclidean.size(); canopyIx++) {
Canopy testCanopy = referenceEuclidean.get(canopyIx);
- assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx],
- testCanopy.getNumObservations());
+ assertEquals("canopy points " + canopyIx,
testCanopy.getNumObservations(),
+ expectedNumPoints[canopyIx]);
double[] refCentroid = expectedCentroids[canopyIx];
Vector testCentroid = testCanopy.computeCentroid();
for (int pointIx = 0; pointIx < refCentroid.length; pointIx++) {
@@ -328,20 +329,36 @@ public final class TestCanopyCreation ex
Canopy canopy = new Canopy();
assertTrue("more to come", reader.next(key, canopy));
assertEquals("1st key", "C-0", key.toString());
- assertEquals("1st x value", 1.5, canopy.getCenter().get(0), EPSILON);
- assertEquals("1st y value", 1.5, canopy.getCenter().get(1), EPSILON);
+
+ List<Pair<Double,Double>> refCenters = Lists.newArrayList();
+ refCenters.add(new Pair<Double,Double>(1.5,1.5));
+ refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
+ Pair<Double,Double> c = new Pair<Double,Double>(canopy.getCenter().get(0),
+ canopy.getCenter().get(1));
+ assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
assertTrue("more to come", reader.next(key, canopy));
assertEquals("2nd key", "C-1", key.toString());
- assertEquals("2nd x value", 4.333333333333334, canopy.getCenter().get(0),
- EPSILON);
- assertEquals("2nd y value", 4.333333333333334, canopy.getCenter().get(1),
- EPSILON);
+ c = new Pair<Double,Double>(canopy.getCenter().get(0),
+ canopy.getCenter().get(1));
+ assertTrue("center "+c+" not found", findAndRemove(c, refCenters,
EPSILON));
assertFalse("more to come", reader.next(key, canopy));
} finally {
Closeables.closeQuietly(reader);
}
}
+ boolean findAndRemove(Pair<Double,Double> target,
+ List<Pair<Double,Double>> list, double epsilon) {
+ for (Pair<Double,Double> curr : list) {
+ if ( (Math.abs(target.getFirst() - curr.getFirst()) < epsilon)
+ && (Math.abs(target.getSecond() - curr.getSecond()) < epsilon) ) {
+ list.remove(curr);
+ return true;
+ }
+ }
+ return false;
+ }
+
/**
* Story: User can produce final canopy centers using a Hadoop map/reduce job
* and a EuclideanDistanceMeasure.
@@ -368,14 +385,18 @@ public final class TestCanopyCreation ex
Canopy value = new Canopy();
assertTrue("more to come", reader.next(key, value));
assertEquals("1st key", "C-0", key.toString());
- assertEquals("1st x value", 1.8, value.getCenter().get(0), EPSILON);
- assertEquals("1st y value", 1.8, value.getCenter().get(1), EPSILON);
+
+ List<Pair<Double,Double>> refCenters = Lists.newArrayList();
+ refCenters.add(new Pair<Double,Double>(1.8,1.8));
+ refCenters.add(new Pair<Double,Double>(4.433333333333334, 4.433333333333334));
+ Pair<Double,Double> c = new Pair<Double,Double>(value.getCenter().get(0),
+ value.getCenter().get(1));
+ assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
assertTrue("more to come", reader.next(key, value));
assertEquals("2nd key", "C-1", key.toString());
- assertEquals("2nd x value", 4.433333333333334, value.getCenter().get(0),
- EPSILON);
- assertEquals("2nd y value", 4.433333333333334, value.getCenter().get(1),
- EPSILON);
+ c = new Pair<Double,Double>(value.getCenter().get(0),
+ value.getCenter().get(1));
+ assertTrue("center "+c+" not found", findAndRemove(c, refCenters,
EPSILON));
assertFalse("more to come", reader.next(key, value));
} finally {
Closeables.closeQuietly(reader);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java Mon Mar 12 18:25:45 2012
@@ -20,6 +20,9 @@ package org.apache.mahout.clustering.cla
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Set;
+
+import com.google.common.collect.Sets;
import junit.framework.Assert;
@@ -195,9 +198,7 @@ public class ClusterClassificationDriver
}
private void assertVectorsWithOutlierRemoval() {
- assertFirstClusterWithOutlierRemoval();
- assertSecondClusterWithOutlierRemoval();
- assertThirdClusterWithOutlierRemoval();
+ checkClustersWithOutlierRemoval();
}
private void assertVectorsWithoutOutlierRemoval() {
@@ -230,25 +231,33 @@ public class ClusterClassificationDriver
"{1:1.0,0:2.0}", "{1:2.0,0:1.0}"}, vector.asFormatString()));
}
}
-
- private void assertThirdClusterWithOutlierRemoval() {
- Assert.assertEquals(1, thirdCluster.size());
- for (Vector vector : thirdCluster) {
- Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:9.0,0:9.0}"},
- vector.asFormatString()));
- }
- }
-
- private void assertSecondClusterWithOutlierRemoval() {
- Assert.assertEquals(0, secondCluster.size());
- }
-
- private void assertFirstClusterWithOutlierRemoval() {
- Assert.assertEquals(1, firstCluster.size());
- for (Vector vector : firstCluster) {
- Assert.assertTrue(ArrayUtils.contains(new String[] {"{1:1.0,0:1.0}"},
- vector.asFormatString()));
- }
+
+ private void checkClustersWithOutlierRemoval() {
+ Set<String> reference = Sets.newHashSet(new String[] {"{1:9.0,0:9.0}",
+ "{1:1.0,0:1.0}"});
+ int singletonCnt = 0;
+ int emptyCnt = 0;
+
+ List<List<Vector>> clusters = Lists.newArrayList();
+ clusters.add(firstCluster);
+ clusters.add(secondCluster);
+ clusters.add(thirdCluster);
+
+ for (List<Vector> vList : clusters) {
+ if (vList.size() == 0) {
+ emptyCnt++;
+ } else {
+ singletonCnt++;
+ Assert.assertTrue("expecting only singleton clusters; got size=" +
vList.size(),
+ vList.size() == 1);
+ Assert.assertTrue("not expecting cluster:" +
vList.get(0).asFormatString(),
+ reference.contains(vList.get(0).asFormatString()));
+ reference.remove(vList.get(0).asFormatString());
+ }
+ }
+ Assert.assertEquals("Different number of empty clusters than expected!",
1, emptyCnt);
+ Assert.assertEquals("Different number of singletons than expected!", 2,
singletonCnt);
+ Assert.assertEquals("Didn't match all reference clusters!", 0,
reference.size());
}
-
+
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Mon Mar 12 18:25:45 2012
@@ -26,6 +26,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
@@ -38,6 +39,7 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.ClusterObservations;
import org.apache.mahout.clustering.ClusteringTestUtils;
+import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.classify.WeightedVectorWritable;
import org.apache.mahout.common.DummyOutputCollector;
@@ -486,6 +488,42 @@ public final class TestKmeansClustering
// now run the Canopy job
CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, false);
+ DummyOutputCollector<Text, Canopy> collector1 =
+ new DummyOutputCollector<Text, Canopy>();
+
+ FileStatus[] outParts = FileSystem.get(conf).globStatus(
+ new Path(outputPath, "clusters-0-final/*-0*"));
+ for (FileStatus outPartStat : outParts) {
+ for (Pair<Text,Canopy> record :
+ new SequenceFileIterable<Text,Canopy>(
+ outPartStat.getPath(), conf)) {
+ collector1.collect(record.getFirst(), record.getSecond());
+ }
+ }
+
+ boolean got15 = false;
+ boolean got43 = false;
+ int count = 0;
+ for (Text k : collector1.getKeys()) {
+ count++;
+ List<Canopy> vl = collector1.getValue(k);
+ assertEquals("non-singleton centroid!", 1, vl.size());
+ Vector v = vl.get(0).getCenter();
+ assertEquals("cetriod vector is wrong length", 2, v.size());
+ if ( (Math.abs(v.get(0) - 1.5) < EPSILON)
+ && (Math.abs(v.get(1) - 1.5) < EPSILON)
+ && !got15) {
+ got15 = true;
+ } else if ( (Math.abs(v.get(0) - 4.333333333333334) < EPSILON)
+ && (Math.abs(v.get(1) - 4.333333333333334) < EPSILON)
+ && !got43) {
+ got43 = true;
+ } else {
+ assertTrue("got unexpected center: "+v+" ["+v.getClass().toString()+"]", false);
+ }
+ }
+ assertEquals("got unexpected number of centers", 2, count);
+
// now run the KMeans job
KMeansDriver.run(pointsPath, new Path(outputPath, "clusters-0-final"),
outputPath, new EuclideanDistanceMeasure(),
0.001, 10, true, false);
@@ -500,7 +538,28 @@ public final class TestKmeansClustering
collector.collect(record.getFirst(), record.getSecond());
}
- assertEquals("num points[0]", 4, collector.getValue(new
IntWritable(0)).size());
- assertEquals("num points[1]", 5, collector.getValue(new
IntWritable(1)).size());
+ boolean gotLowClust = false; // clusters should be [1, *] and [2, *]
+ boolean gotHighClust = false; // vs [3 , *], [4 , *] and [5, *]
+ for (IntWritable k : collector.getKeys()) {
+ List<WeightedVectorWritable> wvList = collector.getValue(k);
+ assertTrue("empty cluster!", wvList.size() != 0);
+ if (wvList.get(0).getVector().get(0) <= 2.0) {
+ for (WeightedVectorWritable wv : wvList) {
+ Vector v = wv.getVector();
+ int idx = v.maxValueIndex();
+ assertTrue("bad cluster!", v.get(idx)<= 2.0);
+ }
+ assertEquals("Wrong size cluster", 4, wvList.size());
+ gotLowClust = true;
+ } else {
+ for (WeightedVectorWritable wv : wvList) {
+ Vector v = wv.getVector();
+ int idx = v.minValueIndex();
+ assertTrue("bad cluster!", v.get(idx)> 2.0);
+ }
+ assertEquals("Wrong size cluster", 5, wvList.size());
+ gotHighClust = true;
+ }
+ }
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Mon Mar 12 18:25:45 2012
@@ -21,10 +21,12 @@ import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Random;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
@@ -350,7 +352,13 @@ public final class TestMeanShift extends
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(input.toUri(), conf);
Collection<VectorWritable> points = Lists.newArrayList();
- for (Vector v : raw) {
+ Random r = new Random(123);
+ Vector[] permutedRaw = new Vector[raw.length];
+ for (int i = 0; i < raw.length; i++)
+ permutedRaw[i] = raw[i];
+ for (int i = 0; i < permutedRaw.length; i++)
+ permutedRaw[i] = permutedRaw[i + r.nextInt(raw.length - i)];
+ for (Vector v : permutedRaw) {
points.add(new VectorWritable(v));
}
ClusteringTestUtils.writePointsToFile(points,
@@ -376,10 +384,12 @@ public final class TestMeanShift extends
optKey(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION), "0.2",
optKey(DefaultOptionCreator.OVERWRITE_OPTION) };
ToolRunner.run(conf, new MeanShiftCanopyDriver(), args);
- Path outPart = new Path(output, "clusters-4-final/part-r-00000");
- long count = HadoopUtil.countRecords(outPart, conf);
- assertEquals("count", 3, count);
- outPart = new Path(output, "clusters-0/part-m-00000");
+ FileStatus[] outParts = FileSystem.get(conf).globStatus(
+ new Path(output, "clusters-?-final/part-r-*"));
+ assertEquals("Wrong number of matching final parts", 1, outParts.length);
+ long count = HadoopUtil.countRecords(outParts[0].getPath(), conf);
+ assertEquals("count", 5, count);
+ Path outPart = new Path(output, "clusters-0/part-m-00000");
Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart, true, conf);
// now test the initial clusters to ensure the type of their centers has
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyCounter.java Mon Mar 12 18:25:45 2012
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.mahout.common;
-
-import org.apache.hadoop.mapreduce.Counter;
-
-final class DummyCounter extends Counter {
-
-}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Mon Mar 12 18:25:45 2012
@@ -17,16 +17,21 @@
package org.apache.mahout.common;
+import com.google.common.collect.Lists;
+
import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -65,7 +70,18 @@ public final class DummyRecordWriter<K,
Configuration configuration,
RecordWriter<K2, V2> output)
throws IOException, InterruptedException {
- return mapper.new Context(configuration, new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+
+ // Use reflection since the context types changed incompatibly between 0.20
+ // and 0.23.
+ try {
+ return buildNewMapperContext(configuration, output);
+ } catch (Exception e) {
+ try {
+ return buildOldMapperContext(mapper, configuration, output);
+ } catch (Exception ex) {
+ throw new IllegalStateException(ex);
+ }
+ }
}
public static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context build(Reducer<K1, V1, K2, V2> reducer,
@@ -74,17 +90,96 @@ public final class DummyRecordWriter<K,
Class<K1> keyClass,
Class<V1> valueClass)
throws IOException, InterruptedException {
- return reducer.new Context(configuration,
- new TaskAttemptID(),
- new MockIterator(),
- null,
- null,
- output,
- null,
- new DummyStatusReporter(),
- null,
- keyClass,
- valueClass);
+
+ // Use reflection since the context types changed incompatibly between 0.20
+ // and 0.23.
+ try {
+ return buildNewReducerContext(configuration, output, keyClass, valueClass);
+ } catch (Exception e) {
+ try {
+ return buildOldReducerContext(reducer, configuration, output, keyClass, valueClass);
+ } catch (Exception ex) {
+ throw new IllegalStateException(ex);
+ }
+ }
+ }
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildNewMapperContext(
+ Configuration configuration, RecordWriter<K2, V2> output) throws Exception {
+ Class<?> mapContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl");
+ Constructor<?> cons = mapContextImplClass.getConstructors()[0];
+ Object mapContextImpl = cons.newInstance(configuration,
+ new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+
+ Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
+ Object wrappedMapper = wrappedMapperClass.newInstance();
+ Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
+ return (Mapper.Context) getMapContext.invoke(wrappedMapper, mapContextImpl);
+ }
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ private static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context buildOldMapperContext(
+ Mapper<K1, V1, K2, V2> mapper, Configuration configuration,
+ RecordWriter<K2, V2> output) throws Exception {
+ Constructor<?> cons = getNestedContextConstructor(mapper.getClass());
+ // first argument to the constructor is the enclosing instance
+ return (Mapper.Context) cons.newInstance(mapper, configuration,
+ new TaskAttemptID(), null, output, null, new DummyStatusReporter(), null);
+ }
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildNewReducerContext(
+ Configuration configuration, RecordWriter<K2, V2> output, Class<K1> keyClass,
+ Class<V1> valueClass) throws Exception {
+ Class<?> reduceContextImplClass = Class.forName("org.apache.hadoop.mapreduce.task.ReduceContextImpl");
+ Constructor<?> cons = reduceContextImplClass.getConstructors()[0];
+ Object reduceContextImpl = cons.newInstance(configuration,
+ new TaskAttemptID(),
+ new MockIterator(),
+ null,
+ null,
+ output,
+ null,
+ new DummyStatusReporter(),
+ null,
+ keyClass,
+ valueClass);
+
+ Class<?> wrappedReducerClass = Class.forName("org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer");
+ Object wrappedReducer = wrappedReducerClass.newInstance();
+ Method getReducerContext = wrappedReducerClass.getMethod("getReducerContext", ReduceContext.class);
+ return (Reducer.Context) getReducerContext.invoke(wrappedReducer, reduceContextImpl);
+ }
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ private static <K1, V1, K2, V2> Reducer<K1, V1, K2, V2>.Context buildOldReducerContext(
+ Reducer<K1, V1, K2, V2> reducer, Configuration configuration,
+ RecordWriter<K2, V2> output, Class<K1> keyClass,
+ Class<V1> valueClass) throws Exception {
+ Constructor<?> cons = getNestedContextConstructor(reducer.getClass());
+ // first argument to the constructor is the enclosing instance
+ return (Reducer.Context) cons.newInstance(reducer,
+ configuration,
+ new TaskAttemptID(),
+ new MockIterator(),
+ null,
+ null,
+ output,
+ null,
+ new DummyStatusReporter(),
+ null,
+ keyClass,
+ valueClass);
+ }
+
+ private static Constructor<?> getNestedContextConstructor(Class<?> outerClass) {
+ for (Class<?> nestedClass : outerClass.getClasses()) {
+ if ("Context".equals(nestedClass.getSimpleName())) {
+ return nestedClass.getConstructors()[0];
+ }
+ }
+ throw new IllegalStateException("Cannot find context class for " +
outerClass);
}
}
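
The reflection above is needed because the mapper and reducer contexts are built differently on the two Hadoop lines: 0.20 exposes Mapper.Context and Reducer.Context as concrete nested classes with public constructors, while 0.23 constructs them from MapContextImpl/ReduceContextImpl wrapped by WrappedMapper/WrappedReducer. Test code keeps calling the same build() methods either way; a hedged sketch of typical usage (the identity mapper and key/value types below are illustrative, not from this commit):

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.mahout.common.DummyRecordWriter;

  public class DummyRecordWriterUsage {
    // Hypothetical pass-through mapper, only here to give build() something to wrap.
    static class IdentityMapper extends Mapper<Text, IntWritable, Text, IntWritable> {
      @Override
      public void map(Text key, IntWritable value, Context context)
          throws IOException, InterruptedException {
        context.write(key, value);
      }
    }

    public static void main(String[] args) throws Exception {
      IdentityMapper mapper = new IdentityMapper();
      DummyRecordWriter<Text, IntWritable> writer = new DummyRecordWriter<Text, IntWritable>();
      // build() picks the 0.23-style or 0.20-style context internally via reflection.
      Mapper<Text, IntWritable, Text, IntWritable>.Context context =
          DummyRecordWriter.build(mapper, new Configuration(), writer);
      mapper.map(new Text("k"), new IntWritable(1), context);
      // The dummy writer records everything written through the context in memory.
      System.out.println("collected: " + writer.getValue(new Text("k")));
    }
  }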
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java Mon Mar 12 18:25:45 2012
@@ -19,6 +19,8 @@
package org.apache.mahout.common;
+import static org.easymock.EasyMock.createMockBuilder;
+
import java.util.Map;
import com.google.common.collect.Maps;
@@ -30,10 +32,21 @@ public final class DummyStatusReporter e
private final Map<Enum<?>, Counter> counters = Maps.newHashMap();
private final Map<String, Counter> counterGroups = Maps.newHashMap();
+ private Counter newCounter() {
+ try {
+ // 0.23 case
+ String c = "org.apache.hadoop.mapreduce.counters.GenericCounter";
+ return (Counter) createMockBuilder(Class.forName(c)).createMock();
+ } catch (ClassNotFoundException e) {
+ // 0.20 case
+ return createMockBuilder(Counter.class).createMock();
+ }
+ }
+
@Override
public Counter getCounter(Enum<?> name) {
if (!counters.containsKey(name)) {
- counters.put(name, new DummyCounter());
+ counters.put(name, newCounter());
}
return counters.get(name);
}
@@ -42,7 +55,7 @@ public final class DummyStatusReporter e
@Override
public Counter getCounter(String group, String name) {
if (!counterGroups.containsKey(group + name)) {
- counterGroups.put(group + name, new DummyCounter());
+ counterGroups.put(group + name, newCounter());
}
return counterGroups.get(group+name);
}
@@ -55,4 +68,8 @@ public final class DummyStatusReporter e
public void setStatus(String status) {
}
+ public float getProgress() {
+ return 0;
+ }
+
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java Mon Mar 12 18:25:45 2012
@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.ClusteringTestUtils;
+import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.math.DenseVector;
@@ -254,14 +255,14 @@ public final class TestDistributedRowMat
deleteContentsOfPath(conf, outputPath);
- assertEquals(0, fs.listStatus(outputPath).length);
+ assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
Vector result1 = dm.times(v);
- assertEquals(0, fs.listStatus(outputPath).length);
+ assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
deleteContentsOfPath(conf, outputPath);
- assertEquals(0, fs.listStatus(outputPath).length);
+ assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
dm.setConf(conf);
@@ -291,14 +292,14 @@ public final class TestDistributedRowMat
deleteContentsOfPath(conf, outputPath);
- assertEquals(0, fs.listStatus(outputPath).length);
+ assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
Vector result1 = dm.timesSquared(v);
- assertEquals(0, fs.listStatus(outputPath).length);
+ assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
deleteContentsOfPath(conf, outputPath);
- assertEquals(0, fs.listStatus(outputPath).length);
+ assertEquals(0, HadoopUtil.listStatus(fs, outputPath).length);
conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
dm.setConf(conf);
@@ -325,7 +326,7 @@ public final class TestDistributedRowMat
private static void deleteContentsOfPath(Configuration conf, Path path)
throws Exception {
FileSystem fs = path.getFileSystem(conf);
- FileStatus[] statuses = fs.listStatus(path);
+ FileStatus[] statuses = HadoopUtil.listStatus(fs, path);
for (FileStatus status : statuses) {
fs.delete(status.getPath(), true);
}
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Mon Mar 12 18:25:45 2012
@@ -193,7 +193,7 @@ public final class TestClusterDumper ext
output, measure, 8, 4, true, 0.0, true);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
- "clusters-0"), new Path(output, "clusteredPoints"));
+ "clusters-0-final"), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(termDictionary);
}
Modified: mahout/trunk/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1299770&r1=1299769&r2=1299770&view=diff
==============================================================================
--- mahout/trunk/pom.xml (original)
+++ mahout/trunk/pom.xml Mon Mar 12 18:25:45 2012
@@ -107,6 +107,17 @@
<url>https://issues.apache.org/jira/browse/MAHOUT</url>
</issueManagement>
+<repositories>
+<repository>
+<id>apache.snapshots</id>
+<name>Apache Snapshot Repository</name>
+<url>http://repository.apache.org/snapshots</url>
+<releases>
+<enabled>false</enabled>
+</releases>
+</repository>
+</repositories>
+
<dependencyManagement>
<dependencies>
@@ -264,6 +275,100 @@
</exclusions>
</dependency>
<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-common</artifactId>
+<version>${hadoop.version}</version>
+<exclusions>
+<exclusion>
+<groupId>net.sf.kosmosfs</groupId>
+<artifactId>kfs</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.mortbay.jetty</groupId>
+<artifactId>jetty</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.mortbay.jetty</groupId>
+<artifactId>jetty-util</artifactId>
+</exclusion>
+<exclusion>
+<groupId>hsqldb</groupId>
+<artifactId>hsqldb</artifactId>
+</exclusion>
+<exclusion>
+<groupId>commons-el</groupId>
+<artifactId>commons-el</artifactId>
+</exclusion>
+<exclusion>
+<groupId>junit</groupId>
+<artifactId>junit</artifactId>
+</exclusion>
+<exclusion>
+<groupId>oro</groupId>
+<artifactId>oro</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.mortbay.jetty</groupId>
+<artifactId>jsp-2.1</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.mortbay.jetty</groupId>
+<artifactId>jsp-api-2.1</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.mortbay.jetty</groupId>
+<artifactId>servlet-api-2.5</artifactId>
+</exclusion>
+<exclusion>
+<groupId>commons-net</groupId>
+<artifactId>commons-net</artifactId>
+</exclusion>
+<exclusion>
+<groupId>tomcat</groupId>
+<artifactId>jasper-runtime</artifactId>
+</exclusion>
+<exclusion>
+<groupId>tomcat</groupId>
+<artifactId>jasper-compiler</artifactId>
+</exclusion>
+<exclusion>
+<groupId>xmlenc</groupId>
+<artifactId>xmlenc</artifactId>
+</exclusion>
+<exclusion>
+<groupId>net.java.dev.jets3t</groupId>
+<artifactId>jets3t</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.eclipse.jdt</groupId>
+<artifactId>core</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.slf4j</groupId>
+<artifactId>slf4j-api</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.slf4j</groupId>
+<artifactId>slf4j-jcl</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.slf4j</groupId>
+<artifactId>slf4j-log4j12</artifactId>
+</exclusion>
+</exclusions>
+</dependency>
+<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-mapreduce-client-core</artifactId>
+<version>${hadoop.version}</version>
+</dependency>
+<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-mapreduce-client-common</artifactId>
+<version>${hadoop.version}</version>
+</dependency>
+
+<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>1.8.2</version>