Author: srowen
Date: Tue Apr 5 10:21:32 2011
New Revision: 1088957
URL: http://svn.apache.org/viewvc?rev=1088957&view=rev
Log:
MAHOUT-651 Pass the caller's Configuration through in more cases, rather than
ignoring it and instantiating a new one
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
Tue Apr 5 10:21:32 2011
@@ -112,7 +112,7 @@ public class FuzzyKMeansDriver extends A
DistanceMeasure measure =
ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
- clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer.parseInt(parseArguments(args)
+ clusters = RandomSeedGenerator.buildRandom(getConf(), input, clusters, Integer.parseInt(parseArguments(args)
.get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
}
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
Tue Apr 5 10:21:32 2011
@@ -43,7 +43,8 @@ public class KMeansClusterMapper extends
private KMeansClusterer clusterer;
@Override
- protected void map(WritableComparable<?> key, VectorWritable point, Context
context) throws IOException, InterruptedException {
+ protected void map(WritableComparable<?> key, VectorWritable point, Context
context)
+ throws IOException, InterruptedException {
clusterer.outputPointWithClusterInfo(point.get(), clusters, context);
}
@@ -59,7 +60,7 @@ public class KMeansClusterMapper extends
String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
if ((clusterPath != null) && (clusterPath.length() > 0)) {
- KMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
+ KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath),
clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException("No clusters found. Check your -c path.");
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
Tue Apr 5 10:21:32 2011
@@ -98,12 +98,15 @@ public class KMeansDriver extends Abstra
DistanceMeasure measure =
ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
- clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer
+ clusters = RandomSeedGenerator.buildRandom(getConf(), input, clusters,
Integer
.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)),
measure);
}
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
boolean runSequential =
getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
DefaultOptionCreator.SEQUENTIAL_METHOD);
+ if(getConf() == null) {
+ setConf(new Configuration());
+ }
run(getConf(), input, clusters, output, measure, convergenceDelta,
maxIterations, runClustering, runSequential);
return 0;
}
@@ -229,13 +232,14 @@ public class KMeansDriver extends Abstra
boolean runSequential)
throws IOException, InterruptedException, ClassNotFoundException {
if (runSequential) {
- return buildClustersSeq(input, clustersIn, output, measure,
maxIterations, delta);
+ return buildClustersSeq(conf, input, clustersIn, output, measure,
maxIterations, delta);
} else {
return buildClustersMR(conf, input, clustersIn, output, measure,
maxIterations, delta);
}
}
- private static Path buildClustersSeq(Path input,
+ private static Path buildClustersSeq(Configuration conf,
+ Path input,
Path clustersIn,
Path output,
DistanceMeasure measure,
@@ -246,7 +250,7 @@ public class KMeansDriver extends Abstra
KMeansClusterer clusterer = new KMeansClusterer(measure);
Collection<Cluster> clusters = new ArrayList<Cluster>();
- KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
+ KMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException("Clusters is empty!");
}
@@ -254,7 +258,6 @@ public class KMeansDriver extends Abstra
int iteration = 1;
while (!converged && iteration <= maxIterations) {
log.info("K-Means Iteration: " + iteration);
- Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(input.toUri(), conf);
for (VectorWritable value :
new SequenceFileDirValueIterable<VectorWritable>(
@@ -410,22 +413,24 @@ public class KMeansDriver extends Abstra
log.info("convergence: {} Input Vectors: {}", convergenceDelta,
VectorWritable.class.getName());
}
if (runSequential) {
- clusterDataSeq(input, clustersIn, output, measure);
+ clusterDataSeq(conf, input, clustersIn, output, measure);
} else {
clusterDataMR(conf, input, clustersIn, output, measure,
convergenceDelta);
}
}
- private static void clusterDataSeq(Path input, Path clustersIn, Path output,
DistanceMeasure measure)
- throws IOException {
+ private static void clusterDataSeq(Configuration conf,
+ Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure) throws
IOException {
KMeansClusterer clusterer = new KMeansClusterer(measure);
Collection<Cluster> clusters = new ArrayList<Cluster>();
- KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
+ KMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException("Clusters is empty!");
}
- Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(input.toUri(), conf);
FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
int part = 0;
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
Tue Apr 5 10:21:32 2011
@@ -55,7 +55,7 @@ public class KMeansMapper extends Mapper
String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
if ((clusterPath != null) && (clusterPath.length() > 0)) {
- KMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
+ KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath),
clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException("No clusters found. Check your -c path.");
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
Tue Apr 5 10:21:32 2011
@@ -68,7 +68,7 @@ public class KMeansReducer extends Reduc
String path = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
if (path.length() > 0) {
Collection<Cluster> clusters = new ArrayList<Cluster>();
- KMeansUtil.configureWithClusterInfo(new Path(path), clusters);
+ KMeansUtil.configureWithClusterInfo(conf, new Path(path), clusters);
setClusterMap(clusters);
if (clusterMap.isEmpty()) {
throw new IllegalStateException("Cluster is empty!");
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
Tue Apr 5 10:21:32 2011
@@ -37,10 +37,11 @@ final class KMeansUtil {
}
/** Configure the mapper with the cluster info */
- public static void configureWithClusterInfo(Path clusterPathStr,
Collection<Cluster> clusters) throws IOException {
+ public static void configureWithClusterInfo(Configuration conf,
+ Path clusterPathStr,
+ Collection<Cluster> clusters)
throws IOException {
// Get the path location where the cluster Info is stored
- Configuration conf = new Configuration();
Path clusterPath = new Path(clusterPathStr, "*");
Collection<Path> result = new ArrayList<Path>();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
Tue Apr 5 10:21:32 2011
@@ -53,9 +53,12 @@ public final class RandomSeedGenerator {
private RandomSeedGenerator() {
}
- public static Path buildRandom(Path input, Path output, int k,
DistanceMeasure measure) throws IOException {
+ public static Path buildRandom(Configuration conf,
+ Path input,
+ Path output,
+ int k,
+ DistanceMeasure measure) throws IOException {
// delete the output directory
- Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(output.toUri(), conf);
HadoopUtil.delete(conf, output);
Path outFile = new Path(output, "part-randomSeed");
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
Tue Apr 5 10:21:32 2011
@@ -185,7 +185,8 @@ public class SpectralKMeansDriver extend
// Finally, perform k-means clustering on the rows of L (or W)
// generate random initial clusters
- Path initialclusters = RandomSeedGenerator.buildRandom(Wt.getRowPath(),
+ Path initialclusters = RandomSeedGenerator.buildRandom(conf,
+ Wt.getRowPath(),
new Path(output,
Cluster.INITIAL_CLUSTERS_DIR),
clusters,
measure);
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
Tue Apr 5 10:21:32 2011
@@ -73,7 +73,7 @@ public final class TestRandomSeedGenerat
Path output = getTestTempDirPath("random-output");
ClusteringTestUtils.writePointsToFile(points, input, fs, conf);
- RandomSeedGenerator.buildRandom(input, output, 4, new
ManhattanDistanceMeasure());
+ RandomSeedGenerator.buildRandom(conf, input, output, 4, new
ManhattanDistanceMeasure());
int clusterCount = 0;
Collection<Integer> set = new HashSet<Integer>();
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
Tue Apr 5 10:21:32 2011
@@ -56,7 +56,7 @@ class DisplayFuzzyKMeans extends Display
//boolean b = true;
//if (b) {
writeSampleData(samples);
- Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output,
"clusters-0"), 3, measure);
+ Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new
Path(output, "clusters-0"), 3, measure);
double threshold = 0.001;
int numIterations = 10;
int m = 3;
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
Tue Apr 5 10:21:32 2011
@@ -53,7 +53,7 @@ class DisplayKMeans extends DisplayClust
int maxIter = 10;
double distanceThreshold = 0.001;
//if (b) {
- Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output,
"clusters-0"), 3, measure);
+ Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new
Path(output, "clusters-0"), 3, measure);
KMeansDriver.run(samples,
clusters,
output,
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
Tue Apr 5 10:21:32 2011
@@ -131,13 +131,13 @@ public final class Job extends AbstractJ
int k,
double convergenceDelta,
int maxIterations)
- throws IOException, InstantiationException, IllegalAccessException,
InterruptedException, ClassNotFoundException {
+ throws IOException, InterruptedException, ClassNotFoundException {
Path directoryContainingConvertedInput = new Path(output,
DIRECTORY_CONTAINING_CONVERTED_INPUT);
log.info("Preparing Input");
InputDriver.runJob(input, directoryContainingConvertedInput,
"org.apache.mahout.math.RandomAccessSparseVector");
log.info("Running random seed to get initial clusters");
Path clusters = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
- clusters =
RandomSeedGenerator.buildRandom(directoryContainingConvertedInput, clusters, k,
measure);
+ clusters = RandomSeedGenerator.buildRandom(conf,
directoryContainingConvertedInput, clusters, k, measure);
log.info("Running KMeans");
KMeansDriver.run(conf,
directoryContainingConvertedInput,
@@ -185,7 +185,7 @@ public final class Job extends AbstractJ
double t2,
double convergenceDelta,
int maxIterations)
- throws IOException, InstantiationException, IllegalAccessException,
InterruptedException, ClassNotFoundException {
+ throws IOException, InterruptedException, ClassNotFoundException {
Path directoryContainingConvertedInput = new Path(output,
DIRECTORY_CONTAINING_CONVERTED_INPUT);
log.info("Preparing Input");
InputDriver.runJob(input, directoryContainingConvertedInput,
"org.apache.mahout.math.RandomAccessSparseVector");