Author: srowen
Date: Tue Apr  5 10:21:32 2011
New Revision: 1088957

URL: http://svn.apache.org/viewvc?rev=1088957&view=rev
Log:
MAHOUT-651 Pass Configuration in more cases rather than ignoring it and reinstantiating

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Tue Apr  5 10:21:32 2011
@@ -112,7 +112,7 @@ public class FuzzyKMeansDriver extends A
     DistanceMeasure measure = 
ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
 
     if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
-      clusters = RandomSeedGenerator.buildRandom(input, clusters, 
Integer.parseInt(parseArguments(args)
+      clusters = RandomSeedGenerator.buildRandom(getConf(), input, clusters, 
Integer.parseInt(parseArguments(args)
           .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Tue Apr  5 10:21:32 2011
@@ -43,7 +43,8 @@ public class KMeansClusterMapper extends
   private KMeansClusterer clusterer;
 
   @Override
-  protected void map(WritableComparable<?> key, VectorWritable point, Context 
context) throws IOException, InterruptedException {
+  protected void map(WritableComparable<?> key, VectorWritable point, Context 
context)
+    throws IOException, InterruptedException {
     clusterer.outputPointWithClusterInfo(point.get(), clusters, context);
   }
 
@@ -59,7 +60,7 @@ public class KMeansClusterMapper extends
       
       String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
       if ((clusterPath != null) && (clusterPath.length() > 0)) {
-        KMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
+        KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath), 
clusters);
         if (clusters.isEmpty()) {
           throw new IllegalStateException("No clusters found. Check your -c 
path.");
         }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Apr  5 10:21:32 2011
@@ -98,12 +98,15 @@ public class KMeansDriver extends Abstra
     DistanceMeasure measure = 
ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
 
     if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
-      clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer
+      clusters = RandomSeedGenerator.buildRandom(getConf(), input, clusters, 
Integer
           .parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), 
measure);
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
     boolean runSequential = 
getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
         DefaultOptionCreator.SEQUENTIAL_METHOD);
+    if(getConf() == null) {
+      setConf(new Configuration());
+    }
     run(getConf(), input, clusters, output, measure, convergenceDelta, 
maxIterations, runClustering, runSequential);
     return 0;
   }
@@ -229,13 +232,14 @@ public class KMeansDriver extends Abstra
                                    boolean runSequential)
     throws IOException, InterruptedException, ClassNotFoundException {
     if (runSequential) {
-      return buildClustersSeq(input, clustersIn, output, measure, 
maxIterations, delta);
+      return buildClustersSeq(conf, input, clustersIn, output, measure, 
maxIterations, delta);
     } else {
       return buildClustersMR(conf, input, clustersIn, output, measure, 
maxIterations, delta);
     }
   }
 
-  private static Path buildClustersSeq(Path input,
+  private static Path buildClustersSeq(Configuration conf,
+                                       Path input,
                                        Path clustersIn,
                                        Path output,
                                        DistanceMeasure measure,
@@ -246,7 +250,7 @@ public class KMeansDriver extends Abstra
     KMeansClusterer clusterer = new KMeansClusterer(measure);
     Collection<Cluster> clusters = new ArrayList<Cluster>();
 
-    KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
+    KMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
     if (clusters.isEmpty()) {
       throw new IllegalStateException("Clusters is empty!");
     }
@@ -254,7 +258,6 @@ public class KMeansDriver extends Abstra
     int iteration = 1;
     while (!converged && iteration <= maxIterations) {
       log.info("K-Means Iteration: " + iteration);
-      Configuration conf = new Configuration();
       FileSystem fs = FileSystem.get(input.toUri(), conf);
       for (VectorWritable value :
            new SequenceFileDirValueIterable<VectorWritable>(
@@ -410,22 +413,24 @@ public class KMeansDriver extends Abstra
       log.info("convergence: {} Input Vectors: {}", convergenceDelta, 
VectorWritable.class.getName());
     }
     if (runSequential) {
-      clusterDataSeq(input, clustersIn, output, measure);
+      clusterDataSeq(conf, input, clustersIn, output, measure);
     } else {
       clusterDataMR(conf, input, clustersIn, output, measure, 
convergenceDelta);
     }
   }
 
-  private static void clusterDataSeq(Path input, Path clustersIn, Path output, 
DistanceMeasure measure)
-    throws IOException {
+  private static void clusterDataSeq(Configuration conf,
+                                     Path input,
+                                     Path clustersIn,
+                                     Path output,
+                                     DistanceMeasure measure) throws 
IOException {
 
     KMeansClusterer clusterer = new KMeansClusterer(measure);
     Collection<Cluster> clusters = new ArrayList<Cluster>();
-    KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
+    KMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
     if (clusters.isEmpty()) {
       throw new IllegalStateException("Clusters is empty!");
     }
-    Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(input.toUri(), conf);
     FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
     int part = 0;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Tue Apr  5 10:21:32 2011
@@ -55,7 +55,7 @@ public class KMeansMapper extends Mapper
 
       String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
       if ((clusterPath != null) && (clusterPath.length() > 0)) {
-        KMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
+        KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath), 
clusters);
         if (clusters.isEmpty()) {
           throw new IllegalStateException("No clusters found. Check your -c 
path.");
         }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java Tue Apr  5 10:21:32 2011
@@ -68,7 +68,7 @@ public class KMeansReducer extends Reduc
       String path = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
       if (path.length() > 0) {
         Collection<Cluster> clusters = new ArrayList<Cluster>();
-        KMeansUtil.configureWithClusterInfo(new Path(path), clusters);
+        KMeansUtil.configureWithClusterInfo(conf, new Path(path), clusters);
         setClusterMap(clusters);
         if (clusterMap.isEmpty()) {
           throw new IllegalStateException("Cluster is empty!");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Tue Apr  5 10:21:32 2011
@@ -37,10 +37,11 @@ final class KMeansUtil {
   }
 
   /** Configure the mapper with the cluster info */
-  public static void configureWithClusterInfo(Path clusterPathStr, 
Collection<Cluster> clusters) throws IOException {
+  public static void configureWithClusterInfo(Configuration conf,
+                                              Path clusterPathStr,
+                                              Collection<Cluster> clusters) 
throws IOException {
 
     // Get the path location where the cluster Info is stored
-    Configuration conf = new Configuration();
     Path clusterPath = new Path(clusterPathStr, "*");
     Collection<Path> result = new ArrayList<Path>();
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java Tue Apr  5 10:21:32 2011
@@ -53,9 +53,12 @@ public final class RandomSeedGenerator {
   private RandomSeedGenerator() {
   }
   
-  public static Path buildRandom(Path input, Path output, int k, 
DistanceMeasure measure) throws IOException {
+  public static Path buildRandom(Configuration conf,
+                                 Path input,
+                                 Path output,
+                                 int k,
+                                 DistanceMeasure measure) throws IOException {
     // delete the output directory
-    Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(output.toUri(), conf);
     HadoopUtil.delete(conf, output);
     Path outFile = new Path(output, "part-randomSeed");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Tue Apr  5 10:21:32 2011
@@ -185,7 +185,8 @@ public class SpectralKMeansDriver extend
 
     // Finally, perform k-means clustering on the rows of L (or W)
     // generate random initial clusters
-    Path initialclusters = RandomSeedGenerator.buildRandom(Wt.getRowPath(),
+    Path initialclusters = RandomSeedGenerator.buildRandom(conf,
+                                                           Wt.getRowPath(),
                                                            new Path(output, 
Cluster.INITIAL_CLUSTERS_DIR),
                                                            clusters,
                                                            measure);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java Tue Apr  5 10:21:32 2011
@@ -73,7 +73,7 @@ public final class TestRandomSeedGenerat
     Path output = getTestTempDirPath("random-output");
     ClusteringTestUtils.writePointsToFile(points, input, fs, conf);
     
-    RandomSeedGenerator.buildRandom(input, output, 4, new 
ManhattanDistanceMeasure());
+    RandomSeedGenerator.buildRandom(conf, input, output, 4, new 
ManhattanDistanceMeasure());
 
     int clusterCount = 0;
     Collection<Integer> set = new HashSet<Integer>();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Tue Apr  5 10:21:32 2011
@@ -56,7 +56,7 @@ class DisplayFuzzyKMeans extends Display
     //boolean b = true;
     //if (b) {
     writeSampleData(samples);
-    Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, 
"clusters-0"), 3, measure);
+    Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new 
Path(output, "clusters-0"), 3, measure);
     double threshold = 0.001;
     int numIterations = 10;
     int m = 3;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Tue Apr  5 10:21:32 2011
@@ -53,7 +53,7 @@ class DisplayKMeans extends DisplayClust
     int maxIter = 10;
     double distanceThreshold = 0.001;
     //if (b) {
-    Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, 
"clusters-0"), 3, measure);
+    Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new 
Path(output, "clusters-0"), 3, measure);
     KMeansDriver.run(samples,
                      clusters,
                      output,

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1088957&r1=1088956&r2=1088957&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Tue Apr  5 10:21:32 2011
@@ -131,13 +131,13 @@ public final class Job extends AbstractJ
                   int k,
                   double convergenceDelta,
                   int maxIterations)
-    throws IOException, InstantiationException, IllegalAccessException, 
InterruptedException, ClassNotFoundException {
+    throws IOException, InterruptedException, ClassNotFoundException {
     Path directoryContainingConvertedInput = new Path(output, 
DIRECTORY_CONTAINING_CONVERTED_INPUT);
     log.info("Preparing Input");
     InputDriver.runJob(input, directoryContainingConvertedInput, 
"org.apache.mahout.math.RandomAccessSparseVector");
     log.info("Running random seed to get initial clusters");
     Path clusters = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
-    clusters = 
RandomSeedGenerator.buildRandom(directoryContainingConvertedInput, clusters, k, 
measure);
+    clusters = RandomSeedGenerator.buildRandom(conf, 
directoryContainingConvertedInput, clusters, k, measure);
     log.info("Running KMeans");
     KMeansDriver.run(conf,
                      directoryContainingConvertedInput,
@@ -185,7 +185,7 @@ public final class Job extends AbstractJ
                   double t2,
                   double convergenceDelta,
                   int maxIterations)
-    throws IOException, InstantiationException, IllegalAccessException, 
InterruptedException, ClassNotFoundException {
+    throws IOException, InterruptedException, ClassNotFoundException {
     Path directoryContainingConvertedInput = new Path(output, 
DIRECTORY_CONTAINING_CONVERTED_INPUT);
     log.info("Preparing Input");
     InputDriver.runJob(input, directoryContainingConvertedInput, 
"org.apache.mahout.math.RandomAccessSparseVector");


Reply via email to