Author: jeastman
Date: Sun Apr 10 18:10:50 2011
New Revision: 1090861

URL: http://svn.apache.org/viewvc?rev=1090861&view=rev
Log:
MAHOUT-552: Added static initialCanopy method to create initial canopies that 
retain the original center type. Extended the unit test to verify this. All tests pass.

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
 Sun Apr 10 18:10:50 2011
@@ -55,6 +55,20 @@ public class MeanShiftCanopy extends Clu
     super(point, id, measure);
     boundPoints.add(id);
   }
+  
+  /**
+   * Create an initial canopy whose center keeps the given point's own type (e.g. NamedVector).
+   * @param point a Vector; also passed to setCenter so it becomes the canopy center as given
+   * @param id an int identifying the canopy, passed through to the constructor
+   * @param measure a DistanceMeasure, passed through to the constructor
+   * @return a MeanShiftCanopy whose center was set to point after construction
+   */
+  public static MeanShiftCanopy initialCanopy(Vector point, int id, 
DistanceMeasure measure){
+         MeanShiftCanopy result = new MeanShiftCanopy(point, id, measure);
+         // constructor presumably stores a converted copy (confirm); reset center to keep point's type
+         result.setCenter(point);
+         return result;
+  }
 
   /**
    * Create a new Canopy containing the given point, id and bound points

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
 Sun Apr 10 18:10:50 2011
@@ -38,7 +38,7 @@ public class MeanShiftCanopyCreatorMappe
 
   @Override
   protected void map(WritableComparable<?> key, VectorWritable point, Context 
context) throws IOException, InterruptedException {
-    MeanShiftCanopy canopy = new MeanShiftCanopy(point.get(), nextCanopyId++, 
measure);
+    MeanShiftCanopy canopy = MeanShiftCanopy.initialCanopy(point.get(), 
nextCanopyId++, measure);
     context.write(new Text(key.toString()), canopy);
   }
 

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
 Sun Apr 10 18:10:50 2011
@@ -209,7 +209,7 @@ public class MeanShiftCanopyDriver exten
                                                            
MeanShiftCanopy.class);
       try {
         for (VectorWritable value : new 
SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) {
-          writer.append(new Text(), new MeanShiftCanopy(value.get(), id++, 
measure));
+          writer.append(new Text(), MeanShiftCanopy.initialCanopy(value.get(), 
id++, measure));
         }
       } finally {
         writer.close();

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
 Sun Apr 10 18:10:50 2011
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -28,6 +29,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -39,6 +41,7 @@ import org.apache.mahout.common.MahoutTe
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import 
org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -327,6 +330,13 @@ public final class TestMeanShift extends
     Path outPart = new Path(output, "clusters-3/part-r-00000");
     long count = HadoopUtil.countRecords(outPart, conf);
     assertEquals("count", 3, count);
+    outPart = new Path(output, "clusters-0/part-m-00000");
+       Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart, 
true, conf);
+       // now test the initial clusters to ensure the type of their centers 
has been retained
+       while (iterator.hasNext()) {
+         MeanShiftCanopy canopy = (MeanShiftCanopy) iterator.next();
+         assertTrue(canopy.getCenter()instanceof DenseVector);
+       }
   }
 
   /**


Reply via email to