Hi, I wrote a simple piece of code:
import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.mahout.clustering.Cluster; import org.apache.mahout.clustering.canopy.CanopyDriver; import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable; import org.apache.mahout.common.distance.EuclideanDistanceMeasure; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; public class Clustering { private final static String root = "C:\\root\\BI\\"; private final static String dataDir = root + "synthetic_control.data"; private final static String seqDir = root + "synthetic_control.seq"; private final static String outputDir = root + "output"; private final static String partMDir = outputDir + "\\" + "clusters-0-final" + "\\part-r-00000"; private final static String SEPARATOR = " "; private final static int NUMBER_OF_ELEMENTS = 2; private Configuration conf; private FileSystem fs; public Clustering() throws IOException { conf = new Configuration(); fs = FileSystem.get(conf); } public void convertToVectorFile() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(dataDir)); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(seqDir), LongWritable.class, VectorWritable.class); String line; long counter = 0; while ((line = reader.readLine()) != null) { String[] c; c = line.split(SEPARATOR); double[] d = new double[c.length]; for (int i = 0; i < NUMBER_OF_ELEMENTS; i++) { try { d[i] = Double.parseDouble(c[i]); } catch (Exception ex) { d[i] = 0; } } Vector vec = new RandomAccessSparseVector(c.length); vec.assign(d); VectorWritable writable = new VectorWritable(); writable.set(vec); writer.append(new LongWritable(counter++), writable); } writer.close(); } public void createClusters(double t1, double t2, double clusterClassificationThreshold, boolean runSequential) throws ClassNotFoundException, IOException, InterruptedException { EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure(); Path inputPath = new Path(seqDir); Path outputPath = new Path(outputDir); CanopyDriver.run(inputPath, outputPath, measure, t1, t2, runSequential, clusterClassificationThreshold, runSequential); } public void printClusters() throws IOException { SequenceFile.Reader readerSequence = new SequenceFile.Reader(fs, new Path(partMDir), conf); IntWritable key = new IntWritable(); WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable(); while (readerSequence.next(key, value)) { System.out.println(value.toString() + " belongs to cluster " + key.toString()); } readerSequence.close(); } } my synthetic_control.data file looks like this: 0.01 1.0 0.1 0.9 0.1 0.95 12.0 13.0 12.5 12.8 when I run my code it throws: java.io.IOException: wrong value class: wt: 0.0 vec: null is not class org.apache.mahout.clustering.iterator.ClusterWritable at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1936) at com.my.package.bi.canopy.CanopyClustering.printClusters(CanopyClustering.java:129) at com.my.package.bi.BIManager.printClusters(BIManager.java:20) at com.my.package.bi.Main.main(Main.java:15) Eclipse prints log, where everything looks well: DEBUG CanopyClusterer - Created new Canopy:0 at center:[0.010, 1.000] DEBUG CanopyClusterer - Added point: [0.100, 0.900] to canopy: C-0 DEBUG CanopyClusterer - Added point: [0.100, 0.950] to canopy: C-0 DEBUG CanopyClusterer - Created new Canopy:1 at center:[12.000, 13.000] DEBUG CanopyClusterer - Added point: [12.500, 12.800] to canopy: C-1 DEBUG CanopyDriver - Writing Canopy:C-0 center:[0.070, 0.950] numPoints:3 radius:[0.042, 0.041] DEBUG CanopyDriver - Writing Canopy:C-1 center:[12.250, 12.900] numPoints:2 radius:[0.250, 0.100] The exception comes from line while (readerSequence.next(key, value)) { in method printClusters() Where could be a problem? Thank you in advance