Code below, also attached. I put this together from the word count example.
package edu.umd.cs.mapreduce; import java.io.IOException; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.RunningJob; public class PageRankHadoop { public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); output.collect(word, one); } } } public static class ReduceClass extends MapReduceBase implements Reducer<Text, IntWritable, Text, Text> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { int sum = 0; while (values.hasNext()) { sum += values.next().get(); } // copied from the wordcount example, changed IntWritable to Text output.collect(key, new Text(Integer.toString(sum))); } } // dummy constructor private PageRankHadoop() { } public static void main(String[] args) throws IOException, Exception { // paths String remotePath = "/users/chang/"; String remoteInputPath = remotePath + "data/"; String remoteOutputPath = remotePath + "output/"; // HACK: single mapper and reducer int numMapTasks = 1; int numReduceTasks = 1; JobConf conf = new 
JobConf(PageRankHadoop.class); conf.setJobName("PageRankHadoop"); conf.setNumMapTasks(numMapTasks); conf.setNumReduceTasks(numReduceTasks); conf.setInputPath(new Path(remoteInputPath)); conf.setOutputPath(new Path(remoteOutputPath)); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(MapClass.class); conf.setCombinerClass(ReduceClass.class); conf.setReducerClass(ReduceClass.class); // remove remote files FileSystem.get(conf).delete(new Path(remoteOutputPath)); RunningJob job = JobClient.runJob(conf); } } The exception I got is: 08/03/24 17:54:43 INFO mapred.JobClient: Task Id : task_200803241557_0004_m_000000_0, Status : FAILED java.io.IOException: wrong value class: org.apache.hadoop.io.Text is not class org.apache.hadoop.io.IntWritable at org.apache.hadoop.io.SequenceFile$Writer.append(SequenceFile.java :952) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer$2.collect( MapTask.java:489) at edu.umd.cs.mapreduce.PageRankHadoop$ReduceClass.reduce( PageRankHadoop.java:60) at edu.umd.cs.mapreduce.PageRankHadoop$ReduceClass.reduce( PageRankHadoop.java:1) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.combineAndSpill( MapTask.java:522) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpillToDisk( MapTask.java:493) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.flush(MapTask.java :713) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:209) at org.apache.hadoop.mapred.TaskTracker$Child.main(TaskTracker.java :2071) Will post my cluster configuration if necessary. Thanks, - Chang On Mon, Mar 24, 2008 at 5:13 PM, Doug Cutting <[EMAIL PROTECTED]> wrote: > Can you produce a simple, standalone example program that fails in this > way, and post it to the list? Thanks! > > Doug > -- --------------- Überstehen ist alles. Chang Hu Ph.D. student Computer Science Department University of Maryland
package edu.umd.cs.mapreduce;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;

/**
 * Word-count-style MapReduce job adapted from the Hadoop wordcount example
 * (old {@code org.apache.hadoop.mapred} API).
 *
 * <p>Pipeline: map emits {@code (token, 1)} as {@code (Text, IntWritable)};
 * the combiner pre-sums counts map-side and MUST re-emit
 * {@code (Text, IntWritable)}; the reducer emits the final total as
 * {@code (Text, Text)}.
 *
 * <p>Bug fixed versus the original: the job registered {@code ReduceClass}
 * (whose output value type is {@code Text}) as the combiner. A combiner's
 * output types must equal the MAP output types, because its output is fed
 * back into the map-side spill, which is typed
 * ({@code setMapOutputValueClass(IntWritable.class)}). That mismatch produced
 * {@code java.io.IOException: wrong value class: org.apache.hadoop.io.Text is
 * not class org.apache.hadoop.io.IntWritable} inside
 * {@code MapTask$MapOutputBuffer.combineAndSpill}. The job now uses the
 * dedicated {@link SumCombiner} instead.
 */
public class PageRankHadoop {

    /**
     * Tokenizes each input line (whitespace-delimited) and emits
     * {@code (token, 1)} for every token.
     */
    public static class MapClass extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer itr = new StringTokenizer(line);
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                output.collect(word, one);
            }
        }
    }

    /**
     * Map-side combiner: sums partial counts per key and re-emits them as
     * {@code IntWritable}, matching the declared map output value class so
     * the combined records can be written back into the spill buffer.
     */
    public static class SumCombiner extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    /**
     * Final reducer: sums the (possibly pre-combined) counts and emits the
     * total as a {@code Text} value — acceptable here because the reducer's
     * output goes to the job output, whose value class is declared
     * {@code Text} via {@code setOutputValueClass(Text.class)}.
     */
    public static class ReduceClass extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, Text> {

        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new Text(Integer.toString(sum)));
        }
    }

    /** Utility class; not instantiable. */
    private PageRankHadoop() {
    }

    /**
     * Configures and runs the job: single map/reduce task, fixed HDFS
     * input/output paths, output directory deleted up front so reruns
     * don't fail on an existing directory.
     *
     * @param args unused
     * @throws Exception if job submission or execution fails
     */
    public static void main(String[] args) throws IOException, Exception {
        // paths
        String remotePath = "/users/chang/";
        String remoteInputPath = remotePath + "data/";
        String remoteOutputPath = remotePath + "output/";

        // HACK: single mapper and reducer
        int numMapTasks = 1;
        int numReduceTasks = 1;

        JobConf conf = new JobConf(PageRankHadoop.class);
        conf.setJobName("PageRankHadoop");
        conf.setNumMapTasks(numMapTasks);
        conf.setNumReduceTasks(numReduceTasks);
        conf.setInputPath(new Path(remoteInputPath));
        conf.setOutputPath(new Path(remoteOutputPath));

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(MapClass.class);
        // FIX: the combiner must produce the MAP output types
        // (Text, IntWritable), not the reducer's (Text, Text).
        conf.setCombinerClass(SumCombiner.class);
        conf.setReducerClass(ReduceClass.class);

        // remove remote files so the run doesn't fail on an existing output dir
        FileSystem.get(conf).delete(new Path(remoteOutputPath));

        RunningJob job = JobClient.runJob(conf);
    }
}