I am a Hadoop novice, so kindly pardon my ignorance.
I am running the following Hadoop program in Fully Distributed Mode to count
the number of lines in a file. I am running this job from eclipse and I see
it running (based on the output to the eclipse console) but I do not see the
tasks in the TaskTracker web interface. Also, even though the data is
distributed across multiple hosts, it doesn't seem to be distributing work
across hosts.
Could someone please help me with this.
package LineCount;
import java.util.*;
import java.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.io.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.util.*;
public class LineCount extends Configured implements Tool {
public static class Map extends
MapperLongWritable,Text,Text,IntWritable
{
private static int counter = 1;
private static Text mapOpKey = new Text();
private final static IntWritable mapOpValue = new
IntWritable(1);
@Override
public void map(LongWritable mapInpKey, Text
mapInpValue,
MapperLongWritable,Text,Text,IntWritable.Context context) throws
IOException,InterruptedException{
System.out.println(Calling Map +
counter);
counter++;
mapOpKey.set(Number Of Lines);
context.write(mapOpKey, mapOpValue);
}
}
public static class Reduce extends
ReducerText,IntWritable,Text,IntWritable {
private static int counter = 1;
@Override
public void reduce(Text redIpKey, IterableIntWritable
redIpValue,
ReducerText,IntWritable,Text,IntWritable.Context context) throws
IOException,InterruptedException {
int sum=0;
System.out.println(Calling Reduce + counter);
counter++;
while(redIpValue.iterator().hasNext()){
sum = sum +
redIpValue.iterator().next().get();
}
context.write(redIpKey, new IntWritable(sum));
}
}
@Override
public int run(String[] args) throws Exception{
Configuration conf = new Configuration();
conf.addResource(new
Path(/hadoop-0.20.2/conf/core-site.xml));
Job job = new Job(conf);
job.setJobName(LineCount);
job.setJarByClass(LineCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
//job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
FileInputFormat.setInputPaths(job, new
Path(/usr/foo/hadoopIP));
FileOutputFormat.setOutputPath(job, new
Path(/usr/foo/hadoopOP));
job.waitForCompletion(true);
return 0;
}
public static void main(String[] args) throws Exception{
ToolRunner.run(new LineCount(), args);
}
}
--
View this message in context:
http://old.nabble.com/Job-progress-not-showing-in-Hadoop-Tasktracker--web-interface-tp32096156p32096156.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.