Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change 
notification.

The following page has been changed by SteveSeverance:
http://wiki.apache.org/nutch/CompleteSourceListing

New page:
{{{
package com.ivirtuoso.linkcounter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import org.apache.nutch.parse.*;
import org.apache.nutch.util.*;

/**
 * MapReduce job that emits, for every record of a Nutch parse-data directory,
 * the number of outlinks stored in that record's {@link ParseData}.
 *
 * <p>The input keys are passed through unchanged and the job declares
 * {@link Text} as its output key class; presumably they are page URLs (the
 * reducer names its key parameter {@code url}) -- confirm against the segment
 * layout in use.
 *
 * <p>Usage: {@code LinkCounter <input_dir> <output_dir>}
 */
public class LinkCounter {

    /** Command-line synopsis printed when too few arguments are supplied. */
    private static final String USAGE = "Usage: LinkCounter <input_dir> <output_dir>";

    /**
     * Maps each input record to {@code (key, outlink count)}.
     *
     * <p>{@code configure} and {@code close} are not overridden here; the
     * implementations inherited from {@link MapReduceBase} are used.
     */
    public static class CounterMapper extends MapReduceBase implements Mapper
    {
        /**
         * Emits the input key together with the number of outlinks of the record.
         *
         * @param key       record key, forwarded to the collector unchanged
         * @param value     record value; must be a {@link ParseData}
         * @param collector receives one {@code (key, IntWritable)} pair per call
         * @param reporter  unused
         * @throws IOException if the collector fails to accept the pair
         * @throws ClassCastException if {@code value} is not a {@link ParseData}
         */
        public void map(WritableComparable key, Writable value,
                OutputCollector collector, Reporter reporter) throws IOException {
            ParseData data = (ParseData) value;

            IntWritable outboundLinkCount = new IntWritable(data.getOutlinks().length);

            collector.collect(key, outboundLinkCount);
        }
    }

    /**
     * Forwards one count per key. Also registered as the job's combiner.
     *
     * <p>{@code configure} and {@code close} are not overridden here; the
     * implementations inherited from {@link MapReduceBase} are used.
     */
    public static class CounterReducer extends MapReduceBase implements Reducer
    {
        /**
         * Emits the key with the first value of the iterator.
         *
         * <p>Only the first value is read; any further values supplied for the
         * same key are ignored, not summed.
         *
         * @param url      key shared by all values in {@code iterator}
         * @param iterator values for {@code url}; the first must be an {@link IntWritable}
         * @param output   receives a single {@code (url, count)} pair
         * @param reporter unused
         * @throws IOException if the collector fails to accept the pair
         */
        public void reduce(WritableComparable url, Iterator iterator,
                OutputCollector output, Reporter reporter) throws IOException {
            IntWritable linkCount = (IntWritable) iterator.next();
            output.collect(url, linkCount);
        }
    }

    /**
     * Configures and runs the "countlinks" job.
     *
     * @param args {@code args[0]} is the directory that contains the
     *             {@code ParseData.DIR_NAME} sub-directory to read;
     *             {@code args[1]} is the output path
     * @throws IOException declared for failures while setting up or running the job
     */
    public static void main(String[] args) throws IOException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println(USAGE);
            System.exit(-1);
        }

        Configuration config = NutchConfiguration.create();

        JobConf jobConfig = new NutchJob(config);
        jobConfig.setJobName("countlinks");

        jobConfig.setInputFormat(SequenceFileInputFormat.class);
        jobConfig.setOutputFormat(MapFileOutputFormat.class);

        // Keys are forwarded from the input unchanged and written as Text.
        jobConfig.setOutputKeyClass(Text.class);
        // Values are the per-record outlink counts.
        jobConfig.setOutputValueClass(IntWritable.class);

        jobConfig.setMapperClass(CounterMapper.class);
        // The reducer only forwards a value, so it is reused as the combiner.
        jobConfig.setCombinerClass(CounterReducer.class);
        jobConfig.setReducerClass(CounterReducer.class);

        jobConfig.setInputPath(new Path(args[0], ParseData.DIR_NAME));
        jobConfig.setOutputPath(new Path(args[1]));

        JobClient.runJob(jobConfig);
    }

}
}}}

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to