Author: kubes Date: Mon Apr 2 14:40:10 2007 New Revision: 524932 URL: http://svn.apache.org/viewvc?view=rev&rev=524932 Log: NUTCH-333 - SegmentMerger and SegmentReader should use NutchJob. Patch supplied originally by Michael Stack and updated by Doğacan Güney.
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?view=diff&rev=524932&r1=524931&r2=524932 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Mon Apr 2 14:40:10 2007 @@ -18,17 +18,37 @@ package org.apache.nutch.segment; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.*; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.io.*; -import org.apache.hadoop.mapred.*; +import org.apache.hadoop.io.MapFile; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.UTF8; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.OutputFormatBase; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; +import 
org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.SequenceFileRecordReader; import org.apache.hadoop.util.Progressable; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.Generator; @@ -39,6 +59,7 @@ import org.apache.nutch.parse.ParseText; import org.apache.nutch.protocol.Content; import org.apache.nutch.util.NutchConfiguration; +import org.apache.nutch.util.NutchJob; /** * This tool takes several segments and merges their data together. Only the @@ -482,7 +503,7 @@ if (LOG.isInfoEnabled()) { LOG.info("Merging " + segs.length + " segments to " + out + "/" + segmentName); } - JobConf job = new JobConf(getConf()); + JobConf job = new NutchJob(getConf()); job.setJobName("mergesegs " + out + "/" + segmentName); job.setBoolean("segment.merger.filter", filter); job.setLong("segment.merger.slice", slice); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?view=diff&rev=524932&r1=524931&r2=524932 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Mon Apr 2 14:40:10 2007 @@ -17,18 +17,48 @@ package org.apache.nutch.segment; -import java.io.*; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintStream; +import java.io.PrintWriter; +import java.io.Writer; import java.text.SimpleDateFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import 
org.apache.commons.logging.LogFactory; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.io.*; -import org.apache.hadoop.mapred.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.io.MapFile; +import org.apache.hadoop.io.ObjectWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.UTF8; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MapFileOutputFormat; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.util.Progressable; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.parse.ParseData; @@ -36,6 +66,7 @@ import org.apache.nutch.protocol.Content; import org.apache.nutch.util.LogUtil; import org.apache.nutch.util.NutchConfiguration; +import org.apache.nutch.util.NutchJob; /** Dump the content of a segment. */ public class SegmentReader extends Configured implements Reducer { @@ -120,7 +151,7 @@ } private JobConf createJobConf() { - JobConf job = new JobConf(getConf()); + JobConf job = new NutchJob(getConf()); job.setBoolean("segment.reader.co", this.co); job.setBoolean("segment.reader.fe", this.fe); job.setBoolean("segment.reader.ge", this.ge); ------------------------------------------------------------------------- Take Surveys. Earn Cash. 
Influence the Future of IT. Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys - and earn cash. http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs