Author: cutting
Date: Wed Jun 28 14:54:53 2006
New Revision: 417884
URL: http://svn.apache.org/viewvc?rev=417884&view=rev
Log:
NUTCH-312. Upgrade to Hadoop 0.4.0.
Added:
lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar (with props)
lucene/nutch/trunk/lib/hadoop-0.4.0.jar (with props)
Removed:
lucene/nutch/trunk/lib/hadoop-0.3.2.jar
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
Added: lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar?rev=417884&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/commons-cli-2.0-SNAPSHOT.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/lib/hadoop-0.4.0.jar
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/hadoop-0.4.0.jar?rev=417884&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/hadoop-0.4.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java?rev=417884&r1=417883&r2=417884&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java Wed Jun 28 14:54:53 2006
@@ -31,6 +31,7 @@
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
import org.apache.nutch.parse.ParseOutputFormat;
import org.apache.nutch.protocol.Content;
@@ -45,7 +46,8 @@
public RecordWriter getRecordWriter(final FileSystem fs,
final JobConf job,
- final String name) throws IOException {
+ final String name,
+ final Progressable progress) throws
IOException {
final Path fetch =
new Path(new Path(job.getOutputPath(), CrawlDatum.FETCH_DIR_NAME), name);
@@ -66,7 +68,7 @@
}
if (Fetcher.isParsing(job)) {
- parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name);
+ parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name,
null);
}
}
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=417884&r1=417883&r2=417884&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Wed Jun 28 14:54:53 2006
@@ -26,6 +26,7 @@
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.Progressable;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
@@ -276,7 +277,8 @@
/** Write nothing. */
public RecordWriter getRecordWriter(final FileSystem fs,
final JobConf job,
- final String name) throws IOException {
+ final String name,
+ final Progressable progress) throws
IOException {
return new RecordWriter() {
public void write(WritableComparable key, Writable value)
throws IOException {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?rev=417884&r1=417883&r2=417884&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Wed Jun 28 14:54:53 2006
@@ -27,6 +27,7 @@
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.Progressable;
import org.apache.nutch.parse.*;
import org.apache.nutch.analysis.*;
@@ -79,7 +80,7 @@
public static class OutputFormat
extends org.apache.hadoop.mapred.OutputFormatBase {
public RecordWriter getRecordWriter(final FileSystem fs, JobConf job,
- String name) throws IOException {
+ String name, Progressable progress)
throws IOException {
final Path perm = new Path(job.getOutputPath(), name);
final Path temp =
job.getLocalPath("index/_"+Integer.toString(new Random().nextInt()));
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?rev=417884&r1=417883&r2=417884&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Wed Jun 28 14:54:53 2006
@@ -31,6 +31,7 @@
import org.apache.nutch.net.*;
import java.io.*;
+import org.apache.hadoop.util.Progressable;
/* Parse content in a segment. */
public class ParseOutputFormat implements OutputFormat {
@@ -46,7 +47,7 @@
}
public RecordWriter getRecordWriter(FileSystem fs, JobConf job,
- String name) throws IOException {
+ String name, Progressable progress)
throws IOException {
this.urlNormalizer = new UrlNormalizerFactory(job).getNormalizer();
this.filters = new URLFilters(job);
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=417884&r1=417883&r2=417884&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Wed Jun 28 14:54:53 2006
@@ -28,6 +28,7 @@
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.Progressable;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Generator;
import org.apache.nutch.fetcher.Fetcher;
@@ -168,7 +169,7 @@
public static class SegmentOutputFormat extends
org.apache.hadoop.mapred.OutputFormatBase {
private static final String DEFAULT_SLICE = "default";
- public RecordWriter getRecordWriter(final FileSystem fs, final JobConf
job, final String name) throws IOException {
+ public RecordWriter getRecordWriter(final FileSystem fs, final JobConf
job, final String name, final Progressable progress) throws IOException {
return new RecordWriter() {
MapFile.Writer c_out = null;
MapFile.Writer f_out = null;
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=417884&r1=417883&r2=417884&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Wed Jun 28 14:54:53 2006
@@ -28,6 +28,7 @@
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.Progressable;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseText;
@@ -70,7 +71,7 @@
/** Implements a text output format */
public static class TextOutputFormat extends
org.apache.hadoop.mapred.OutputFormatBase {
- public RecordWriter getRecordWriter(final FileSystem fs, JobConf job,
String name) throws IOException {
+ public RecordWriter getRecordWriter(final FileSystem fs, JobConf job,
String name, Progressable progress) throws IOException {
final Path segmentDumpFile = new Path(job.getOutputPath(), name);
Using Tomcat but need to do more? Need to support web services or security?
Get stuff done quickly with pre-integrated technology that makes your job easier.
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Nutch-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs