Author: ferdy
Date: Tue Aug 14 07:30:29 2012
New Revision: 1372752

URL: http://svn.apache.org/viewvc?rev=1372752&view=rev
Log:
NUTCH-1365 Fix crawlId functionality by making use of new gora configuration

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/host/HostInjectorJob.java
    nutch/branches/2.x/src/java/org/apache/nutch/storage/StorageUtils.java
    nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1372752&r1=1372751&r2=1372752&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Aug 14 07:30:29 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.1 - Current Development
 
+* NUTCH-1365 Fix crawlId functionalilty by making using of new gora 
configuration (ferdy)
+
 * NUTCH-1442 indexingfilter.order is property is misread in code (ferdy via 
lewismc)
 
 * NUTCH-1450 Upgrade to gora deps to 0.2.1 (lewismc)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/host/HostInjectorJob.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/host/HostInjectorJob.java?rev=1372752&r1=1372751&r2=1372752&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/host/HostInjectorJob.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/host/HostInjectorJob.java Tue 
Aug 14 07:30:29 2012
@@ -150,7 +150,7 @@ public class HostInjectorJob implements 
     job.setMapOutputValueClass(Host.class);
     job.setOutputFormatClass(GoraOutputFormat.class);
     GoraOutputFormat.setOutput(job,
-        StorageUtils.createWebStore(getConf(), String.class, Host.class), 
true);
+        StorageUtils.createWebStore(job.getConfiguration(), String.class, 
Host.class), true);
     job.setReducerClass(Reducer.class);
     job.setNumReduceTasks(0);
     return job.waitForCompletion(true);

Modified: nutch/branches/2.x/src/java/org/apache/nutch/storage/StorageUtils.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/storage/StorageUtils.java?rev=1372752&r1=1372751&r2=1372752&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/storage/StorageUtils.java 
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/storage/StorageUtils.java Tue 
Aug 14 07:30:29 2012
@@ -33,6 +33,10 @@ import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Partitioner;
 import org.apache.nutch.metadata.Nutch;
 
+/**
+ * Entry point to Gora store/mapreduce functionality.
+ * Translates the concept of "crawlid" to the corresponding Gora support.
+ */
 public class StorageUtils {
 
   /** Creates a store for the given persistentClass.
@@ -61,7 +65,9 @@ public class StorageUtils {
     String crawlId = conf.get(Nutch.CRAWL_ID_KEY, "");
     
     if (!crawlId.isEmpty()) {
-      schema = crawlId + "_" + schema;
+      conf.set("schema.prefix", crawlId + "_");
+    } else {
+      conf.set("schema.prefix", "");
     }
 
     Class<? extends DataStore<K, V>> dataStoreClass =
@@ -71,7 +77,7 @@ public class StorageUtils {
   }
 
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> Class<? extends DataStore<K, V>>
+  private static <K, V extends Persistent> Class<? extends DataStore<K, V>>
   getDataStoreClass(Configuration conf)  throws ClassNotFoundException {
     return (Class<? extends DataStore<K, V>>)
       Class.forName(conf.get("storage.data.store.class",
@@ -81,15 +87,6 @@ public class StorageUtils {
   public static <K, V> void initMapperJob(Job job,
       Collection<WebPage.Field> fields,
       Class<K> outKeyClass, Class<V> outValueClass,
-      Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass, boolean 
reuseObjects)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, null, reuseObjects);
-  }
-
-  public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass)
   throws ClassNotFoundException, IOException {
     initMapperJob(job, fields, outKeyClass, outValueClass,

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java?rev=1372752&r1=1372751&r2=1372752&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/NutchJob.java Tue Aug 14 
07:30:29 2012
@@ -20,6 +20,7 @@ package org.apache.nutch.util;
 import java.io.IOException;
 
 import org.apache.avro.util.Utf8;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.nutch.metadata.Nutch;
@@ -34,6 +35,12 @@ public class NutchJob extends Job {
 
   public NutchJob(Configuration conf, String jobName) throws IOException {
     super(conf, jobName);
+    //prefix jobName with crawlId if not empty
+    String crawlId = conf.get("storage.crawl.id");
+    if (!StringUtils.isEmpty(crawlId)) {
+      jobName = "["+crawlId+"]"+jobName;
+      setJobName(jobName);
+    }
     setJarByClass(this.getClass());
   }
 


Reply via email to