Author: ab
Date: Mon May 22 14:44:34 2006
New Revision: 408767

URL: http://svn.apache.org/viewvc?rev=408767&view=rev
Log:
Field boosts weren't properly re-initialized when setConf was called.

Noticed by Marko Bauhardt and others.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
    lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=408767&r1=408766&r2=408767&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Mon May 22 14:44:34 2006
@@ -71,6 +71,7 @@
     private SelectorEntry entry = new SelectorEntry();
     private FloatWritable sortValue = new FloatWritable();
     private boolean byIP;
+    private long dnsFailure = 0L;
 
     public void configure(JobConf job) {
       curTime = job.getLong("crawl.gen.curTime", System.currentTimeMillis());
@@ -139,6 +140,8 @@
               host = ia.getHostAddress();
             } catch (UnknownHostException uhe) {
               LOG.fine("DNS lookup failed: " + host + ", skipping.");
+              dnsFailure++;
+              if (dnsFailure % 1000 == 0) LOG.warning("DNS failures: " + dnsFailure);
               continue;
             }
           }

Modified: lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java?rev=408767&r1=408766&r2=408767&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java Mon May 22 14:44:34 2006
@@ -33,13 +33,11 @@
  * expanded to search the url, anchor and content document fields.*/
 public class BasicQueryFilter implements QueryFilter {
     
-  private float URL_BOOST;
-
-  private float ANCHOR_BOOST ;
-
-  private float TITLE_BOOST;
-
-  private float HOST_BOOST;
+  private static final int  URL_BOOST       = 0;
+  private static final int  ANCHOR_BOOST    = 1;
+  private static final int  CONTENT_BOOST   = 2;
+  private static final int  TITLE_BOOST     = 3;
+  private static final int  HOST_BOOST      = 4;
 
   private static int SLOP = Integer.MAX_VALUE;
 
@@ -48,18 +46,17 @@
   private static final String[] FIELDS =
   { "url", "anchor", "content", "title", "host" };
 
-  private final float[] FIELD_BOOSTS =
-  { URL_BOOST, ANCHOR_BOOST, 1.0f, TITLE_BOOST, HOST_BOOST };
+  private float[] FIELD_BOOSTS = new float[5];
 
   /**
    * Set the boost factor for url matches, relative to content and anchor
    * matches
    */
-  public void setUrlBoost(float boost) { URL_BOOST = boost; }
+  public void setUrlBoost(float boost) { FIELD_BOOSTS[URL_BOOST] = boost; }
 
   /** Set the boost factor for title/anchor matches, relative to url and
    * content matches. */
-  public void setAnchorBoost(float boost) { ANCHOR_BOOST = boost; }
+  public void setAnchorBoost(float boost) { FIELD_BOOSTS[ANCHOR_BOOST] = boost; }
 
   /** Set the boost factor for sloppy phrase matches relative to unordered term
    * matches. */
@@ -173,10 +170,11 @@
 
   public void setConf(Configuration conf) {
     this.conf = conf;
-    this.URL_BOOST = conf.getFloat("query.url.boost", 4.0f);
-    this.ANCHOR_BOOST = conf.getFloat("query.anchor.boost", 2.0f);
-    this.TITLE_BOOST = conf.getFloat("query.title.boost", 1.5f);
-    this.HOST_BOOST = conf.getFloat("query.host.boost", 2.0f);
+    this.FIELD_BOOSTS[URL_BOOST] = conf.getFloat("query.url.boost", 4.0f);
+    this.FIELD_BOOSTS[ANCHOR_BOOST] = conf.getFloat("query.anchor.boost", 2.0f);
+    this.FIELD_BOOSTS[CONTENT_BOOST] = conf.getFloat("query.content.boost", 1.0f);
+    this.FIELD_BOOSTS[TITLE_BOOST] = conf.getFloat("query.title.boost", 1.5f);
+    this.FIELD_BOOSTS[HOST_BOOST] = conf.getFloat("query.host.boost", 2.0f);
     this.PHRASE_BOOST = conf.getFloat("query.phrase.boost", 1.0f);
   }
 


Reply via email to