Author: ab
Date: Mon May 22 14:44:34 2006
New Revision: 408767

URL: http://svn.apache.org/viewvc?rev=408767&view=rev
Log:
Field boosts weren't properly re-initialized when setConf was called.
Noticed by Marko Bauhardt and others. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=408767&r1=408766&r2=408767&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Mon May 22 14:44:34 2006 @@ -71,6 +71,7 @@ private SelectorEntry entry = new SelectorEntry(); private FloatWritable sortValue = new FloatWritable(); private boolean byIP; + private long dnsFailure = 0L; public void configure(JobConf job) { curTime = job.getLong("crawl.gen.curTime", System.currentTimeMillis()); @@ -139,6 +140,8 @@ host = ia.getHostAddress(); } catch (UnknownHostException uhe) { LOG.fine("DNS lookup failed: " + host + ", skipping."); + dnsFailure++; + if (dnsFailure % 1000 == 0) LOG.warning("DNS failures: " + dnsFailure); continue; } } Modified: lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java?rev=408767&r1=408766&r2=408767&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java (original) +++ lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java Mon May 22 14:44:34 2006 @@ -33,13 +33,11 @@ * expanded to search the url, anchor and content document fields.*/ public class BasicQueryFilter implements QueryFilter { - 
private float URL_BOOST; - - private float ANCHOR_BOOST ; - - private float TITLE_BOOST; - - private float HOST_BOOST; + private static final int URL_BOOST = 0; + private static final int ANCHOR_BOOST = 1; + private static final int CONTENT_BOOST = 2; + private static final int TITLE_BOOST = 3; + private static final int HOST_BOOST = 4; private static int SLOP = Integer.MAX_VALUE; @@ -48,18 +46,17 @@ private static final String[] FIELDS = { "url", "anchor", "content", "title", "host" }; - private final float[] FIELD_BOOSTS = - { URL_BOOST, ANCHOR_BOOST, 1.0f, TITLE_BOOST, HOST_BOOST }; + private float[] FIELD_BOOSTS = new float[5]; /** * Set the boost factor for url matches, relative to content and anchor * matches */ - public void setUrlBoost(float boost) { URL_BOOST = boost; } + public void setUrlBoost(float boost) { FIELD_BOOSTS[URL_BOOST] = boost; } /** Set the boost factor for title/anchor matches, relative to url and * content matches. */ - public void setAnchorBoost(float boost) { ANCHOR_BOOST = boost; } + public void setAnchorBoost(float boost) { FIELD_BOOSTS[ANCHOR_BOOST] = boost; } /** Set the boost factor for sloppy phrase matches relative to unordered term * matches. */ @@ -173,10 +170,11 @@ public void setConf(Configuration conf) { this.conf = conf; - this.URL_BOOST = conf.getFloat("query.url.boost", 4.0f); - this.ANCHOR_BOOST = conf.getFloat("query.anchor.boost", 2.0f); - this.TITLE_BOOST = conf.getFloat("query.title.boost", 1.5f); - this.HOST_BOOST = conf.getFloat("query.host.boost", 2.0f); + this.FIELD_BOOSTS[URL_BOOST] = conf.getFloat("query.url.boost", 4.0f); + this.FIELD_BOOSTS[ANCHOR_BOOST] = conf.getFloat("query.anchor.boost", 2.0f); + this.FIELD_BOOSTS[CONTENT_BOOST] = conf.getFloat("query.content.boost", 1.0f); + this.FIELD_BOOSTS[TITLE_BOOST] = conf.getFloat("query.title.boost", 1.5f); + this.FIELD_BOOSTS[HOST_BOOST] = conf.getFloat("query.host.boost", 2.0f); this.PHRASE_BOOST = conf.getFloat("query.phrase.boost", 1.0f); }