Author: siren
Date: Mon Jan 15 07:02:37 2007
New Revision: 496358

URL: http://svn.apache.org/viewvc?view=rev&rev=496358
Log:
fix NUTCH-430

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=496358&r1=496357&r2=496358
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Mon Jan 15 07:02:37 2007
@@ -133,6 +133,9 @@
 
 43. NUTCH-428 - NullPointerException thrown when agent name is not
     configured properly. Changed to throw RuntimeException instead.
+    (siren)
+
+44. NUTCH-430 - Integer overflow in HashComparator.compare (siren)
 
 
 Release 0.8 - 2006-07-25

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?view=diff&rev=496358&r1=496357&r2=496358
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Mon Jan 15 07:02:37 2007
@@ -264,39 +264,33 @@
       output.collect(entry.url, entry.datum);
     }
   }
-  
+
   /** Sort fetch lists by hash of URL. */
   public static class HashComparator extends WritableComparator {
-    public HashComparator() { super(Text.class); }
+    public HashComparator() {
+      super(Text.class);
+    }
 
     public int compare(WritableComparable a, WritableComparable b) {
-      Text url1 = (Text)a;
-      Text url2 = (Text)b;
+      Text url1 = (Text) a;
+      Text url2 = (Text) b;
       int hash1 = hash(url1.getBytes(), 0, url1.getLength());
       int hash2 = hash(url2.getBytes(), 0, url2.getLength());
-      if (hash1 != hash2) {
-        return hash1 - hash2;
-      }
-      return compareBytes(url1.getBytes(), 0, url1.getLength(),
-                          url2.getBytes(), 0, url2.getLength());
+      return (hash1 < hash2 ? -1 : (hash1 == hash2 ? 0 : 1));
     }
-    
 
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
       int hash1 = hash(b1, s1, l1);
       int hash2 = hash(b2, s2, l2);
-      if (hash1 != hash2) {
-        return hash1 - hash2;
-      }
-      return compareBytes(b1, s1, l1, b2, s2, l2);
+      return (hash1 < hash2 ? -1 : (hash1 == hash2 ? 0 : 1));
     }
 
     private static int hash(byte[] bytes, int start, int length) {
       int hash = 1;
       // make later bytes more significant in hash code, so that sorting by
       // hashcode correlates less with by-host ordering.
-      for (int i = length-1; i >= 0; i--)
-        hash = (31 * hash) + (int)bytes[start+i];
+      for (int i = length - 1; i >= 0; i--)
+        hash = (31 * hash) + (int) bytes[start + i];
       return hash;
     }
   }