Modified: nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java (original) +++ nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java Thu May 15 08:10:07 2014 @@ -17,17 +17,7 @@ package org.apache.nutch.scoring.opic; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.ByteBuffer; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - import org.apache.avro.util.Utf8; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.indexer.NutchDocument; import org.apache.nutch.scoring.ScoreDatum; @@ -35,6 +25,16 @@ import org.apache.nutch.scoring.ScoringF import org.apache.nutch.scoring.ScoringFilterException; import org.apache.nutch.storage.WebPage; import org.apache.nutch.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** * This plugin implements a variant of an Online Page Importance Computation @@ -82,7 +82,7 @@ public class OPICScoringFilter implement public void injectedScore(String url, WebPage row) throws ScoringFilterException { float score = row.getScore(); - row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(score))); + row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(score))); } /** Set to 0.0f (unknown value) - inlink contributions will bring it to @@ -90,7 +90,7 @@ public class OPICScoringFilter implement @Override public void initialScore(String url, WebPage row) throws ScoringFilterException { row.setScore(0.0f); - row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f))); + row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f))); } /** Use {@link WebPage#getScore()}. */ @@ -108,12 +108,12 @@ public class OPICScoringFilter implement } float oldScore = row.getScore(); row.setScore(oldScore + adjust); - ByteBuffer cashRaw = row.getFromMetadata(CASH_KEY); + ByteBuffer cashRaw = row.getMetadata().get(CASH_KEY); float cash = 0.0f; if (cashRaw != null) { cash = Bytes.toFloat(cashRaw.array(), cashRaw.arrayOffset() + cashRaw.position()); } - row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(cash + adjust))); + row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(cash + adjust))); } /** Get cash on hand, divide it by the number of outlinks and apply. */ @@ -121,7 +121,7 @@ public class OPICScoringFilter implement public void distributeScoreToOutlinks(String fromUrl, WebPage row, Collection<ScoreDatum> scoreData, int allCount) { - ByteBuffer cashRaw = row.getFromMetadata(CASH_KEY); + ByteBuffer cashRaw = row.getMetadata().get(CASH_KEY); if (cashRaw == null) { return; } @@ -149,7 +149,7 @@ public class OPICScoringFilter implement } } // reset cash to zero - row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f))); + row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f))); } /** Dampen the boost value by scorePower.*/
Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java Thu May 15 08:10:07 2014 @@ -87,7 +87,7 @@ public class TestAdaptiveFetchSchedule e wp.setStatus(1); wp.setFetchInterval(interval); wp.setScore(1.0f); - wp.setFetchTime(0); + wp.setFetchTime(0L); return wp; } Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java Thu May 15 08:10:07 2014 @@ -16,13 +16,6 @@ */ package org.apache.nutch.crawl; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.storage.Mark; import org.apache.nutch.storage.WebPage; @@ -33,7 +26,15 @@ import org.junit.After; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; -import static org.junit.Assert.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; + +import static org.junit.Assert.assertEquals; /** * Basic generator test. 1. Insert entries in webtable 2. Generates entries to @@ -294,10 +295,10 @@ public class TestGenerator extends Abstr */ private URLWebPage createURLWebPage(final String url, final int fetchInterval, final float score) { - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); page.setFetchInterval(fetchInterval); page.setScore(score); - page.setStatus(CrawlStatus.STATUS_UNFETCHED); + page.setStatus((int)CrawlStatus.STATUS_UNFETCHED); return new URLWebPage(url, page); } Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java Thu May 15 08:10:07 2014 @@ -16,11 +16,6 @@ */ package org.apache.nutch.crawl; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - import org.apache.avro.util.Utf8; import org.apache.hadoop.fs.Path; import org.apache.nutch.storage.WebPage; @@ -30,7 +25,14 @@ import org.apache.nutch.util.CrawlTestUt import org.junit.Before; import org.junit.Ignore; import org.junit.Test; -import static org.junit.Assert.*; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Basic injector test: 1. Creates a text file with urls 2. Injects them into @@ -110,8 +112,8 @@ public class TestInjector extends Abstra for (URLWebPage up : pages) { WebPage page = up.getDatum(); String representation = up.getUrl(); - representation += "\tnutch.score=" + (int)page.getScore(); - ByteBuffer bb = page.getFromMetadata(new Utf8("custom.attribute")); + representation += "\tnutch.score=" + page.getScore().intValue(); + ByteBuffer bb = page.getMetadata().get(new Utf8("custom.attribute")); if (bb != null) { representation += "\tcustom.attribute=" + Bytes.toString(bb); } Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java Thu May 15 08:10:07 2014 @@ -16,11 +16,6 @@ ******************************************************************************/ package org.apache.nutch.crawl; -import java.net.MalformedURLException; - -import org.junit.Test; -import static org.junit.Assert.*; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.nutch.crawl.GeneratorJob.SelectorEntry; @@ -30,6 +25,12 @@ import org.apache.nutch.fetcher.FetchEnt import org.apache.nutch.storage.WebPage; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.TableUtil; +import org.junit.Test; + +import java.net.MalformedURLException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; /** * Tests {@link URLPartitioner} @@ -168,7 +169,7 @@ public class TestURLPartitioner { int partitionFromRef = refPartitioner.getPartition("http://www.example.org/", numReduceTasks); //init selector entry (score shouldn't matter) SelectorEntry selectorEntry = new SelectorEntry("http://www.example.org/", 1337); - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); int partitionFromSig = sigPartitioner.getPartition(selectorEntry, page, numReduceTasks); assertEquals("partitions should be same", @@ -199,7 +200,7 @@ public class TestURLPartitioner { int partitionFromRef = refPartitioner.getPartition("http://www.example.org/", numReduceTasks); IntWritable intWritable = new IntWritable(1337); //doesn't matter - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); String key = TableUtil.reverseUrl("http://www.example.org/"); FetchEntry fetchEntry = new FetchEntry(conf, key, page); int partitionFromSig = sigPartitioner.getPartition(intWritable, fetchEntry, numReduceTasks); Modified: nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Thu May 15 08:10:07 2014 @@ -16,14 +16,15 @@ */ package org.apache.nutch.indexer; -import org.junit.Test; -import static org.junit.Assert.*; - import org.apache.avro.util.Utf8; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.metadata.Metadata; import org.apache.nutch.storage.WebPage; import org.apache.nutch.util.NutchConfiguration; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; public class TestIndexingFilters { @@ -42,7 +43,7 @@ public class TestIndexingFilters { conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2); IndexingFilters filters = new IndexingFilters(conf); - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); page.setText(new Utf8("text")); page.setTitle(new Utf8("title")); filters.filter(new NutchDocument(),"http://www.example.com/",page); @@ -59,7 +60,7 @@ public class TestIndexingFilters { conf.addResource("crawl-tests.xml"); IndexingFilters filters = new IndexingFilters(conf); - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); page.setText(new Utf8("text")); page.setTitle(new Utf8("title")); NutchDocument doc = filters.filter(null,"http://www.example.com/",page); @@ -82,7 +83,7 @@ public class TestIndexingFilters { conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1); IndexingFilters filters1 = new IndexingFilters(conf); - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); page.setText(new Utf8("text")); page.setTitle(new Utf8("title")); NutchDocument fdoc1 = filters1.filter(new NutchDocument(),"http://www.example.com/",page); Modified: nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java Thu May 15 08:10:07 2014 @@ -16,16 +16,6 @@ ******************************************************************************/ package org.apache.nutch.storage; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - import org.apache.avro.util.Utf8; import org.apache.commons.io.IOUtils; import org.apache.gora.query.Result; @@ -34,12 +24,23 @@ import org.apache.hadoop.conf.Configurat import org.apache.nutch.util.AbstractNutchTest; import org.apache.nutch.util.CrawlTestUtil; import org.hsqldb.Server; -import org.junit.Ignore; - import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; -import static org.junit.Assert.*; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; /** * Tests basic Gora functionality by writing and reading webpages. @@ -71,7 +72,7 @@ public class TestGoraStorage extends Abs private static void readWrite(String id, DataStore<String, WebPage> store) throws IOException, Exception { - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); int max = 1000; for (int i = 0; i < max; i++) { // store a page with title Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java Thu May 15 08:10:07 2014 @@ -16,14 +16,9 @@ */ package org.apache.nutch.util; -import java.io.IOException; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.gora.query.Query; +import org.apache.gora.query.Result; +import org.apache.gora.store.DataStore; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -31,14 +26,19 @@ import org.apache.hadoop.fs.Path; import org.apache.nutch.crawl.URLWebPage; import org.apache.nutch.storage.Mark; import org.apache.nutch.storage.WebPage; -import org.apache.gora.query.Query; -import org.apache.gora.query.Result; -import org.apache.gora.store.DataStore; import org.mortbay.jetty.Handler; import org.mortbay.jetty.Server; import org.mortbay.jetty.handler.DefaultHandler; import org.mortbay.jetty.handler.HandlerList; import org.mortbay.jetty.handler.ResourceHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; public class CrawlTestUtil { @@ -121,7 +121,7 @@ public class CrawlTestUtil { if (requiredMark != null && requiredMark.checkMark(page) == null) continue; - l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone())); + l.add(new URLWebPage(TableUtil.unreverseUrl(url), WebPage.newBuilder(page).build())); } catch (Exception e) { e.printStackTrace(); } Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java?rev=1594812&r1=1594811&r2=1594812&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java Thu May 15 08:10:07 2014 @@ -16,16 +16,16 @@ */ package org.apache.nutch.util; -import java.io.UnsupportedEncodingException; -import java.nio.ByteBuffer; - -import org.junit.Test; -import static org.junit.Assert.*; - import org.apache.avro.util.Utf8; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.storage.WebPage; +import org.junit.Test; + +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; public class TestEncodingDetector { private static Configuration conf = NutchConfiguration.create(); @@ -50,7 +50,7 @@ public class TestEncodingDetector { // Content content; String encoding; - WebPage page = new WebPage(); + WebPage page = WebPage.newBuilder().build(); page.setBaseUrl(new Utf8("http://www.example.com/")); page.setContentType(new Utf8("text/plain")); page.setContent(ByteBuffer.wrap(contentInOctets)); @@ -61,18 +61,18 @@ public class TestEncodingDetector { // no information is available, so it should return default encoding assertEquals("windows-1252", encoding.toLowerCase()); - page = new WebPage(); + page = WebPage.newBuilder().build(); page.setBaseUrl(new Utf8("http://www.example.com/")); page.setContentType(new Utf8("text/plain")); page.setContent(ByteBuffer.wrap(contentInOctets)); - page.putToHeaders(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16")); + page.getHeaders().put(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16")); detector = new EncodingDetector(conf); detector.autoDetectClues(page, true); encoding = detector.guessEncoding(page, "windows-1252"); assertEquals("utf-16", encoding.toLowerCase()); - page = new WebPage(); + page = WebPage.newBuilder().build(); page.setBaseUrl(new Utf8("http://www.example.com/")); page.setContentType(new Utf8("text/plain")); page.setContent(ByteBuffer.wrap(contentInOctets)); @@ -85,11 +85,11 @@ public class TestEncodingDetector { // enable autodetection conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, 50); - page = new WebPage(); + page = WebPage.newBuilder().build(); page.setBaseUrl(new Utf8("http://www.example.com/")); page.setContentType(new Utf8("text/plain")); page.setContent(ByteBuffer.wrap(contentInOctets)); - page.putToMetadata(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes())); + page.getMetadata().put(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes())); detector = new EncodingDetector(conf); detector.autoDetectClues(page, true);