Modified: nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java Fri Jan 9 06:34:33 2015 @@ -38,32 +38,29 @@ import org.junit.Test; import static org.junit.Assert.*; /** - * Basic fetcher test - * 1. generate seedlist - * 2. inject - * 3. generate - * 3. fetch - * 4. Verify contents - * + * Basic fetcher test 1. generate seedlist 2. inject 3. generate 3. fetch 4. + * Verify contents + * */ public class TestFetcher extends AbstractNutchTest { - final static Path testdir=new Path("build/test/fetch-test"); + final static Path testdir = new Path("build/test/fetch-test"); Path urlPath; Server server; @Override @Before - public void setUp() throws Exception{ + public void setUp() throws Exception { super.setUp(); urlPath = new Path(testdir, "urls"); - server = CrawlTestUtil.getServer(conf.getInt("content.server.port",50000), "build/test/data/fetch-test-site"); + server = CrawlTestUtil.getServer(conf.getInt("content.server.port", 50000), + "build/test/data/fetch-test-site"); server.start(); } @Override @After - public void tearDown() throws Exception{ + public void tearDown() throws Exception { server.stop(); fs.delete(testdir, true); } @@ -72,28 +69,28 @@ public class TestFetcher extends Abstrac @Ignore("Temporarily diable until NUTCH-1572 is addressed.") public void testFetch() throws Exception { - //generate seedlist + // generate seedlist ArrayList<String> urls = new ArrayList<String>(); - addUrl(urls,"index.html"); - addUrl(urls,"pagea.html"); - addUrl(urls,"pageb.html"); - addUrl(urls,"dup_of_pagea.html"); - addUrl(urls,"nested_spider_trap.html"); - addUrl(urls,"exception.html"); + addUrl(urls, "index.html"); + addUrl(urls, "pagea.html"); + addUrl(urls, "pageb.html"); + addUrl(urls, "dup_of_pagea.html"); + addUrl(urls, "nested_spider_trap.html"); + addUrl(urls, "exception.html"); CrawlTestUtil.generateSeedList(fs, urlPath, urls); - //inject + // inject InjectorJob injector = new InjectorJob(conf); injector.inject(urlPath); - //generate + // generate long time = System.currentTimeMillis(); GeneratorJob g = new GeneratorJob(conf); String batchId = g.generate(Long.MAX_VALUE, time, false, false); - //fetch + // fetch time = System.currentTimeMillis(); conf.setBoolean(FetcherJob.PARSE_KEY, true); FetcherJob fetcher = new FetcherJob(conf); @@ -101,12 +98,13 @@ public class TestFetcher extends Abstrac time = System.currentTimeMillis() - time; - //verify politeness, time taken should be more than (num_of_pages +1)*delay - int minimumTime = (int) ((urls.size() + 1) * 1000 * - conf.getFloat("fetcher.server.delay", 5)); + // verify politeness, time taken should be more than (num_of_pages +1)*delay + int minimumTime = (int) ((urls.size() + 1) * 1000 * conf.getFloat( + "fetcher.server.delay", 5)); assertTrue(time > minimumTime); - List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, Mark.FETCH_MARK, (String[])null); + List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, + Mark.FETCH_MARK, (String[]) null); assertEquals(urls.size(), pages.size()); List<String> handledurls = new ArrayList<String>(); for (URLWebPage up : pages) { @@ -115,23 +113,24 @@ public class TestFetcher extends Abstrac continue; } String content = Bytes.toString(bb); - if (content.indexOf("Nutch fetcher test page")!=-1) { + if (content.indexOf("Nutch fetcher test page") != -1) { handledurls.add(up.getUrl()); } } Collections.sort(urls); Collections.sort(handledurls); - //verify that enough pages were handled + // verify that enough pages were handled assertEquals(urls.size(), handledurls.size()); - //verify that correct pages were handled + // verify that correct pages were handled assertTrue(handledurls.containsAll(urls)); assertTrue(urls.containsAll(handledurls)); } private void addUrl(ArrayList<String> urls, String page) { - urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + page); + urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + + page); } @Test
Modified: nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Fri Jan 9 06:34:33 2015 @@ -30,6 +30,7 @@ public class TestIndexingFilters { /** * Test behaviour when defined filter does not exist. + * * @throws IndexingException */ @Test @@ -46,15 +47,16 @@ public class TestIndexingFilters { WebPage page = WebPage.newBuilder().build(); page.setText(new Utf8("text")); page.setTitle(new Utf8("title")); - filters.filter(new NutchDocument(),"http://www.example.com/",page); + filters.filter(new NutchDocument(), "http://www.example.com/", page); } /** * Test behaviour when NutchDOcument is null + * * @throws IndexingException */ @Test - public void testNutchDocumentNullIndexingFilter() throws IndexingException{ + public void testNutchDocumentNullIndexingFilter() throws IndexingException { Configuration conf = NutchConfiguration.create(); conf.addResource("nutch-default.xml"); conf.addResource("crawl-tests.xml"); @@ -63,18 +65,18 @@ public class TestIndexingFilters { WebPage page = WebPage.newBuilder().build(); page.setText(new Utf8("text")); page.setTitle(new Utf8("title")); - NutchDocument doc = filters.filter(null,"http://www.example.com/",page); + NutchDocument doc = filters.filter(null, "http://www.example.com/", page); assertNull(doc); } /** * Test behaviour when reset the index filter order will not take effect - * + * * @throws IndexingException */ @Test - public void testFilterCacheIndexingFilter() throws IndexingException{ + public void testFilterCacheIndexingFilter() throws IndexingException { Configuration conf = NutchConfiguration.create(); conf.addResource("nutch-default.xml"); conf.addResource("crawl-tests.xml"); @@ -86,18 +88,20 @@ public class TestIndexingFilters { WebPage page = WebPage.newBuilder().build(); page.setText(new Utf8("text")); page.setTitle(new Utf8("title")); - NutchDocument fdoc1 = filters1.filter(new NutchDocument(),"http://www.example.com/",page); + NutchDocument fdoc1 = filters1.filter(new NutchDocument(), + "http://www.example.com/", page); // add another index filter String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer"; // set content metadata Metadata md = new Metadata(); - md.add("example","data"); + md.add("example", "data"); // add MetadataIndxer filter conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2); IndexingFilters filters2 = new IndexingFilters(conf); - NutchDocument fdoc2 = filters2.filter(new NutchDocument(),"http://www.example.com/",page); - assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size()); + NutchDocument fdoc2 = filters2.filter(new NutchDocument(), + "http://www.example.com/", page); + assertEquals(fdoc1.getFieldNames().size(), fdoc2.getFieldNames().size()); } } Modified: nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java Fri Jan 9 06:34:33 2015 @@ -278,4 +278,3 @@ public class TestMetadata { } } - Modified: nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java Fri Jan 9 06:34:33 2015 @@ -28,7 +28,7 @@ import static org.junit.Assert.*; /** * JUnit based tests of class * {@link org.apache.nutch.metadata.SpellCheckedMetadata}. - * + * * @author Chris Mattmann * @author Jérôme Charron */ @@ -40,20 +40,20 @@ public class TestSpellCheckedMetadata { /** Test for the <code>getNormalizedName(String)</code> method. */ @Test public void testGetNormalizedName() { - assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("Content-Type")); - assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("ContentType")); - assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("Content-type")); - assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("contenttype")); - assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("contentype")); - assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("contntype")); + assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("Content-Type")); + assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("ContentType")); + assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("Content-type")); + assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("contenttype")); + assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("contentype")); + assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("contntype")); } - + /** Test for the <code>add(String, String)</code> method. */ @Test public void testAdd() { @@ -253,8 +253,8 @@ public class TestSpellCheckedMetadata { } /** - * IO Test method, usable only when you plan to do changes in metadata - * to measure relative performance impact. + * IO Test method, usable only when you plan to do changes in metadata to + * measure relative performance impact. */ @Test public final void testHandlingSpeed() { Modified: nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java Fri Jan 9 06:34:33 2015 @@ -26,6 +26,7 @@ public class TestURLFilters { /** * Testcase for NUTCH-325. + * * @throws URLFilterException */ @Test Modified: nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java Fri Jan 9 06:34:33 2015 @@ -32,30 +32,38 @@ public class TestURLNormalizers { String clazz1 = "org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer"; String clazz2 = "org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer"; conf.set("urlnormalizer.order", clazz1 + " " + clazz2); - - URLNormalizers normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_DEFAULT); - + + URLNormalizers normalizers = new URLNormalizers(conf, + URLNormalizers.SCOPE_DEFAULT); + assertNotNull(normalizers); try { - normalizers.normalize("http://www.example.com/", URLNormalizers.SCOPE_DEFAULT); + normalizers.normalize("http://www.example.com/", + URLNormalizers.SCOPE_DEFAULT); } catch (MalformedURLException mue) { fail(mue.toString()); } // NUTCH-1011 - Get rid of superfluous slashes try { - String normalizedSlashes = normalizers.normalize("http://www.example.org//path/to//somewhere.html", URLNormalizers.SCOPE_DEFAULT); - assertEquals(normalizedSlashes, "http://www.example.org/path/to/somewhere.html"); + String normalizedSlashes = normalizers.normalize( + "http://www.example.org//path/to//somewhere.html", + URLNormalizers.SCOPE_DEFAULT); + assertEquals(normalizedSlashes, + "http://www.example.org/path/to/somewhere.html"); } catch (MalformedURLException mue) { fail(mue.toString()); } // check the order int pos1 = -1, pos2 = -1; - URLNormalizer[] impls = normalizers.getURLNormalizers(URLNormalizers.SCOPE_DEFAULT); + URLNormalizer[] impls = normalizers + .getURLNormalizers(URLNormalizers.SCOPE_DEFAULT); for (int i = 0; i < impls.length; i++) { - if (impls[i].getClass().getName().equals(clazz1)) pos1 = i; - if (impls[i].getClass().getName().equals(clazz2)) pos2 = i; + if (impls[i].getClass().getName().equals(clazz1)) + pos1 = i; + if (impls[i].getClass().getName().equals(clazz2)) + pos2 = i; } if (pos1 != -1 && pos2 != -1) { assertTrue("RegexURLNormalizer before BasicURLNormalizer", pos1 < pos2); Modified: nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java Fri Jan 9 06:34:33 2015 @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.nutch.parse; import org.apache.nutch.parse.Outlink; @@ -35,51 +35,60 @@ import static org.junit.Assert.*; public class TestOutlinkExtractor { private static Configuration conf = NutchConfiguration.create(); + public void testGetNoOutlinks() { - Outlink[] outlinks = null; - + Outlink[] outlinks = null; + outlinks = OutlinkExtractor.getOutlinks(null, conf); assertNotNull(outlinks); assertEquals(0, outlinks.length); - + outlinks = OutlinkExtractor.getOutlinks("", conf); assertNotNull(outlinks); assertEquals(0, outlinks.length); } - + @Test public void testGetOutlinksHttp() { - Outlink[] outlinks = OutlinkExtractor.getOutlinks( - "Test with http://www.nutch.org/index.html is it found? " + - "What about www.google.com at http://www.google.de " + - "A longer URL could be http://www.sybit.com/solutions/portals.html", conf); - + Outlink[] outlinks = OutlinkExtractor + .getOutlinks( + "Test with http://www.nutch.org/index.html is it found? " + + "What about www.google.com at http://www.google.de " + + "A longer URL could be http://www.sybit.com/solutions/portals.html", + conf); + assertTrue("Url not found!", outlinks.length == 3); - assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl()); + assertEquals("Wrong URL", "http://www.nutch.org/index.html", + outlinks[0].getToUrl()); assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl()); - assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl()); + assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", + outlinks[2].getToUrl()); } - + @Test public void testGetOutlinksHttp2() { - Outlink[] outlinks = OutlinkExtractor.getOutlinks( - "Test with http://www.nutch.org/index.html is it found? " + - "What about www.google.com at http://www.google.de " + - "A longer URL could be http://www.sybit.com/solutions/portals.html", "http://www.sybit.de", conf); - + Outlink[] outlinks = OutlinkExtractor + .getOutlinks( + "Test with http://www.nutch.org/index.html is it found? " + + "What about www.google.com at http://www.google.de " + + "A longer URL could be http://www.sybit.com/solutions/portals.html", + "http://www.sybit.de", conf); + assertTrue("Url not found!", outlinks.length == 3); - assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl()); + assertEquals("Wrong URL", "http://www.nutch.org/index.html", + outlinks[0].getToUrl()); assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl()); - assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl()); + assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", + outlinks[2].getToUrl()); } - + @Test public void testGetOutlinksFtp() { Outlink[] outlinks = OutlinkExtractor.getOutlinks( - "Test with ftp://www.nutch.org is it found? " + - "What about www.google.com at ftp://www.google.de", conf); - - assertTrue("Url not found!", outlinks.length >1); + "Test with ftp://www.nutch.org is it found? " + + "What about www.google.com at ftp://www.google.de", conf); + + assertTrue("Url not found!", outlinks.length > 1); assertEquals("Wrong URL", "ftp://www.nutch.org", outlinks[0].getToUrl()); assertEquals("Wrong URL", "ftp://www.google.de", outlinks[1].getToUrl()); } Modified: nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java Fri Jan 9 06:34:33 2015 @@ -28,24 +28,24 @@ import org.apache.nutch.util.NutchConfig /** * Unit test for new parse plugin selection. - * + * * @author Sebastien Le Callonnec */ public class TestParserFactory { - + private Configuration conf; private ParserFactory parserFactory; - + /** Inits the Test Case with the test parse-plugin file */ @Before public void setUp() throws Exception { - conf = NutchConfiguration.create(); - conf.set("plugin.includes", ".*"); - conf.set("parse.plugin.file", - "org/apache/nutch/parse/parse-plugin-test.xml"); - parserFactory = new ParserFactory(conf); + conf = NutchConfiguration.create(); + conf.set("plugin.includes", ".*"); + conf.set("parse.plugin.file", + "org/apache/nutch/parse/parse-plugin-test.xml"); + parserFactory = new ParserFactory(conf); } - + /** Unit test for <code>getExtensions(String)</code> method. */ @Test public void testGetExtensions() throws Exception { @@ -56,48 +56,49 @@ public class TestParserFactory { ext = parserFactory.getExtensions("foo/bar").get(0); assertEquals("parse-tika", ext.getDescriptor().getPluginId()); } - + /** Unit test to check <code>getParsers</code> method */ @Test public void testGetParsers() throws Exception { - Parser [] parsers = parserFactory.getParsers("text/html", "http://foo.com"); + Parser[] parsers = parserFactory.getParsers("text/html", "http://foo.com"); assertNotNull(parsers); assertEquals(1, parsers.length); - assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); + assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); parsers = parserFactory.getParsers("text/html; charset=ISO-8859-1", - "http://foo.com"); + "http://foo.com"); assertNotNull(parsers); assertEquals(1, parsers.length); - assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); - + assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); + parsers = parserFactory.getParsers("application/x-javascript", - "http://foo.com"); + "http://foo.com"); assertNotNull(parsers); assertEquals(1, parsers.length); - assertEquals("org.apache.nutch.parse.js.JSParseFilter", - parsers[0].getClass().getName()); - + assertEquals("org.apache.nutch.parse.js.JSParseFilter", parsers[0] + .getClass().getName()); + parsers = parserFactory.getParsers("text/plain", "http://foo.com"); assertNotNull(parsers); assertEquals(1, parsers.length); - assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); - + assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); + Parser parser1 = parserFactory.getParsers("text/plain", "http://foo.com")[0]; Parser parser2 = parserFactory.getParsers("*", "http://foo.com")[0]; - + assertEquals("Different instances!", parser1.hashCode(), parser2.hashCode()); - - //test and make sure that the rss parser is loaded even though its plugin.xml - //doesn't claim to support text/rss, only application/rss+xml - parsers = parserFactory.getParsers("text/rss","http://foo.com"); + + // test and make sure that the rss parser is loaded even though its + // plugin.xml + // doesn't claim to support text/rss, only application/rss+xml + parsers = parserFactory.getParsers("text/rss", "http://foo.com"); assertNotNull(parsers); - assertEquals(1,parsers.length); - assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); + assertEquals(1, parsers.length); + assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); } - + } Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java Fri Jan 9 06:34:33 2015 @@ -24,8 +24,11 @@ package org.apache.nutch.plugin; */ public class HelloWorldExtension implements ITestExtension { - /* (non-Javadoc) - * @see org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String) + /* + * (non-Javadoc) + * + * @see + * org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String) */ public String testGetExtension(String hello) { return hello + " World"; Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java Fri Jan 9 06:34:33 2015 @@ -15,11 +15,12 @@ * limitations under the License. */ package org.apache.nutch.plugin; + /** * A Simple Test Extension Interface. * * @author joa23 - * + * */ public interface ITestExtension { public String testGetExtension(String hello); Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java Fri Jan 9 06:34:33 2015 @@ -28,8 +28,8 @@ import org.apache.hadoop.conf.Configurat public class SimpleTestPlugin extends Plugin { /** - * @param pDescriptor - * @param conf + * @param pDescriptor + * @param conf */ public SimpleTestPlugin(PluginDescriptor pDescriptor, Configuration conf) { @@ -55,4 +55,3 @@ public class SimpleTestPlugin extends Pl } } - Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java Fri Jan 9 06:34:33 2015 @@ -42,266 +42,260 @@ import org.apache.nutch.util.NutchJobCon * @author joa23 */ public class TestPluginSystem { - private int fPluginCount; + private int fPluginCount; - private LinkedList<File> fFolders = new LinkedList<File>(); - private Configuration conf ; - private PluginRepository repository; - - @Before - public void setUp() throws Exception { - this.conf = NutchConfiguration.create(); - conf.set("plugin.includes", ".*"); -// String string = this.conf.get("plugin.includes", ""); -// conf.set("plugin.includes", string + "|Dummy*"); - fPluginCount = 5; - createDummyPlugins(fPluginCount); - this.repository = PluginRepository.get(conf); - } - - @After - public void tearDown() throws Exception { - for (int i = 0; i < fFolders.size(); i++) { - File folder = (File) fFolders.get(i); - delete(folder); - folder.delete(); - } - - } - - /** - */ - @Test - public void testPluginConfiguration() { - String string = getPluginFolder(); - File file = new File(string); - if (!file.exists()) { - file.mkdir(); - } - assertTrue(file.exists()); - } - - /** - */ - @Test - public void testLoadPlugins() { - PluginDescriptor[] descriptors = repository - .getPluginDescriptors(); - int k = descriptors.length; - assertTrue(fPluginCount <= k); - for (int i = 0; i < descriptors.length; i++) { - PluginDescriptor descriptor = descriptors[i]; - if (!descriptor.getPluginId().startsWith("getPluginFolder()")) { - continue; - } - assertEquals(1, descriptor.getExportedLibUrls().length); - assertEquals(1, descriptor.getNotExportedLibUrls().length); - } - } - - @Test - public void testRepositoryCache() { - Configuration config = NutchConfiguration.create(); - PluginRepository repo = PluginRepository.get(config); - JobConf job = new NutchJobConf(config); - PluginRepository repo1 = PluginRepository.get(job); - assertTrue(repo == repo1); - // now construct a config without UUID - config = new Configuration(); - config.addResource("nutch-default.xml"); - config.addResource("nutch-site.xml"); - repo = PluginRepository.get(config); - job = new NutchJobConf(config); - repo1 = PluginRepository.get(job); - assertTrue(repo1 != repo); - } + private LinkedList<File> fFolders = new LinkedList<File>(); + private Configuration conf; + private PluginRepository repository; + + @Before + public void setUp() throws Exception { + this.conf = NutchConfiguration.create(); + conf.set("plugin.includes", ".*"); + // String string = this.conf.get("plugin.includes", ""); + // conf.set("plugin.includes", string + "|Dummy*"); + fPluginCount = 5; + createDummyPlugins(fPluginCount); + this.repository = PluginRepository.get(conf); + } + + @After + public void tearDown() throws Exception { + for (int i = 0; i < fFolders.size(); i++) { + File folder = (File) fFolders.get(i); + delete(folder); + folder.delete(); + } + + } + + /** + */ + @Test + public void testPluginConfiguration() { + String string = getPluginFolder(); + File file = new File(string); + if (!file.exists()) { + file.mkdir(); + } + assertTrue(file.exists()); + } + + /** + */ + @Test + public void testLoadPlugins() { + PluginDescriptor[] descriptors = repository.getPluginDescriptors(); + int k = descriptors.length; + assertTrue(fPluginCount <= k); + for (int i = 0; i < descriptors.length; i++) { + PluginDescriptor descriptor = descriptors[i]; + if (!descriptor.getPluginId().startsWith("getPluginFolder()")) { + continue; + } + assertEquals(1, descriptor.getExportedLibUrls().length); + assertEquals(1, descriptor.getNotExportedLibUrls().length); + } + } + + @Test + public void testRepositoryCache() { + Configuration config = NutchConfiguration.create(); + PluginRepository repo = PluginRepository.get(config); + JobConf job = new NutchJobConf(config); + PluginRepository repo1 = PluginRepository.get(job); + assertTrue(repo == repo1); + // now construct a config without UUID + config = new Configuration(); + config.addResource("nutch-default.xml"); + config.addResource("nutch-site.xml"); + repo = PluginRepository.get(config); + job = new NutchJobConf(config); + repo1 = PluginRepository.get(job); + assertTrue(repo1 != repo); + } - /** + /** * */ - @Test - public void testGetExtensionAndAttributes() { - String xpId = " sdsdsd"; - ExtensionPoint extensionPoint =repository - .getExtensionPoint(xpId); - assertEquals(extensionPoint, null); - Extension[] extension1 = repository - .getExtensionPoint(getGetExtensionId()).getExtensions(); - assertEquals(extension1.length, fPluginCount); - for (int i = 0; i < extension1.length; i++) { - Extension extension2 = extension1[i]; - String string = extension2.getAttribute(getGetConfigElementName()); - assertEquals(string, getParameterValue()); - } + @Test + public void testGetExtensionAndAttributes() { + String xpId = " sdsdsd"; + ExtensionPoint extensionPoint = repository.getExtensionPoint(xpId); + assertEquals(extensionPoint, null); + Extension[] extension1 = repository.getExtensionPoint(getGetExtensionId()) + .getExtensions(); + assertEquals(extension1.length, fPluginCount); + for (int i = 0; i < extension1.length; i++) { + Extension extension2 = extension1[i]; + String string = extension2.getAttribute(getGetConfigElementName()); + assertEquals(string, getParameterValue()); + } + } + + /** + * @throws PluginRuntimeException + */ + @Test + public void testGetExtensionInstances() throws PluginRuntimeException { + Extension[] extensions = repository.getExtensionPoint(getGetExtensionId()) + .getExtensions(); + assertEquals(extensions.length, fPluginCount); + for (int i = 0; i < extensions.length; i++) { + Extension extension = extensions[i]; + Object object = extension.getExtensionInstance(); + if (!(object instanceof HelloWorldExtension)) + fail(" object is not a instance of HelloWorldExtension"); + ((ITestExtension) object).testGetExtension("Bla "); + String string = ((ITestExtension) object).testGetExtension("Hello"); + assertEquals("Hello World", string); } + } - /** - * @throws PluginRuntimeException - */ - @Test - public void testGetExtensionInstances() throws PluginRuntimeException { - Extension[] extensions = repository - .getExtensionPoint(getGetExtensionId()).getExtensions(); - assertEquals(extensions.length, fPluginCount); - for (int i = 0; i < extensions.length; i++) { - Extension extension = extensions[i]; - Object object = extension.getExtensionInstance(); - if (!(object instanceof HelloWorldExtension)) - fail(" object is not a instance of HelloWorldExtension"); - ((ITestExtension) object).testGetExtension("Bla "); - String string = ((ITestExtension) object).testGetExtension("Hello"); - assertEquals("Hello World", string); - } - } - - /** + /** * * */ - @Test - public void testGetClassLoader() { - PluginDescriptor[] descriptors = repository - .getPluginDescriptors(); - for (int i = 0; i < descriptors.length; i++) { - PluginDescriptor descriptor = descriptors[i]; - assertNotNull(descriptor.getClassLoader()); - } - } - - /** - * @throws IOException - */ - @Test - public void testGetResources() throws IOException { - PluginDescriptor[] descriptors = repository - .getPluginDescriptors(); - for (int i = 0; i < descriptors.length; i++) { - PluginDescriptor descriptor = descriptors[i]; - if (!descriptor.getPluginId().startsWith("getPluginFolder()")) { - continue; - } - String value = descriptor.getResourceString("key", Locale.UK); - assertEquals("value", value); - value = descriptor.getResourceString("key", - Locale.TRADITIONAL_CHINESE); - assertEquals("value", value); - - } - } - - /** - * @return a PluginFolderPath - */ - private String getPluginFolder() { - String[] strings = conf.getStrings("plugin.folders"); - if (strings == null || strings.length == 0) - fail("no plugin directory setuped.."); - - String name = strings[0]; - return new PluginManifestParser(conf, this.repository).getPluginFolder(name).toString(); - } - - /** - * Creates some Dummy Plugins - * - * @param pCount - */ - private void createDummyPlugins(int pCount) { - String string = getPluginFolder(); - try { - File folder = new File(string); - folder.mkdir(); - for (int i = 0; i < pCount; i++) { - String pluginFolder = string + File.separator + "DummyPlugin" - + i; - File file = new File(pluginFolder); - file.mkdir(); - fFolders.add(file); - createPluginManifest(i, file.getAbsolutePath()); - createResourceFile(file.getAbsolutePath()); - } - } catch (IOException e) { - e.printStackTrace(); - } - } - - /** - * Creates an ResourceFile - * - * @param pFolderPath - * @throws FileNotFoundException - * @throws IOException - */ - private void createResourceFile(String pFolderPath) - throws FileNotFoundException, IOException { - Properties properties = new Properties(); - properties.setProperty("key", "value"); - properties.store(new FileOutputStream(pFolderPath + File.separator - + "messages" + ".properties"), ""); - } - - /** - * Deletes files in path - * - * @param path - * @throws IOException - */ - private void delete(File path) throws IOException { - File[] files = path.listFiles(); - for (int i = 0; i < files.length; ++i) { - if (files[i].isDirectory()) - delete(files[i]); - files[i].delete(); - } - } - - /** - * Creates an Plugin Manifest File - * - * @param i - * @param pFolderPath - * @throws IOException - */ - private void createPluginManifest(int i, String pFolderPath) - throws IOException { - FileWriter out = new FileWriter(pFolderPath + File.separator - + "plugin.xml"); - String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" - + "<!--this is just a simple plugin for testing issues.-->" - + "<plugin id=\"org.apache.nutch.plugin." - + i - + "\" name=\"" - + i - + "\" version=\"1.0\" provider-name=\"joa23\" " - + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">" - + "<extension-point id=\"aExtensioID\" " - + "name=\"simple Parser Extension\" " - + "schema=\"schema/testExtensionPoint.exsd\"/>" - + "<runtime><library name=\"libs/exported.jar\"><extport/></library>" - + "<library name=\"libs/not_exported.jar\"/></runtime>" - + "<extension point=\"aExtensioID\">" - + "<implementation name=\"simple Parser Extension\" " - + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">" - + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>" - + "</implementation></extension></plugin>"; - out.write(xml); - out.flush(); - out.close(); - } - - private String getParameterValue() { - return "a simple param value"; - } - - private static String getGetExtensionId() { - return "aExtensioID"; - } - - private static String getGetConfigElementName() { - return "dummy-name"; - } - - public static void main(String[] args) throws IOException { - new TestPluginSystem().createPluginManifest(1, "/"); - } + @Test + public void testGetClassLoader() { + PluginDescriptor[] descriptors = repository.getPluginDescriptors(); + for (int i = 0; i < descriptors.length; i++) { + PluginDescriptor descriptor = descriptors[i]; + assertNotNull(descriptor.getClassLoader()); + } + } + + /** + * @throws IOException + */ + @Test + public void testGetResources() throws IOException { + PluginDescriptor[] descriptors = repository.getPluginDescriptors(); + for (int i = 0; i < descriptors.length; i++) { + PluginDescriptor descriptor = descriptors[i]; + if (!descriptor.getPluginId().startsWith("getPluginFolder()")) { + continue; + } + String value = descriptor.getResourceString("key", Locale.UK); + assertEquals("value", value); + value = descriptor.getResourceString("key", Locale.TRADITIONAL_CHINESE); + assertEquals("value", value); + + } + } + + /** + * @return a PluginFolderPath + */ + private String getPluginFolder() { + String[] strings = conf.getStrings("plugin.folders"); + if (strings == null || strings.length == 0) + fail("no plugin directory setuped.."); + + String name = strings[0]; + return new PluginManifestParser(conf, this.repository) + .getPluginFolder(name).toString(); + } + + /** + * Creates some Dummy Plugins + * + * @param pCount + */ + private void createDummyPlugins(int pCount) { + String string = getPluginFolder(); + try { + File folder = new File(string); + folder.mkdir(); + for (int i = 0; i < pCount; i++) { + String pluginFolder = string + File.separator + "DummyPlugin" + i; + File file = new File(pluginFolder); + file.mkdir(); + fFolders.add(file); + createPluginManifest(i, file.getAbsolutePath()); + createResourceFile(file.getAbsolutePath()); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + /** + * Creates an ResourceFile + * + * @param pFolderPath + * @throws FileNotFoundException + * @throws IOException + */ + private void createResourceFile(String pFolderPath) + throws FileNotFoundException, IOException { + Properties properties = new Properties(); + properties.setProperty("key", "value"); + properties.store(new FileOutputStream(pFolderPath + File.separator + + "messages" + ".properties"), ""); + } + + /** + * Deletes files in path + * + * @param path + * @throws IOException + */ + private void delete(File path) throws IOException { + File[] files = path.listFiles(); + for (int i = 0; i < files.length; ++i) { + if (files[i].isDirectory()) + delete(files[i]); + files[i].delete(); + } + } + + /** + * Creates an Plugin Manifest File + * + * @param i + * @param pFolderPath + * @throws IOException + */ + private void createPluginManifest(int i, String pFolderPath) + throws IOException { + FileWriter out = new FileWriter(pFolderPath + File.separator + "plugin.xml"); + String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<!--this is just a simple plugin for testing issues.-->" + + "<plugin id=\"org.apache.nutch.plugin." + + i + + "\" name=\"" + + i + + "\" version=\"1.0\" provider-name=\"joa23\" " + + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">" + + "<extension-point id=\"aExtensioID\" " + + "name=\"simple Parser Extension\" " + + "schema=\"schema/testExtensionPoint.exsd\"/>" + + "<runtime><library name=\"libs/exported.jar\"><extport/></library>" + + "<library name=\"libs/not_exported.jar\"/></runtime>" + + "<extension point=\"aExtensioID\">" + + "<implementation name=\"simple Parser Extension\" " + + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">" + + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>" + + "</implementation></extension></plugin>"; + out.write(xml); + out.flush(); + out.close(); + } + + private String getParameterValue() { + return "a simple param value"; + } + + private static String getGetExtensionId() { + return "aExtensioID"; + } + + private static String getGetConfigElementName() { + return "dummy-name"; + } + + public static void main(String[] args) throws IOException { + new TestPluginSystem().createPluginManifest(1, "/"); + } } Modified: nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java Fri Jan 9 06:34:33 2015 @@ -27,7 +27,6 @@ import org.apache.tika.mime.MimeTypes; import org.junit.Test; import static org.junit.Assert.*; - /** Unit tests for Content. */ public class TestContent { @@ -46,7 +45,7 @@ public class TestContent { metaData.add("Content-Type", "text/html"); Content r = new Content(url, url, page.getBytes("UTF8"), "text/html", - metaData, conf); + metaData, conf); WritableTestUtils.testWritable(r); assertEquals("text/html", r.getMetadata().get("Content-Type")); @@ -60,52 +59,36 @@ public class TestContent { Content c = null; Metadata p = new Metadata(); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "".getBytes("UTF8"), - "text/html; charset=UTF-8", p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "".getBytes("UTF8"), "text/html; charset=UTF-8", p, conf); assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.html", - "http://www.foo.com/", - "".getBytes("UTF8"), - "", p, conf); + c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/", + "".getBytes("UTF8"), "", p, conf); assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.html", - "http://www.foo.com/", - "".getBytes("UTF8"), - null, p, conf); + c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/", + "".getBytes("UTF8"), null, p, conf); assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "<html></html>".getBytes("UTF8"), - "", p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "<html></html>".getBytes("UTF8"), "", p, conf); assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.html", - "http://www.foo.com/", - "<html></html>".getBytes("UTF8"), - "text/plain", p, conf); + c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/", + "<html></html>".getBytes("UTF8"), "text/plain", p, conf); assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.png", - "http://www.foo.com/", - "<html></html>".getBytes("UTF8"), - "text/plain", p, conf); + c = new Content("http://www.foo.com/foo.png", "http://www.foo.com/", + "<html></html>".getBytes("UTF8"), "text/plain", p, conf); assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "".getBytes("UTF8"), - "", p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "".getBytes("UTF8"), "", p, conf); assertEquals(MimeTypes.OCTET_STREAM, c.getContentType()); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "".getBytes("UTF8"), - null, p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "".getBytes("UTF8"), null, p, conf); assertNotNull(c.getContentType()); } Modified: nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java Fri Jan 9 06:34:33 2015 @@ -28,58 +28,59 @@ public class TestProtocolFactory { Configuration conf; ProtocolFactory factory; - + @Before public void setUp() throws Exception { conf = NutchConfiguration.create(); conf.set("plugin.includes", ".*"); conf.set("http.agent.name", "test-bot"); - factory=new ProtocolFactory(conf); + factory = new ProtocolFactory(conf); } @Test - public void testGetProtocol(){ + public void testGetProtocol() { - //non existing protocol + // non existing protocol try { factory.getProtocol("xyzxyz://somehost"); fail("Must throw ProtocolNotFound"); } catch (ProtocolNotFound e) { - //all is ok - } catch (Exception ex){ + // all is ok + } catch (Exception ex) { fail("Must not throw any other exception"); } - - Protocol httpProtocol=null; - - //existing protocol + + Protocol httpProtocol = null; + + // existing protocol try { - httpProtocol=factory.getProtocol("http://somehost"); + httpProtocol = factory.getProtocol("http://somehost"); assertNotNull(httpProtocol); - } catch (Exception ex){ + } catch (Exception ex) { fail("Must not throw any other exception"); } - //cache key - Object protocol = ObjectCache.get(conf).getObject(Protocol.X_POINT_ID + "http"); + // cache key + Object protocol = ObjectCache.get(conf).getObject( + Protocol.X_POINT_ID + "http"); assertNotNull(protocol); assertEquals(httpProtocol, protocol); - - //test same object instance + + // test same object instance try { - assertTrue(httpProtocol==factory.getProtocol("http://somehost")); + assertTrue(httpProtocol == factory.getProtocol("http://somehost")); } catch (ProtocolNotFound e) { fail("Must not throw any exception"); } } - + @Test - public void testContains(){ + public void testContains() { assertTrue(factory.contains("http", "http")); assertTrue(factory.contains("http", "http,ftp")); assertTrue(factory.contains("http", " http , ftp")); assertTrue(factory.contains("smb", "ftp,smb,http")); assertFalse(factory.contains("smb", "smbb")); } - + } Modified: nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java Fri Jan 9 06:34:33 2015 @@ -58,7 +58,7 @@ public class TestGoraStorage extends Abs public void tearDown() throws Exception { super.tearDown(); } - + /** * Sequentially read and write pages to a store. * @@ -71,7 +71,7 @@ public class TestGoraStorage extends Abs readWrite(id, webPageStore); } - private static void readWrite(String id, DataStore<String, WebPage> store) + private static void readWrite(String id, DataStore<String, WebPage> store) throws IOException, Exception { WebPage page = WebPage.newBuilder().build(); int max = 1000; @@ -147,9 +147,9 @@ public class TestGoraStorage extends Abs assertEquals(0, (int) result.get()); } } - + /** - * Tests multiple processes reading and writing to the same store backend, + * Tests multiple processes reading and writing to the same store backend, * this is to simulate a multi process Nutch environment (i.e. MapReduce). * * @throws Exception @@ -159,23 +159,23 @@ public class TestGoraStorage extends Abs public void testMultiProcess() throws Exception { // create and start a hsql server, a stand-alone (memory backed) db // (important: a stand-alone server should be used because simple - // file based access i.e. jdbc:hsqldb:file is NOT process-safe.) + // file based access i.e. jdbc:hsqldb:file is NOT process-safe.) Server server = new Server(); server.setDaemon(true); server.setSilent(true); // disables LOTS of trace final String className = getClass().getName(); String dbName = "test"; - server.setDatabasePath(0, "mem:"+dbName); + server.setDatabasePath(0, "mem:" + dbName); server.setDatabaseName(0, dbName); server.start(); - - //create the store so that the tests can start right away + + // create the store so that the tests can start right away StorageUtils.createWebStore(conf, String.class, WebPage.class); - + // create a fixed thread pool int numThreads = 4; ExecutorService pool = Executors.newFixedThreadPool(numThreads); - + // spawn multiple processes, each thread spawns own process Collection<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>(); for (int i = 0; i < numThreads; i++) { @@ -190,15 +190,16 @@ public class TestGoraStorage extends Abs classpath = "./src/testprocess" + pathSeparator + classpath; String path = System.getProperty("java.home") + separator + "bin" + separator + "java"; - ProcessBuilder processBuilder = new ProcessBuilder(path, "-cp", + ProcessBuilder processBuilder = new ProcessBuilder(path, "-cp", classpath, className); processBuilder.redirectErrorStream(true); Process process = processBuilder.start(); InputStream in = process.getInputStream(); int exit = process.waitFor(); - //print the output of the process - System.out.println("===Process stream for " + Thread.currentThread() - + "\n" + IOUtils.toString(in) + "===End of process stream."); + // print the output of the process + System.out.println("===Process stream for " + + Thread.currentThread() + "\n" + IOUtils.toString(in) + + "===End of process stream."); in.close(); // process should exit with zero code return exit; @@ -218,8 +219,8 @@ public class TestGoraStorage extends Abs for (Future<Integer> result : results) { assertEquals(0, (int) result.get()); } - - //stop db + + // stop db server.stop(); } @@ -228,7 +229,8 @@ public class TestGoraStorage extends Abs System.out.println("Starting!"); Configuration localConf = CrawlTestUtil.createConfiguration(); - localConf.set("storage.data.store.class", "org.apache.gora.memory.store.MemStore"); + localConf.set("storage.data.store.class", + "org.apache.gora.memory.store.MemStore"); DataStore<String, WebPage> store = StorageUtils.createWebStore(localConf, String.class, WebPage.class); Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java Fri Jan 9 06:34:33 2015 @@ -37,7 +37,8 @@ public class AbstractNutchTest { public void setUp() throws Exception { conf = CrawlTestUtil.createConfiguration(); - conf.set("storage.data.store.class", "org.apache.gora.memory.store.MemStore"); + conf.set("storage.data.store.class", + "org.apache.gora.memory.store.MemStore"); fs = FileSystem.get(conf); webPageStore = StorageUtils.createWebStore(conf, String.class, WebPage.class); Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java Fri Jan 9 06:34:33 2015 @@ -42,7 +42,8 @@ import java.util.List; public class CrawlTestUtil { - private static final Logger LOG = LoggerFactory.getLogger(CrawlTestUtil.class); + private static final Logger LOG = LoggerFactory + .getLogger(CrawlTestUtil.class); /** * For now we need to manually construct our Configuration, because we need to @@ -93,15 +94,16 @@ public class CrawlTestUtil { out.flush(); out.close(); } - + /** * Read entries from a data store - * + * * @return list of matching {@link URLWebPage} objects * @throws Exception */ - public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store, - Mark requiredMark, String... fields) throws Exception { + public static ArrayList<URLWebPage> readContents( + DataStore<String, WebPage> store, Mark requiredMark, String... fields) + throws Exception { ArrayList<URLWebPage> l = new ArrayList<URLWebPage>(); Query<String, WebPage> query = store.newQuery(); @@ -121,7 +123,8 @@ public class CrawlTestUtil { if (requiredMark != null && requiredMark.checkMark(page) == null) continue; - l.add(new URLWebPage(TableUtil.unreverseUrl(url), WebPage.newBuilder(page).build())); + l.add(new URLWebPage(TableUtil.unreverseUrl(url), WebPage.newBuilder( + page).build())); } catch (Exception e) { e.printStackTrace(); } @@ -129,7 +132,6 @@ public class CrawlTestUtil { return l; } - /** * Creates a new JettyServer with one static root context * @@ -145,7 +147,7 @@ public class CrawlTestUtil { ResourceHandler handler = new ResourceHandler(); handler.setResourceBase(staticContent); HandlerList handlers = new HandlerList(); - handlers.setHandlers(new Handler[]{handler, new DefaultHandler()}); + handlers.setHandlers(new Handler[] { handler, new DefaultHandler() }); webServer.setHandler(handlers); return webServer; } Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java Fri Jan 9 06:34:33 2015 @@ -45,7 +45,7 @@ public class TestEncodingDetector { // first disable auto detection conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, -1); - //Metadata metadata = new Metadata(); + // Metadata metadata = new Metadata(); EncodingDetector detector; // Content content; String encoding; @@ -65,8 +65,9 @@ public class TestEncodingDetector { page.setBaseUrl(new Utf8("http://www.example.com/")); page.setContentType(new Utf8("text/plain")); page.setContent(ByteBuffer.wrap(contentInOctets)); - page.getHeaders().put(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16")); - + page.getHeaders().put(EncodingDetector.CONTENT_TYPE_UTF8, + new Utf8("text/plain; charset=UTF-16")); + detector = new EncodingDetector(conf); detector.autoDetectClues(page, true); encoding = detector.guessEncoding(page, "windows-1252"); @@ -76,7 +77,7 @@ public class TestEncodingDetector { page.setBaseUrl(new Utf8("http://www.example.com/")); page.setContentType(new Utf8("text/plain")); page.setContent(ByteBuffer.wrap(contentInOctets)); - + detector = new EncodingDetector(conf); detector.autoDetectClues(page, true); detector.addClue("windows-1254", "sniffed"); @@ -89,8 +90,9 @@ public class TestEncodingDetector { page.setBaseUrl(new Utf8("http://www.example.com/")); page.setContentType(new Utf8("text/plain")); page.setContent(ByteBuffer.wrap(contentInOctets)); - page.getMetadata().put(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes())); - + page.getMetadata().put(new Utf8(Response.CONTENT_TYPE), + ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes())); + detector = new EncodingDetector(conf); detector.autoDetectClues(page, true); detector.addClue("utf-32", "sniffed"); Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java Fri Jan 9 06:34:33 2015 @@ -26,222 +26,215 @@ import java.io.IOException; public class TestGZIPUtils { /* a short, highly compressable, string */ - String SHORT_TEST_STRING= - "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc"; + String SHORT_TEST_STRING = "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc"; /* a short, highly compressable, string */ - String LONGER_TEST_STRING= - SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING - + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING - + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING - + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING; + String LONGER_TEST_STRING = SHORT_TEST_STRING + SHORT_TEST_STRING + + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING + + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING + + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING + + SHORT_TEST_STRING; /* a snapshot of the nutch webpage */ - String WEBPAGE= - "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" - + "<html>\n" - + "<head>\n" - + " <meta http-equiv=\"content-type\"\n" - + " content=\"text/html; charset=ISO-8859-1\">\n" - + " <title>Nutch</title>\n" - + "</head>\n" - + "<body>\n" - + "<h1\n" - + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n" - + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n" - + "<small>an open source web-search engine</small></h1>\n" - + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n" - + "<table\n" - + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n" - + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n" - + " <tbody>\n" - + " <tr>\n" - + " <td style=\"vertical-align: top; text-align: center;\"><a\n" - + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n" - + " </td>\n" - + " <td style=\"vertical-align: top; text-align: center;\"><a\n" - + " href=\"tutorial.html\">Tutorial</a><br>\n" - + " </td>\n" - + " <td style=\"vertical-align: top; text-align: center;\"><a\n" - + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n" - + " </td>\n" - + " <td style=\"vertical-align: top; text-align: center;\"><a\n" - + " href=\"api/index.html\">Javadoc</a><br>\n" - + " </td>\n" - + " <td style=\"vertical-align: top; text-align: center;\"><a\n" - + " href=\"http://sourceforge.net/tracker/?atid=491356&group_id=59548&func=browse\">Bugs</a><br>\n" - + " </td>\n" - + " <td style=\"vertical-align: top; text-align: center;\"><a\n" - + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n" - + " <td style=\"vertical-align: top; text-align: center;\"><a\n" - + " href=\"policies.html\">Policies</a><br>\n" - + " </td>\n" - + " </tr>\n" - + " </tbody>\n" - + "</table>\n" - + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n" - + "<h2>Introduction</h2>\n" - + "Nutch is a nascent effort to implement an open-source web search\n" - + "engine. Web search is a basic requirement for internet navigation, yet\n" - + "the number of web search engines is decreasing. Today's oligopoly could\n" - + "soon be a monopoly, with a single company controlling nearly all web\n" - + "search for its commercial gain. That would not be good for the\n" - + "users of internet. Nutch aims to enable anyone to easily and\n" - + "cost-effectively deploy a world-class web search engine.<br>\n" - + "<br>\n" - + "To succeed, the Nutch software must be able to:<br>\n" - + "<ul>\n" - + " <li> crawl several billion pages per month</li>\n" - + " <li>maintain an index of these pages</li>\n" - + " <li>search that index up to 1000 times per second</li>\n" - + " <li>provide very high quality search results</li>\n" - + " <li>operate at minimal cost</li>\n" - + "</ul>\n" - + "<h2>Status</h2>\n" - + "Currently we're just a handful of developers working part-time to put\n" - + "together a demo. The demo is coded entirely in Java. However\n" - + "persistent data is written in well-documented formats so that modules\n" - + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n" - + "project progresses.<br>\n" - + "<br>\n" - + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n" - + " href=\"http://sourceforge.net\"> </a>\n" - + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n" - + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&type=1\"\n" - + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n" - + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n" - + "</body>\n" - + "</html>\n"; + String WEBPAGE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" + + "<html>\n" + + "<head>\n" + + " <meta http-equiv=\"content-type\"\n" + + " content=\"text/html; charset=ISO-8859-1\">\n" + + " <title>Nutch</title>\n" + + "</head>\n" + + "<body>\n" + + "<h1\n" + + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n" + + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n" + + "<small>an open source web-search engine</small></h1>\n" + + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n" + + "<table\n" + + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n" + + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n" + + " <tbody>\n" + + " <tr>\n" + + " <td style=\"vertical-align: top; text-align: center;\"><a\n" + + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n" + + " </td>\n" + + " <td style=\"vertical-align: top; text-align: center;\"><a\n" + + " href=\"tutorial.html\">Tutorial</a><br>\n" + + " </td>\n" + + " <td style=\"vertical-align: top; text-align: center;\"><a\n" + + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n" + + " </td>\n" + + " <td style=\"vertical-align: top; text-align: center;\"><a\n" + + " href=\"api/index.html\">Javadoc</a><br>\n" + + " </td>\n" + + " <td style=\"vertical-align: top; text-align: center;\"><a\n" + + " href=\"http://sourceforge.net/tracker/?atid=491356&group_id=59548&func=browse\">Bugs</a><br>\n" + + " </td>\n" + + " <td style=\"vertical-align: top; text-align: center;\"><a\n" + + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n" + + " <td style=\"vertical-align: top; text-align: center;\"><a\n" + + " href=\"policies.html\">Policies</a><br>\n" + + " </td>\n" + + " </tr>\n" + + " </tbody>\n" + + "</table>\n" + + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n" + + "<h2>Introduction</h2>\n" + + "Nutch is a nascent effort to implement an open-source web search\n" + + "engine. Web search is a basic requirement for internet navigation, yet\n" + + "the number of web search engines is decreasing. Today's oligopoly could\n" + + "soon be a monopoly, with a single company controlling nearly all web\n" + + "search for its commercial gain. That would not be good for the\n" + + "users of internet. Nutch aims to enable anyone to easily and\n" + + "cost-effectively deploy a world-class web search engine.<br>\n" + + "<br>\n" + + "To succeed, the Nutch software must be able to:<br>\n" + + "<ul>\n" + + " <li> crawl several billion pages per month</li>\n" + + " <li>maintain an index of these pages</li>\n" + + " <li>search that index up to 1000 times per second</li>\n" + + " <li>provide very high quality search results</li>\n" + + " <li>operate at minimal cost</li>\n" + + "</ul>\n" + + "<h2>Status</h2>\n" + + "Currently we're just a handful of developers working part-time to put\n" + + "together a demo. The demo is coded entirely in Java. However\n" + + "persistent data is written in well-documented formats so that modules\n" + + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n" + + "project progresses.<br>\n" + + "<br>\n" + + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n" + + " href=\"http://sourceforge.net\"> </a>\n" + + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n" + + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&type=1\"\n" + + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n" + + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n" + + "</body>\n" + + "</html>\n"; // tests @Test public void testZipUnzip() { - byte[] testBytes= SHORT_TEST_STRING.getBytes(); + byte[] testBytes = SHORT_TEST_STRING.getBytes(); testZipUnzip(testBytes); - testBytes= LONGER_TEST_STRING.getBytes(); + testBytes = LONGER_TEST_STRING.getBytes(); testZipUnzip(testBytes); - testBytes= WEBPAGE.getBytes(); + testBytes = WEBPAGE.getBytes(); testZipUnzip(testBytes); } @Test public void testZipUnzipBestEffort() { - byte[] testBytes= SHORT_TEST_STRING.getBytes(); + byte[] testBytes = SHORT_TEST_STRING.getBytes(); testZipUnzipBestEffort(testBytes); - testBytes= LONGER_TEST_STRING.getBytes(); + testBytes = LONGER_TEST_STRING.getBytes(); testZipUnzipBestEffort(testBytes); - testBytes= WEBPAGE.getBytes(); + testBytes = WEBPAGE.getBytes(); testZipUnzipBestEffort(testBytes); } - + @Test public void testTruncation() { - byte[] testBytes= SHORT_TEST_STRING.getBytes(); + byte[] testBytes = SHORT_TEST_STRING.getBytes(); testTruncation(testBytes); - testBytes= LONGER_TEST_STRING.getBytes(); + testBytes = LONGER_TEST_STRING.getBytes(); testTruncation(testBytes); - testBytes= WEBPAGE.getBytes(); + testBytes = WEBPAGE.getBytes(); testTruncation(testBytes); } @Test public void testLimit() { - byte[] testBytes= SHORT_TEST_STRING.getBytes(); + byte[] testBytes = SHORT_TEST_STRING.getBytes(); testLimit(testBytes); - testBytes= LONGER_TEST_STRING.getBytes(); + testBytes = LONGER_TEST_STRING.getBytes(); testLimit(testBytes); - testBytes= WEBPAGE.getBytes(); + testBytes = WEBPAGE.getBytes(); testLimit(testBytes); } // helpers public void testZipUnzip(byte[] origBytes) { - byte[] compressedBytes= GZIPUtils.zip(origBytes); + byte[] compressedBytes = GZIPUtils.zip(origBytes); assertTrue("compressed array is not smaller!", - compressedBytes.length < origBytes.length); + compressedBytes.length < origBytes.length); - byte[] uncompressedBytes= null; + byte[] uncompressedBytes = null; try { - uncompressedBytes= GZIPUtils.unzip(compressedBytes); + uncompressedBytes = GZIPUtils.unzip(compressedBytes); } catch (IOException e) { e.printStackTrace(); - assertTrue("caught exception '" + e + "' during unzip()", - false); + assertTrue("caught exception '" + e + "' during unzip()", false); } - assertTrue("uncompressedBytes is wrong size", - uncompressedBytes.length == origBytes.length); + assertTrue("uncompressedBytes is wrong size", + uncompressedBytes.length == origBytes.length); - for (int i= 0; i < origBytes.length; i++) + for (int i = 0; i < origBytes.length; i++) if (origBytes[i] != uncompressedBytes[i]) - assertTrue("uncompressedBytes does not match origBytes", false); + assertTrue("uncompressedBytes does not match origBytes", false); } public void testZipUnzipBestEffort(byte[] origBytes) { - byte[] compressedBytes= GZIPUtils.zip(origBytes); + byte[] compressedBytes = GZIPUtils.zip(origBytes); assertTrue("compressed array is not smaller!", - compressedBytes.length < origBytes.length); + compressedBytes.length < origBytes.length); - byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes); - assertTrue("uncompressedBytes is wrong size", - uncompressedBytes.length == origBytes.length); + byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes); + assertTrue("uncompressedBytes is wrong size", + uncompressedBytes.length == origBytes.length); - for (int i= 0; i < origBytes.length; i++) + for (int i = 0; i < origBytes.length; i++) if (origBytes[i] != uncompressedBytes[i]) - assertTrue("uncompressedBytes does not match origBytes", false); + assertTrue("uncompressedBytes does not match origBytes", false); } public void testTruncation(byte[] origBytes) { - byte[] compressedBytes= GZIPUtils.zip(origBytes); + byte[] compressedBytes = GZIPUtils.zip(origBytes); System.out.println("original data has len " + origBytes.length); - System.out.println("compressed data has len " - + compressedBytes.length); + System.out.println("compressed data has len " + compressedBytes.length); - for (int i= compressedBytes.length; i >= 0; i--) { + for (int i = compressedBytes.length; i >= 0; i--) { - byte[] truncCompressed= new byte[i]; + byte[] truncCompressed = new byte[i]; - for (int j= 0; j < i; j++) - truncCompressed[j]= compressedBytes[j]; + for (int j = 0; j < i; j++) + truncCompressed[j] = compressedBytes[j]; - byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed); + byte[] trunc = GZIPUtils.unzipBestEffort(truncCompressed); if (trunc == null) { - System.out.println("truncated to len " - + i + ", trunc is null"); + System.out.println("truncated to len " + i + ", trunc is null"); } else { - System.out.println("truncated to len " - + i + ", trunc.length= " - + trunc.length); - - for (int j= 0; j < trunc.length; j++) - if (trunc[j] != origBytes[j]) - assertTrue("truncated/uncompressed array differs at pos " - + j + " (compressed data had been truncated to len " - + i + ")", false); + System.out.println("truncated to len " + i + ", trunc.length= " + + trunc.length); + + for (int j = 0; j < trunc.length; j++) + if (trunc[j] != origBytes[j]) + assertTrue("truncated/uncompressed array differs at pos " + j + + " (compressed data had been truncated to len " + i + ")", + false); } } } public void testLimit(byte[] origBytes) { - byte[] compressedBytes= GZIPUtils.zip(origBytes); + byte[] compressedBytes = GZIPUtils.zip(origBytes); assertTrue("compressed array is not smaller!", - compressedBytes.length < origBytes.length); + compressedBytes.length < origBytes.length); - for (int i= 0; i < origBytes.length; i++) { + for (int i = 0; i < origBytes.length; i++) { - byte[] uncompressedBytes= - GZIPUtils.unzipBestEffort(compressedBytes, i); + byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes, i); - assertTrue("uncompressedBytes is wrong size", - uncompressedBytes.length == i); + assertTrue("uncompressedBytes is wrong size", + uncompressedBytes.length == i); - for (int j= 0; j < i; j++) + for (int j = 0; j < i; j++) if (origBytes[j] != uncompressedBytes[j]) assertTrue("uncompressedBytes does not match origBytes", false); } Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java Fri Jan 9 06:34:33 2015 @@ -36,7 +36,8 @@ public class TestMimeUtil extends TestCa private File sampleDir = new File(System.getProperty("test.build.data", "."), "test-mime-util"); - /** test data, every element on "test page": + /** + * test data, every element on "test page": * <ol> * <li>MIME type</li> * <li>file name (last URL path element)</li> @@ -67,15 +68,11 @@ public class TestMimeUtil extends TestCa "<?xml version=\"1.0\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">" + "<html>\n<head>\n" + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />" - + "</head>\n<body>Hello, World!</body></html>" } - }; + + "</head>\n<body>Hello, World!</body></html>" } }; - public static String[][] binaryFiles = { - { + public static String[][] binaryFiles = { { "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "test.xlsx", - "" } - }; + "test.xlsx", "" } }; private String getMimeType(String url, File file, String contentType, boolean useMagic) throws IOException { @@ -121,8 +118,8 @@ public class TestMimeUtil extends TestCa public void testBinaryFiles() throws IOException { for (String[] testPage : binaryFiles) { File dataFile = new File(sampleDir, testPage[1]); - String mimeType = getMimeType(urlPrefix + testPage[1], - dataFile, testPage[2], false); + String mimeType = getMimeType(urlPrefix + testPage[1], dataFile, + testPage[2], false); assertEquals("", testPage[0], mimeType); } } Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java Fri Jan 9 06:34:33 2015 @@ -31,41 +31,40 @@ import org.xml.sax.InputSource; public class TestNodeWalker { /* a snapshot of the nutch webpage */ - private final static String WEBPAGE= - "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>" - + "<body>" - + "<ul>" - + "<li>crawl several billion pages per month</li>" - + "<li>maintain an index of these pages</li>" - + "<li>search that index up to 1000 times per second</li>" - + "<li>provide very high quality search results</li>" - + "<li>operate at minimal cost</li>" - + "</ul>" - + "</body>" - + "</html>"; + private final static String WEBPAGE = "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>" + + "<body>" + + "<ul>" + + "<li>crawl several billion pages per month</li>" + + "<li>maintain an index of these pages</li>" + + "<li>search that index up to 1000 times per second</li>" + + "<li>provide very high quality search results</li>" + + "<li>operate at minimal cost</li>" + "</ul>" + "</body>" + "</html>"; private final static String[] ULCONTENT = new String[4]; - + @Before - public void setUp() throws Exception{ - ULCONTENT[0]="crawl several billion pages per month" ; - ULCONTENT[1]="maintain an index of these pages" ; - ULCONTENT[2]="search that index up to 1000 times per second" ; - ULCONTENT[3]="operate at minimal cost" ; + public void setUp() throws Exception { + ULCONTENT[0] = "crawl several billion pages per month"; + ULCONTENT[1] = "maintain an index of these pages"; + ULCONTENT[2] = "search that index up to 1000 times per second"; + ULCONTENT[3] = "operate at minimal cost"; } @Test public void testSkipChildren() { - DOMParser parser= new DOMParser(); - + DOMParser parser = new DOMParser(); + try { parser.setFeature("http://xml.org/sax/features/validation", false); - parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes()))); + parser.setFeature( + "http://apache.org/xml/features/nonvalidating/load-external-dtd", + false); + parser + .parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes()))); } catch (Exception e) { e.printStackTrace(); } - + StringBuffer sb = new StringBuffer(); NodeWalker walker = new NodeWalker(parser.getDocument()); while (walker.hasNext()) { @@ -77,30 +76,33 @@ public class TestNodeWalker { sb.append(text); } } - assertTrue("UL Content can NOT be found in the node", findSomeUlContent(sb.toString())); - - StringBuffer sbSkip = new StringBuffer(); - NodeWalker walkerSkip = new NodeWalker(parser.getDocument()); - while (walkerSkip.hasNext()) { - Node currentNode = walkerSkip.nextNode(); - String nodeName = currentNode.getNodeName(); - short nodeType = currentNode.getNodeType(); - if ("ul".equalsIgnoreCase(nodeName)) { - walkerSkip.skipChildren(); - } - if (nodeType == Node.TEXT_NODE) { - String text = currentNode.getNodeValue(); - text = text.replaceAll("\\s+", " "); - sbSkip.append(text); - } - } - assertFalse("UL Content can be found in the node", findSomeUlContent(sbSkip.toString())); + assertTrue("UL Content can NOT be found in the node", + findSomeUlContent(sb.toString())); + + StringBuffer sbSkip = new StringBuffer(); + NodeWalker walkerSkip = new NodeWalker(parser.getDocument()); + while (walkerSkip.hasNext()) { + Node currentNode = walkerSkip.nextNode(); + String nodeName = currentNode.getNodeName(); + short nodeType = currentNode.getNodeType(); + if ("ul".equalsIgnoreCase(nodeName)) { + walkerSkip.skipChildren(); + } + if (nodeType == Node.TEXT_NODE) { + String text = currentNode.getNodeValue(); + text = text.replaceAll("\\s+", " "); + sbSkip.append(text); + } + } + assertFalse("UL Content can be found in the node", + findSomeUlContent(sbSkip.toString())); } - + public boolean findSomeUlContent(String str) { - for(int i=0; i<ULCONTENT.length ; i++){ - if(str.contains(ULCONTENT[i])) return true; - } + for (int i = 0; i < ULCONTENT.length; i++) { + if (str.contains(ULCONTENT[i])) + return true; + } return false; } }