Please have a look at the wiki page [1] for instructions on setting up Nutch 1.3 or later in Eclipse.
Thank you [1] http://wiki.apache.org/nutch/RunNutchInEclipse 2011/11/13 Skiming_Zhang <[email protected]> > Hi ,I have set up my configuration in Nutch , and I use the source. > Because I can run Nutch 1.2 well in Eclipse, and I copy the configuration > specific files from Nutch 1.2 to Nutch 1.3 , > > System log: > > solrUrl is not set, indexing will be skipped... > crawl started in: crawl > rootUrlDir = urls > threads = 4 > depth = 5 > solrUrl=null > topN = 10 > Injector: starting at 2011-11-13 17:23:55 > Injector: crawlDb: crawl/crawldb > Injector: urlDir: urls > Injector: Converting injected urls to crawl db entries. > org.apache.nutch.plugin.PluginRuntimeException: > java.lang.ClassNotFoundException: > org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:166) > at > org.apache.nutch.net.URLNormalizers.getURLNormalizers(URLNormalizers.java:170) > at org.apache.nutch.net.URLNormalizers.<init>(URLNormalizers.java:128) > at > org.apache.nutch.crawl.Injector$InjectMapper.configure(Injector.java:70) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapRunner.configure(MapRunner.java:34) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > at 
org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:354) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307) > at > org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177) > Caused by: java.lang.ClassNotFoundException: > org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer > at java.net.URLClassLoader$1.run(Unknown Source) > at java.net.URLClassLoader$1.run(Unknown Source) > at java.security.AccessController.doPrivileged(Native Method) > at java.net.URLClassLoader.findClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:156) > ... 21 more > org.apache.nutch.plugin.PluginRuntimeException: > java.lang.ClassNotFoundException: > org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:166) > at > org.apache.nutch.net.URLNormalizers.getURLNormalizers(URLNormalizers.java:170) > at org.apache.nutch.net.URLNormalizers.<init>(URLNormalizers.java:128) > at > org.apache.nutch.crawl.Injector$InjectMapper.configure(Injector.java:70) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapRunner.configure(MapRunner.java:34) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at 
sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:354) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307) > at > org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177) > Caused by: java.lang.ClassNotFoundException: > org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer > at java.net.URLClassLoader$1.run(Unknown Source) > at java.net.URLClassLoader$1.run(Unknown Source) > at java.security.AccessController.doPrivileged(Native Method) > at java.net.URLClassLoader.findClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:156) > ... 
21 more > org.apache.nutch.plugin.PluginRuntimeException: > java.lang.ClassNotFoundException: > org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:166) > at > org.apache.nutch.net.URLNormalizers.getURLNormalizers(URLNormalizers.java:170) > at org.apache.nutch.net.URLNormalizers.<init>(URLNormalizers.java:128) > at > org.apache.nutch.crawl.Injector$InjectMapper.configure(Injector.java:70) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapRunner.configure(MapRunner.java:34) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:354) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307) > at > org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177) > Caused by: java.lang.ClassNotFoundException: > org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer > at java.net.URLClassLoader$1.run(Unknown Source) > at java.net.URLClassLoader$1.run(Unknown Source) > at 
java.security.AccessController.doPrivileged(Native Method) > at java.net.URLClassLoader.findClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:156) > ... 21 more > Exception in thread "main" java.io.IOException: Job failed! > at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252) > at org.apache.nutch.crawl.Injector.inject(Injector.java:217) > at org.apache.nutch.crawl.Crawl.run(Crawl.java:126) > at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65) > at org.apache.nutch.crawl.Crawl.main(Crawl.java:54) > > > > Hadoop log: > > 2011-11-13 17:23:55,505 WARN crawl.Crawl - solrUrl is not set, indexing > will be skipped... > 2011-11-13 17:23:55,676 INFO crawl.Crawl - crawl started in: crawl > 2011-11-13 17:23:55,676 INFO crawl.Crawl - rootUrlDir = urls > 2011-11-13 17:23:55,676 INFO crawl.Crawl - threads = 4 > 2011-11-13 17:23:55,676 INFO crawl.Crawl - depth = 5 > 2011-11-13 17:23:55,676 INFO crawl.Crawl - solrUrl=null > 2011-11-13 17:23:55,676 INFO crawl.Crawl - topN = 10 > 2011-11-13 17:23:55,692 INFO crawl.Injector - Injector: starting at > 2011-11-13 17:23:55 > 2011-11-13 17:23:55,692 INFO crawl.Injector - Injector: crawlDb: > crawl/crawldb > 2011-11-13 17:23:55,692 INFO crawl.Injector - Injector: urlDir: urls > 2011-11-13 17:23:55,692 INFO crawl.Injector - Injector: Converting > injected urls to crawl db entries. > 2011-11-13 17:23:55,832 WARN mapred.JobClient - No job jar file set. > User classes may not be found. See JobConf(Class) or > JobConf#setJar(String). 
> 2011-11-13 17:23:56,410 INFO plugin.PluginRepository - Plugins: looking > in: E:\IdealTimes\WorkSpace\Nutch1.3\plugin > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Plugin > Auto-activation mode: [true] > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Registered Plugins: > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - the nutch core > extension points (nutch-extensionpoints) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Basic URL > Normalizer (urlnormalizer-basic) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Html Parse > Plug-in (parse-html) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Basic Indexing > Filter (index-basic) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - HTTP Framework > (lib-http) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Pass-through URL > Normalizer (urlnormalizer-pass) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Regex URL Filter > (urlfilter-regex) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Http Protocol > Plug-in (protocol-http) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Regex URL > Normalizer (urlnormalizer-regex) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Tika Parser > Plug-in (parse-tika) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - OPIC Scoring > Plug-in (scoring-opic) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - CyberNeko HTML > Parser (lib-nekohtml) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Anchor Indexing > Filter (index-anchor) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Regex URL Filter > Framework (lib-regex-filter) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Registered > Extension-Points: > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Nutch URL > Normalizer (org.apache.nutch.net.URLNormalizer) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Nutch Protocol > (org.apache.nutch.protocol.Protocol) > 2011-11-13 
17:23:56,472 INFO plugin.PluginRepository - Nutch Segment > Merge Filter (org.apache.nutch.segment.SegmentMergeFilter) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Nutch URL Filter > (org.apache.nutch.net.URLFilter) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Nutch Indexing > Filter (org.apache.nutch.indexer.IndexingFilter) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - HTML Parse Filter > (org.apache.nutch.parse.HtmlParseFilter) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Nutch Content > Parser (org.apache.nutch.parse.Parser) > 2011-11-13 17:23:56,472 INFO plugin.PluginRepository - Nutch Scoring > (org.apache.nutch.scoring.ScoringFilter) > 2011-11-13 17:23:56,472 WARN net.URLNormalizers - > URLNormalizers:PluginRuntimeException when initializing url normalizer > plugin urlnormalizer-basic instance in getURLNormalizers function: > attempting to continue instantiating plugins > 2011-11-13 17:23:56,472 WARN net.URLNormalizers - > URLNormalizers:PluginRuntimeException when initializing url normalizer > plugin urlnormalizer-regex instance in getURLNormalizers function: > attempting to continue instantiating plugins > 2011-11-13 17:23:56,488 WARN net.URLNormalizers - > URLNormalizers:PluginRuntimeException when initializing url normalizer > plugin urlnormalizer-pass instance in getURLNormalizers function: > attempting to continue instantiating plugins > 2011-11-13 17:23:56,488 WARN mapred.LocalJobRunner - job_local_0001 > java.lang.RuntimeException: Error in configuring object > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93) > at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:354) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307) > at > org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177) > 
Caused by: java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > ... 5 more > Caused by: java.lang.RuntimeException: Error in configuring object > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93) > at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) > at > org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) > at org.apache.hadoop.mapred.MapRunner.configure(MapRunner.java:34) > ... 10 more > Caused by: java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) > ... 13 more > Caused by: java.lang.RuntimeException: > org.apache.nutch.plugin.PluginRuntimeException: > java.lang.ClassNotFoundException: > org.apache.nutch.urlfilter.regex.RegexURLFilter > at org.apache.nutch.net.URLFilters.<init>(URLFilters.java:77) > at > org.apache.nutch.crawl.Injector$InjectMapper.configure(Injector.java:72) > ... 18 more > Caused by: org.apache.nutch.plugin.PluginRuntimeException: > java.lang.ClassNotFoundException: > org.apache.nutch.urlfilter.regex.RegexURLFilter > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:166) > at org.apache.nutch.net.URLFilters.<init>(URLFilters.java:57) > ... 
19 more > Caused by: java.lang.ClassNotFoundException: > org.apache.nutch.urlfilter.regex.RegexURLFilter > at java.net.URLClassLoader$1.run(Unknown Source) > at java.net.URLClassLoader$1.run(Unknown Source) > at java.security.AccessController.doPrivileged(Native Method) > at java.net.URLClassLoader.findClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at java.lang.ClassLoader.loadClass(Unknown Source) > at > org.apache.nutch.plugin.Extension.getExtensionInstance(Extension.java:156) > ... 20 more > > > > > > -- *Lewis*

