how dumb can you get. obviously quite dumb... i would have to analyze the 
html-pages with a nested instance like this:

<entity name="rec" processor="XPathEntityProcessor" 
url="file:///C:\ColdFusion10\cfusion\solr\solr\tkbintranet\docImportUrl.xml" 
forEach="/docs/doc" dataSource="main"> 
        
                <entity name="htm" processor="XPathEntityProcessor" 
url="${rec.urlParse}" forEach="/xhtml:html" dataSource="dataUrl">
                        <field column="text" xpath="//content" />
                        <field column="h_2" xpath="//body" />
                        <field column="text_nohtml" xpath="//text" />
                        <field column="h_1" xpath="//h:h1" />
                </entity>
</entity>

but i'm pretty sure the forEach and the xpath expressions are wrong. at the 
moment i'm getting the following error:
        
        Caused by: java.lang.RuntimeException: 
org.apache.solr.handler.dataimport.DataImportHandlerException: 
java.lang.ClassCastException: 
sun.net.www.protocol.http.HttpURLConnection$HttpInputStream cannot be cast to 
java.io.Reader





On 28. Sep 2013, at 1:39 AM, Andreas Owen wrote:

> ok i see what you're getting at but why doesn't the following work:
>       
>       <field xpath="//h:h1" column="h_1" />
>       <field column="text" xpath="/xhtml:html/xhtml:body" />
> 
> i removed the tika-processor. what am i missing? i haven't found anything in 
> the wiki.
> 
> 
> On 28. Sep 2013, at 12:28 AM, P Williams wrote:
> 
>> I spent some more time thinking about this.  Do you really need to use the
>> TikaEntityProcessor?  It doesn't offer anything new to the document you are
>> building that couldn't be accomplished by the XPathEntityProcessor alone
>> from what I can tell.
>> 
>> I also tried to get the Advanced
>> Parsing <http://wiki.apache.org/solr/TikaEntityProcessor> example to
>> work without success.  There are some obvious typos (<document>
>> instead of </document>) and an odd order to the pieces (<dataSources> is
>> enclosed by <document>).  It also looks like
>> FieldStreamDataSource <http://lucene.apache.org/solr/4_3_1/solr-dataimporthandler/org/apache/solr/handler/dataimport/FieldStreamDataSource.html> is
>> the one that is meant to work in this context. If Koji is still around
>> maybe he could offer some help?  Otherwise this bit of erroneous
>> instruction should probably be removed from the wiki.
>> 
>> Cheers,
>> Tricia
>> 
>> $ svn diff
>> Index:
>> solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
>> ===================================================================
>> ---
>> solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
>>    (revision 1526990)
>> +++
>> solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
>>    (working copy)
>> @@ -99,13 +99,13 @@
>>    runFullImport(getConfigHTML("identity"));
>>    assertQ(req("*:*"), testsHTMLIdentity);
>>  }
>> -
>> +
>>  private String getConfigHTML(String htmlMapper) {
>>    return
>>        "<dataConfig>" +
>>            "  <dataSource type='BinFileDataSource'/>" +
>>            "  <document>" +
>> -            "    <entity name='Tika' format='xml'
>> processor='TikaEntityProcessor' " +
>> +            "    <entity name='Tika' format='html'
>> processor='TikaEntityProcessor' " +
>>            "       url='" +
>> getFile("dihextras/structured.html").getAbsolutePath() + "' " +
>>            ((htmlMapper == null) ? "" : (" htmlMapper='" + htmlMapper +
>> "'")) + ">" +
>>            "      <field column='text'/>" +
>> @@ -114,4 +114,36 @@
>>            "</dataConfig>";
>> 
>>  }
>> +  private String[] testsHTMLH1 = {
>> +      "//*[@numFound='1']"
>> +      , "//str[@name='h1'][contains(.,'H1 Header')]"
>> +  };
>> +
>> +  @Test
>> +  public void testTikaHTMLMapperSubEntity() throws Exception {
>> +    runFullImport(getConfigSubEntity("identity"));
>> +    assertQ(req("*:*"), testsHTMLH1);
>> +  }
>> +
>> +  private String getConfigSubEntity(String htmlMapper) {
>> +    return
>> +        "<dataConfig>" +
>> +        "<dataSource type='BinFileDataSource' name='bin'/>" +
>> +        "<dataSource type='FieldStreamDataSource' name='fld'/>" +
>> +        "<document>" +
>> +        "<entity name='tika' processor='TikaEntityProcessor' url='" +
>> getFile("dihextras/structured.html").getAbsolutePath() + "'
>> dataSource='bin' format='html' rootEntity='false'>" +
>> +        "<!--Do appropriate mapping here  meta=\"true\" means it is a
>> metadata field -->" +
>> +        "<field column='Author' meta='true' name='author'/>" +
>> +        "<field column='title' meta='true' name='title'/>" +
>> +        "<!--'text' is an implicit field emited by TikaEntityProcessor .
>> Map it appropriately-->" +
>> +        "<field name='text' column='text'/>" +
>> +        "<entity name='detail' type='XPathEntityProcessor' forEach='/html'
>> dataSource='fld' dataField='tika.text' rootEntity='true' >" +
>> +        "<field xpath='//div'  column='foo'/>" +
>> +        "<field xpath='//h1'  column='h1' />" +
>> +        "</entity>" +
>> +        "</entity>" +
>> +        "</document>" +
>> +        "</dataConfig>";
>> +  }
>> +
>> }
>> Index:
>> solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
>> ===================================================================
>> ---
>> solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
>>  (revision 1526990)
>> +++
>> solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
>>  (working copy)
>> @@ -194,6 +194,8 @@
>>   <field name="title" type="string" indexed="true" stored="true"/>
>>   <field name="author" type="string" indexed="true" stored="true" />
>>   <field name="text" type="text" indexed="true" stored="true" />
>> +   <field name="h1" type="text" indexed="true" stored="true" />
>> +   <field name="foo" type="text" indexed="true" stored="true" />
>> 
>> </fields>
>> <!-- field for the QueryParser to use when an explicit fieldname is
>> absent -->
>> 
>> 
>> I find the SqlEntityProcessor part particularly odd.  That's the default
>> right?:
>> 2405 T12 C1 oashd.SqlEntityProcessor.initQuery ERROR The query failed
>> 'null' java.lang.RuntimeException: unsupported type : class java.lang.String
>> at
>> org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:89)
>> at
>> org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:1)
>> at
>> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:59)
>> at
>> org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:73)
>> at
>> org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
>> at
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:469)
>> at
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:495)
>> at
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:408)
>> at
>> org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:323)
>> at
>> org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:231)
>> at
>> org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:411)
>> at
>> org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:476)
>> at
>> org.apache.solr.handler.dataimport.DataImportHandler.handleRequestBody(DataImportHandler.java:179)
>> at
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>> at org.apache.solr.core.SolrCore.execute(SolrCore.java:1859)
>> at org.apache.solr.util.TestHarness.query(TestHarness.java:291)
>> at
>> org.apache.solr.handler.dataimport.AbstractDataImportHandlerTestCase.runFullImport(AbstractDataImportHandlerTestCase.java:96)
>> at
>> org.apache.solr.handler.dataimport.TestTikaEntityProcessor.testTikaHTMLMapperSubEntity(TestTikaEntityProcessor.java:124)
>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>> at
>> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>> at
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> at java.lang.reflect.Method.invoke(Method.java:601)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1559)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner.access$600(RandomizedRunner.java:79)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:737)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:773)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:787)
>> at
>> com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:53)
>> at
>> org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:50)
>> at
>> org.apache.lucene.util.TestRuleFieldCacheSanity$1.evaluate(TestRuleFieldCacheSanity.java:51)
>> at
>> org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:46)
>> at
>> com.carrotsearch.randomizedtesting.rules.SystemPropertiesInvariantRule$1.evaluate(SystemPropertiesInvariantRule.java:55)
>> at
>> org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:49)
>> at
>> org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:70)
>> at
>> org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:48)
>> at
>> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
>> at
>> com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:358)
>> at
>> com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:782)
>> at
>> com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:442)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:746)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner$3.evaluate(RandomizedRunner.java:648)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner$4.evaluate(RandomizedRunner.java:682)
>> at
>> com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:693)
>> at
>> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
>> at
>> com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:53)
>> at
>> org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:46)
>> at
>> org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:42)
>> at
>> com.carrotsearch.randomizedtesting.rules.SystemPropertiesInvariantRule$1.evaluate(SystemPropertiesInvariantRule.java:55)
>> at
>> com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:39)
>> at
>> com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:39)
>> at
>> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
>> at
>> org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:43)
>> at
>> org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:48)
>> at
>> org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:70)
>> at
>> org.apache.lucene.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:55)
>> at
>> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
>> at
>> com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:358)
>> at java.lang.Thread.run(Thread.java:722)
>> 
>> 
>> 
>> On Fri, Sep 27, 2013 at 3:55 AM, Andreas Owen <a...@conx.ch> wrote:
>> 
>>> i removed the FieldReaderDataSource and dataSource="fld" but it didn't
>>> help. i get the following for each document:
>>>       DataImportHandlerException: Exception in invoking url null
>>> Processing Document # 9
>>>       nullpointerexception
>>> 
>>> 
>>> On 26. Sep 2013, at 8:39 PM, P Williams wrote:
>>> 
>>>> Hi,
>>>> 
>>>> Haven't tried this myself but maybe try leaving out the
>>>> FieldReaderDataSource entirely.  From my quick searching it looks like it's
>>>> tied to SQL.  Did you try copying the
>>>> http://wiki.apache.org/solr/TikaEntityProcessor Advanced Parsing example
>>>> exactly?  What happens when you leave out FieldReaderDataSource?
>>>> 
>>>> Cheers,
>>>> Tricia
>>>> 
>>>> 
>>>> On Thu, Sep 26, 2013 at 4:17 AM, Andreas Owen <a...@conx.ch> wrote:
>>>> 
>>>>> i'm using solr 4.3.1 and the dataimporter. i am trying to use
>>>>> XPathEntityProcessor within the TikaEntityProcessor for indexing html-pages
>>>>> but i'm getting this error for each document. i have also tried
>>>>> dataField="tika.text" and dataField="text" to no avail. the nested
>>>>> XPathEntityProcessor "detail" creates the error, the rest works fine. what
>>>>> am i doing wrong?
>>>>> 
>>>>> error:
>>>>> 
>>>>> ERROR - 2013-09-26 12:08:49.006;
>>>>> org.apache.solr.handler.dataimport.SqlEntityProcessor; The query failed
>>>>> 'null'
>>>>> java.lang.ClassCastException: java.io.StringReader cannot be cast to
>>>>> java.util.Iterator
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:59)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:73)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:465)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:404)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:319)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:227)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:422)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:487)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DataImportHandler.handleRequestBody(DataImportHandler.java:179)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>>>>      at org.apache.solr.core.SolrCore.execute(SolrCore.java:1820)
>>>>>      at
>>>>> 
>>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:656)
>>>>>      at
>>>>> 
>>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:359)
>>>>>      at
>>>>> 
>>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:155)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1307)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:453)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:560)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1072)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:382)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1006)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>>>>      at org.eclipse.jetty.server.Server.handle(Server.java:365)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:485)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:937)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:998)
>>>>>      at
>>> org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:856)
>>>>>      at
>>>>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>>>>      at java.lang.Thread.run(Unknown Source)
>>>>> ERROR - 2013-09-26 12:08:49.022; org.apache.solr.common.SolrException;
>>>>> Exception in entity :
>>>>> detail:org.apache.solr.handler.dataimport.DataImportHandlerException:
>>>>> java.lang.ClassCastException: java.io.StringReader cannot be cast to
>>>>> java.util.Iterator
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:65)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:73)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:465)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:404)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:319)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:227)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:422)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:487)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.DataImportHandler.handleRequestBody(DataImportHandler.java:179)
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>>>>      at org.apache.solr.core.SolrCore.execute(SolrCore.java:1820)
>>>>>      at
>>>>> 
>>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:656)
>>>>>      at
>>>>> 
>>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:359)
>>>>>      at
>>>>> 
>>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:155)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1307)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:453)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:560)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1072)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:382)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1006)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>>>>      at org.eclipse.jetty.server.Server.handle(Server.java:365)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:485)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:937)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:998)
>>>>>      at
>>> org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:856)
>>>>>      at
>>>>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>>>>      at
>>>>> 
>>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>>>>      at java.lang.Thread.run(Unknown Source)
>>>>> Caused by: java.lang.ClassCastException: java.io.StringReader cannot be
>>>>> cast to java.util.Iterator
>>>>>      at
>>>>> 
>>> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:59)
>>>>>      ... 41 more
>>>>> 
>>>>> 
>>>>> 
>>>>> data-config.xml
>>>>> 
>>>>> <dataConfig>
>>>>>      <dataSource type="BinURLDataSource" name="dataFile"/>
>>>>>      <dataSource type="BinURLDataSource" name="dataUrl"/>
>>>>>      <dataSource type="URLDataSource" name="main"/>
>>>>>      <dataSource type="FieldReaderDataSource" name="fld"/>
>>>>> <document>
>>>>> <entity name="rec" processor="XPathEntityProcessor"
>>>>> 
>>> url="file:///C:\ColdFusion10\cfusion\solr\solr\tkbintranet\docImportUrl.xml"
>>>>> forEach="/docs/doc" dataSource="main">
>>>>>              <field column="title" xpath="//title" />
>>>>>              <field column="id" xpath="//id" />
>>>>>              <field column="file" xpath="//file" />
>>>>>              <field column="url" xpath="//url" />
>>>>>              <field column="urlParse" xpath="//urlParse" />
>>>>>              <field column="last_modified" xpath="//last_modified" />
>>>>>              <field column="Author" xpath="//author" />
>>>>> 
>>>>>              <entity name="tika" processor="TikaEntityProcessor"
>>>>> url="${rec.urlParse}" dataSource="dataUrl" onError="skip" format="html">
>>>>>                      <field column="text"/>
>>>>> 
>>>>>                      <entity name="detail" type="XPathEntityProcessor"
>>>>> forEach="/html" dataSource="fld" dataField="${tika.text}"
>>> rootEntity="true"
>>>>> onError="skip">
>>>>>                              <field xpath="//h1" column="h_1" />
>>>>>                      </entity>
>>>>>              </entity>
>>>>>      </entity>
>>>>> </document>
>>>>> </dataConfig>
>>> 
>>> 

Reply via email to