ok i see what you're getting at but why doesn't the following work:
        
        <field xpath="//h:h1" column="h_1" />
        <field column="text" xpath="/xhtml:html/xhtml:body" />

i removed the tika-processor (TikaEntityProcessor). what am i missing? i haven't
found anything in the wiki.


On 28. Sep 2013, at 12:28 AM, P Williams wrote:

> I spent some more time thinking about this.  Do you really need to use the
> TikaEntityProcessor?  It doesn't offer anything new to the document you are
> building that couldn't be accomplished by the XPathEntityProcessor alone
> from what I can tell.
> 
> I also tried to get the Advanced
> Parsing <http://wiki.apache.org/solr/TikaEntityProcessor> example to
> work without success.  There are some obvious typos (<document>
> instead of </document>) and an odd order to the pieces (<dataSources> is
> enclosed by <document>).  It also looks like
> FieldStreamDataSource <http://lucene.apache.org/solr/4_3_1/solr-dataimporthandler/org/apache/solr/handler/dataimport/FieldStreamDataSource.html> is
> the one that is meant to work in this context. If Koji is still around
> maybe he could offer some help?  Otherwise this bit of erroneous
> instruction should probably be removed from the wiki.
> 
> Cheers,
> Tricia
> 
> $ svn diff
> Index:
> solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
> ===================================================================
> ---
> solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
>     (revision 1526990)
> +++
> solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
>     (working copy)
> @@ -99,13 +99,13 @@
>     runFullImport(getConfigHTML("identity"));
>     assertQ(req("*:*"), testsHTMLIdentity);
>   }
> -
> +
>   private String getConfigHTML(String htmlMapper) {
>     return
>         "<dataConfig>" +
>             "  <dataSource type='BinFileDataSource'/>" +
>             "  <document>" +
> -            "    <entity name='Tika' format='xml'
> processor='TikaEntityProcessor' " +
> +            "    <entity name='Tika' format='html'
> processor='TikaEntityProcessor' " +
>             "       url='" +
> getFile("dihextras/structured.html").getAbsolutePath() + "' " +
>             ((htmlMapper == null) ? "" : (" htmlMapper='" + htmlMapper +
> "'")) + ">" +
>             "      <field column='text'/>" +
> @@ -114,4 +114,36 @@
>             "</dataConfig>";
> 
>   }
> +  private String[] testsHTMLH1 = {
> +      "//*[@numFound='1']"
> +      , "//str[@name='h1'][contains(.,'H1 Header')]"
> +  };
> +
> +  @Test
> +  public void testTikaHTMLMapperSubEntity() throws Exception {
> +    runFullImport(getConfigSubEntity("identity"));
> +    assertQ(req("*:*"), testsHTMLH1);
> +  }
> +
> +  private String getConfigSubEntity(String htmlMapper) {
> +    return
> +        "<dataConfig>" +
> +        "<dataSource type='BinFileDataSource' name='bin'/>" +
> +        "<dataSource type='FieldStreamDataSource' name='fld'/>" +
> +        "<document>" +
> +        "<entity name='tika' processor='TikaEntityProcessor' url='" +
> getFile("dihextras/structured.html").getAbsolutePath() + "'
> dataSource='bin' format='html' rootEntity='false'>" +
> +        "<!--Do appropriate mapping here  meta=\"true\" means it is a
> metadata field -->" +
> +        "<field column='Author' meta='true' name='author'/>" +
> +        "<field column='title' meta='true' name='title'/>" +
> +        "<!--'text' is an implicit field emited by TikaEntityProcessor .
> Map it appropriately-->" +
> +        "<field name='text' column='text'/>" +
> +        "<entity name='detail' type='XPathEntityProcessor' forEach='/html'
> dataSource='fld' dataField='tika.text' rootEntity='true' >" +
> +        "<field xpath='//div'  column='foo'/>" +
> +        "<field xpath='//h1'  column='h1' />" +
> +        "</entity>" +
> +        "</entity>" +
> +        "</document>" +
> +        "</dataConfig>";
> +  }
> +
> }
> Index:
> solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
> ===================================================================
> ---
> solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
>   (revision 1526990)
> +++
> solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
>   (working copy)
> @@ -194,6 +194,8 @@
>    <field name="title" type="string" indexed="true" stored="true"/>
>    <field name="author" type="string" indexed="true" stored="true" />
>    <field name="text" type="text" indexed="true" stored="true" />
> +   <field name="h1" type="text" indexed="true" stored="true" />
> +   <field name="foo" type="text" indexed="true" stored="true" />
> 
>  </fields>
>  <!-- field for the QueryParser to use when an explicit fieldname is
> absent -->
> 
> 
> I find the SqlEntityProcessor part particularly odd.  That's the default
> right?:
> 2405 T12 C1 oashd.SqlEntityProcessor.initQuery ERROR The query failed
> 'null' java.lang.RuntimeException: unsupported type : class java.lang.String
> at
> org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:89)
> at
> org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:1)
> at
> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:59)
> at
> org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:73)
> at
> org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
> at
> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:469)
> at
> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:495)
> at
> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:408)
> at
> org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:323)
> at
> org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:231)
> at
> org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:411)
> at
> org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:476)
> at
> org.apache.solr.handler.dataimport.DataImportHandler.handleRequestBody(DataImportHandler.java:179)
> at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
> at org.apache.solr.core.SolrCore.execute(SolrCore.java:1859)
> at org.apache.solr.util.TestHarness.query(TestHarness.java:291)
> at
> org.apache.solr.handler.dataimport.AbstractDataImportHandlerTestCase.runFullImport(AbstractDataImportHandlerTestCase.java:96)
> at
> org.apache.solr.handler.dataimport.TestTikaEntityProcessor.testTikaHTMLMapperSubEntity(TestTikaEntityProcessor.java:124)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:601)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1559)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner.access$600(RandomizedRunner.java:79)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:737)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:773)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:787)
> at
> com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:53)
> at
> org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:50)
> at
> org.apache.lucene.util.TestRuleFieldCacheSanity$1.evaluate(TestRuleFieldCacheSanity.java:51)
> at
> org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:46)
> at
> com.carrotsearch.randomizedtesting.rules.SystemPropertiesInvariantRule$1.evaluate(SystemPropertiesInvariantRule.java:55)
> at
> org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:49)
> at
> org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:70)
> at
> org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:48)
> at
> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
> com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:358)
> at
> com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:782)
> at
> com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:442)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:746)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner$3.evaluate(RandomizedRunner.java:648)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner$4.evaluate(RandomizedRunner.java:682)
> at
> com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:693)
> at
> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
> com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:53)
> at
> org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:46)
> at
> org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:42)
> at
> com.carrotsearch.randomizedtesting.rules.SystemPropertiesInvariantRule$1.evaluate(SystemPropertiesInvariantRule.java:55)
> at
> com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:39)
> at
> com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:39)
> at
> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
> org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:43)
> at
> org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:48)
> at
> org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:70)
> at
> org.apache.lucene.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:55)
> at
> com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
> at
> com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:358)
> at java.lang.Thread.run(Thread.java:722)
> 
> 
> 
> On Fri, Sep 27, 2013 at 3:55 AM, Andreas Owen <a...@conx.ch> wrote:
> 
>> i removed the FieldReaderDataSource and dataSource="fld" but it didn't
>> help. i get the following for each document:
>>        DataImportHandlerException: Exception in invoking url null
>> Processing Document # 9
>>        nullpointerexception
>> 
>> 
>> On 26. Sep 2013, at 8:39 PM, P Williams wrote:
>> 
>>> Hi,
>>> 
>>> Haven't tried this myself but maybe try leaving out the
>>> FieldReaderDataSource entirely.  From my quick searching looks like it's
>>> tied to SQL.  Did you try copying the
>>> http://wiki.apache.org/solr/TikaEntityProcessor Advanced Parsing example
>>> exactly?  What happens when you leave out FieldReaderDataSource?
>>> 
>>> Cheers,
>>> Tricia
>>> 
>>> 
>>> On Thu, Sep 26, 2013 at 4:17 AM, Andreas Owen <a...@conx.ch> wrote:
>>> 
>>>> i'm using solr 4.3.1 and the dataimporter. i am trying to use
>>>> XPathEntityProcessor within the TikaEntityProcessor for indexing html-pages
>>>> but i'm getting this error for each document. i have also tried
>>>> dataField="tika.text" and dataField="text" to no avail. the nested
>>>> XPathEntityProcessor "detail" creates the error, the rest works fine. what
>>>> am i doing wrong?
>>>> 
>>>> error:
>>>> 
>>>> ERROR - 2013-09-26 12:08:49.006;
>>>> org.apache.solr.handler.dataimport.SqlEntityProcessor; The query failed
>>>> 'null'
>>>> java.lang.ClassCastException: java.io.StringReader cannot be cast to
>>>> java.util.Iterator
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:59)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:73)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:465)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:404)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:319)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:227)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:422)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:487)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DataImportHandler.handleRequestBody(DataImportHandler.java:179)
>>>>       at
>>>> 
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>>>       at org.apache.solr.core.SolrCore.execute(SolrCore.java:1820)
>>>>       at
>>>> 
>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:656)
>>>>       at
>>>> 
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:359)
>>>>       at
>>>> 
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:155)
>>>>       at
>>>> 
>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1307)
>>>>       at
>>>> 
>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:453)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>>>       at
>>>> 
>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:560)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1072)
>>>>       at
>>>> 
>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:382)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1006)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>>>       at org.eclipse.jetty.server.Server.handle(Server.java:365)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:485)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:937)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:998)
>>>>       at
>> org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:856)
>>>>       at
>>>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>>>       at
>>>> 
>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>>>       at
>>>> 
>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>>>       at java.lang.Thread.run(Unknown Source)
>>>> ERROR - 2013-09-26 12:08:49.022; org.apache.solr.common.SolrException;
>>>> Exception in entity :
>>>> detail:org.apache.solr.handler.dataimport.DataImportHandlerException:
>>>> java.lang.ClassCastException: java.io.StringReader cannot be cast to
>>>> java.util.Iterator
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:65)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.SqlEntityProcessor.nextRow(SqlEntityProcessor.java:73)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:243)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:465)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:491)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:404)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:319)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:227)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:422)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:487)
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.DataImportHandler.handleRequestBody(DataImportHandler.java:179)
>>>>       at
>>>> 
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>>>       at org.apache.solr.core.SolrCore.execute(SolrCore.java:1820)
>>>>       at
>>>> 
>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:656)
>>>>       at
>>>> 
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:359)
>>>>       at
>>>> 
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:155)
>>>>       at
>>>> 
>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1307)
>>>>       at
>>>> 
>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:453)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>>>       at
>>>> 
>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:560)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1072)
>>>>       at
>>>> 
>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:382)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1006)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>>>       at org.eclipse.jetty.server.Server.handle(Server.java:365)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:485)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:937)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:998)
>>>>       at
>> org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:856)
>>>>       at
>>>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>>>       at
>>>> 
>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>>>       at
>>>> 
>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>>>       at
>>>> 
>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>>>       at java.lang.Thread.run(Unknown Source)
>>>> Caused by: java.lang.ClassCastException: java.io.StringReader cannot be
>>>> cast to java.util.Iterator
>>>>       at
>>>> 
>> org.apache.solr.handler.dataimport.SqlEntityProcessor.initQuery(SqlEntityProcessor.java:59)
>>>>       ... 41 more
>>>> 
>>>> 
>>>> 
>>>> data-config.xml
>>>> 
>>>> <dataConfig>
>>>>       <dataSource type="BinURLDataSource" name="dataFile"/>
>>>>       <dataSource type="BinURLDataSource" name="dataUrl"/>
>>>>       <dataSource type="URLDataSource" name="main"/>
>>>>       <dataSource type="FieldReaderDataSource" name="fld"/>
>>>> <document>
>>>> <entity name="rec" processor="XPathEntityProcessor"
>>>> 
>> url="file:///C:\ColdFusion10\cfusion\solr\solr\tkbintranet\docImportUrl.xml"
>>>> forEach="/docs/doc" dataSource="main">
>>>>               <field column="title" xpath="//title" />
>>>>               <field column="id" xpath="//id" />
>>>>               <field column="file" xpath="//file" />
>>>>               <field column="url" xpath="//url" />
>>>>               <field column="urlParse" xpath="//urlParse" />
>>>>               <field column="last_modified" xpath="//last_modified" />
>>>>               <field column="Author" xpath="//author" />
>>>> 
>>>>               <entity name="tika" processor="TikaEntityProcessor"
>>>> url="${rec.urlParse}" dataSource="dataUrl" onError="skip" format="html">
>>>>                       <field column="text"/>
>>>> 
>>>>                       <entity name="detail" type="XPathEntityProcessor"
>>>> forEach="/html" dataSource="fld" dataField="${tika.text}"
>> rootEntity="true"
>>>> onError="skip">
>>>>                               <field xpath="//h1" column="h_1" />
>>>>                       </entity>
>>>>               </entity>
>>>>       </entity>
>>>> </document>
>>>> </dataConfig>
>> 
>> 

Reply via email to