Problem with DataImportHandler multi-threaded
---------------------------------------------

                 Key: SOLR-3360
                 URL: https://issues.apache.org/jira/browse/SOLR-3360
             Project: Solr
          Issue Type: Bug
    Affects Versions: 3.6
         Environment: Solr 3.6.0, Apache Tomcat 6.0.20, jdk1.6.0_15, Windows XP
            Reporter: Claudio R


Hi,

If I use dataimport with 1 thread, I get:

<lst name="statusMessages">
   <str name="Total Requests made to DataSource">5001</str>
   <str name="Total Rows Fetched">1000</str>
   <str name="Total Documents Skipped">0</str>
   <str name="Full Dump Started">2012-04-16 11:21:57</str>
   <str name="">Indexing completed. Added/Updated: 1000 documents. Deleted 0 
documents.</str>
   <str name="Committed">2012-04-16 11:23:19</str>
   <str name="Total Documents Processed">1000</str>
   <str name="Time taken">0:1:22.390</str>
</lst>

If I use dataimport with 10 threads, I get:

<lst name="statusMessages">
   <str name="Total Requests made to DataSource">0</str>
   <str name="Total Rows Fetched">10000</str>
   <str name="Total Documents Skipped">0</str>
   <str name="Full Dump Started">2012-04-16 11:31:43</str>
   <str name="">Indexing completed. Added/Updated: 10000 documents. Deleted 0 
documents.</str>
   <str name="Committed">2012-04-16 11:41:50</str>
   <str name="Total Documents Processed">10000</str>
   <str name="Time taken">0:10:7.586</str>
</lst>

The configuration with 10 threads took about 10 times longer than the 
configuration with 1 thread, and it reported 10000 rows fetched and 10000 
documents processed, although I have only 1000 records in the database.
My db-data-config.xml is shown below:

<?xml version="1.0" encoding="UTF-8" ?>
<dataConfig>
   <dataSource driver="com.microsoft.sqlserver.jdbc.SQLServerDriver" 
url="jdbc:sqlserver://200.XXX.XXX.XXX:1433;databaseName=test" user="user" 
password="pass"/>
      <document>
         <entity name="indice" rootEntity="true" threads="10" 
transformer="RegexTransformer,TemplateTransformer" query="select top 1000 
i.id_indice, i.a, i.b from indice i where i.status = 'I'" 
deltaImportQuery="i.id_indice, i.a, i.b from indice i where id_indice in 
('${dataimporter.delta.id_indice}')" deltaQuery="select id_indice from indice 
where status='I' and data_hora_modificacao >= convert(datetime, 
'${dataimporter.last_index_time}', 120)" deletedPkQuery="select id_indice from 
indice where status='D' and data_hora_modificacao >= convert(datetime, 
'${dataimporter.last_index_time}', 120)">        
            <field column="id_indice" name="id_indice" />
            <field column="a" name="a" />
            <field column="b" name="b" />
            <entity name="filtro" 
transformer="RegexTransformer,TemplateTransformer" query="select categoria, 
sub_categoria from filtro where indice_id_indice = '${indice.id_indice}'">
               <field name="filtro_categoria" column="categoria" />
               <field name="filtro_sub_categoria" column="sub_categoria" />
               <field name="nv_sub_categoria" column="nv_sub_categoria" 
template="${filtro.categoria}|${filtro.sub_categoria}" />
            </entity>
            <entity name="pagina_relacionada" query="select url from 
pagina_relacionada where indice_id_indice = '${indice.id_indice}'">
               <field name="pagina_relacionada_url" column="url" />
            </entity>
            <entity name="veja_mais" query="select chamada, url from veja_mais 
where indice_id_indice = '${indice.id_indice}'">
               <field name="veja_mais_chamada" column="chamada" />
               <field name="veja_mais_url" column="url" />
            </entity>
            <entity name="video" query="select url from video where 
indice_id_indice = '${indice.id_indice}'">
               <field name="video_url" column="url" />
            </entity>
            <entity name="galeria" query="select url from galeria where 
indice_id_indice = '${indice.id_indice}'">
               <field name="galeria_url" column="url" />
            </entity>
         </entity>
      </document>
</dataConfig>

Thanks.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: 
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org
For additional commands, e-mail: dev-h...@lucene.apache.org

Reply via email to