On 10/22/2010 10:06 AM, Juan Manuel Alvarez wrote:
My question is:
Every time I do an import operation (delta or full) with DIH, I need
to sync the index with only one schema, so... is there a way to
pass a custom parameter with the schema name to DIH so I can build the
query with the corresponding schema name?
Yes, there is. Below is the latest version of my DIH config, used with a
MySQL database. Almost everything in the SELECT statement is
specified by the input URL, which gets built using the following template:
http://HOST:PORT/solr/CORE/dataimport?command=COMMAND&dbHost=DBHOST&dbSchema=DBSCHEMA&dataTable=DATATABLE&sgTable=SGTABLE&numShards=NUMSHARDS&modVal=MODVAL&minDid=MINDID&maxDid=MAXDID
<dataConfig>
  <!--
    Solr DataImportHandler (DIH) configuration for a MySQL source.

    Every ${dataimporter.request.NAME} placeholder below is filled from the
    request parameter NAME on the /dataimport URL. This config expects:
      dbHost, dbSchema   : database host and schema used in the JDBC URL
      dataTable, sgTable : main table and the joined search-group table
      minDid, maxDid     : exclusive lower / inclusive upper bound on did
      numShards, modVal  : rows are kept when (did % numShards) is in modVal

    SECURITY: these values are spliced into the JDBC URL and the SQL text
    verbatim. Only trusted clients may be allowed to reach the dataimport
    handler; anything else is an SQL-injection risk.
  -->

  <!--
    batchSize="-1" is the documented DIH setting for MySQL that makes the
    driver stream rows instead of buffering the whole result set
    (NOTE(review): confirm against the Solr / Connector-J versions in use).
  -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              encoding="UTF-8"
              url="jdbc:mysql://${dataimporter.request.dbHost}:3306/${dataimporter.request.dbSchema}?zeroDateTimeBehavior=convertToNull"
              batchSize="-1"
              user="removed"
              password="removed"/>

  <document>
    <!--
      query and deltaImportQuery are intentionally the same statement: the
      did range and shard filter from the request decide which rows are
      imported for both full and delta imports.

      Comparison operators are written as &lt; and &gt; because a literal
      less-than sign is not allowed inside an XML attribute value.

      deltaQuery only needs to return a row so that deltaImportQuery runs;
      its result is aliased to the pk column name (did) so DIH can resolve
      the declared primary key.
    -->
    <entity name="dataTable" pk="did"
            query="
              SELECT d.*, FROM_UNIXTIME(d.post_date) AS pd,
                     s.search_group_map AS sg
              FROM ${dataimporter.request.dataTable} d
              LEFT JOIN ${dataimporter.request.sgTable} s
                ON d.feature = s.featurecode
              WHERE did &gt; ${dataimporter.request.minDid}
                AND did &lt;= ${dataimporter.request.maxDid}
                AND (did % ${dataimporter.request.numShards})
                    IN (${dataimporter.request.modVal})
              GROUP BY d.did"
            deltaImportQuery="
              SELECT d.*, FROM_UNIXTIME(d.post_date) AS pd,
                     s.search_group_map AS sg
              FROM ${dataimporter.request.dataTable} d
              LEFT JOIN ${dataimporter.request.sgTable} s
                ON d.feature = s.featurecode
              WHERE did &gt; ${dataimporter.request.minDid}
                AND did &lt;= ${dataimporter.request.maxDid}
                AND (did % ${dataimporter.request.numShards})
                    IN (${dataimporter.request.modVal})
              GROUP BY d.did"
            deltaQuery="SELECT MAX(d.did) AS did FROM
              ${dataimporter.request.dataTable} d"
            >
      <!-- The lone angle bracket above is correct: it closes the entity
           start tag after its multi-line attributes. -->
      <!-- Splits the search_group column on a semicolon followed by any
           number of spaces.
           NOTE(review): the joined value is aliased as sg in the queries,
           so confirm search_group is the intended source column. -->
      <field column="search_group" splitBy="; *"/>
    </entity>
  </document>
</dataConfig>