Hi All,

 

I am a new user of Solr.

We are now trying to enable searching on the Digg dataset.

It has story_id as the primary key, and comment_id is the id of a comment
made on that story, so story_id and comment_id have a one-to-many
relationship.

Each of these comments can be replied to by several repliers, so comment_id
and repliers also have a one-to-many relationship.

 

The problem is that, within a single returned document, the search result
shows an array of comment_ids and an array of repliers, with no way of
knowing which repliers replied to which comment.

For example: now we get comment_id:[c1,c2,...,cn],
repliers:[r1,r2,r3,...,rm]. Can we get something like
comment_id:[c1,c2,...,cn], repliers:[{r1,r2},{},{r3},...,{rm-1,rm}] so that
{r1,r2} corresponds to c1?

 

Our current data-config is attached:

<dataConfig>

    <!-- JDBC connection to the MySQL database "diggdataset" on localhost. -->
    <dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver"
                autoreconnect="true" netTimeoutForStreamingResults="1200"
                url="jdbc:mysql://localhost/diggdataset" batchSize="-1" user="root"
                password=" "/>

    <document>

        <!--
          Root entity: rows of the story table, keyed by story_id.
          Nesting below is: story > dugg_list, and story > commenttable > replytable.

          deltaImportQuery filters on story_id with a single "=": the delta
          variable is ${dataimporter.delta.story_id}, and "==" is not a MySQL
          comparison operator.
        -->
        <entity name="story" pk="story_id"
                query="select * from story"
                deltaImportQuery="select * from story where story_id='${dataimporter.delta.story_id}'"
                deltaQuery="select story_id from story where last_modified > '${dataimporter.last_index_time}'">

            <field column="link" name="link" />
            <field column="title" name="title" />
            <field column="description" name="story_content" />
            <field column="digg" name="positiveness" />
            <field column="comment" name="spreading_number" />
            <field column="user_id" name="author" />
            <field column="profile_view" name="user_popularity" />
            <field column="topic" name="topic" />
            <field column="timestamp" name="timestamp" />

            <!--
              Child of story: dugg_list rows for the current story.
              deltaQuery returns story_id because parentDeltaQuery reads
              ${dugg_list.story_id} from the rows it produces.
              NOTE(review): the original selected a column named SID here;
              confirm it is not needed elsewhere.
            -->
            <entity name="dugg_list" pk="story_id"
                    query="select * from dugg_list where story_id='${story.story_id}'"
                    deltaQuery="select story_id from dugg_list where last_modified > '${dataimporter.last_index_time}'"
                    parentDeltaQuery="select story_id from story where story_id='${dugg_list.story_id}'">

                <field name="viewer" column="dugger" />

            </entity>

            <!--
              Child of story: commenttable rows for the current story.
              deltaQuery returns comment_id (the declared pk) and story_id
              (read by parentDeltaQuery as ${commenttable.story_id}).
            -->
            <entity name="commenttable" pk="comment_id"
                    query="select * from commenttable where story_id='${story.story_id}'"
                    deltaQuery="select comment_id, story_id from commenttable where last_modified > '${dataimporter.last_index_time}'"
                    parentDeltaQuery="select story_id from story where story_id='${commenttable.story_id}'">

                <field name="comment_id" column="comment_id" />
                <field name="spreading_user" column="replier" />
                <field name="comment_positiveness" column="up" />
                <field name="comment_negativeness" column="down" />
                <field name="user_comment" column="content" />
                <field name="user_comment_timestamp" column="timestamp" />

                <!--
                  Child of commenttable: replytable rows for the current comment.
                  deltaQuery returns comment_id because parentDeltaQuery reads
                  ${replytable.comment_id}.
                -->
                <entity name="replytable"
                        query="select * from replytable where comment_id='${commenttable.comment_id}'"
                        deltaQuery="select comment_id from replytable where last_modified > '${dataimporter.last_index_time}'"
                        parentDeltaQuery="select comment_id from commenttable where comment_id='${replytable.comment_id}'">

                    <field name="replier_id" column="replier_id" />
                    <field name="reply_content" column="content" />
                    <field name="reply_positiveness" column="up" />
                    <field name="reply_negativeness" column="down" />
                    <field name="reply_timestamp" column="timestamp" />

                </entity>

            </entity>

        </entity>

    </document>

</dataConfig>

 

Please help me with this.

Many thanks

 

Vivian

 

 

 

Reply via email to