Re: Solr Phonetic Search Highlight issue in search results

Erick Erickson Tue, 26 Mar 2013 17:37:03 -0700

How would you expect it to highlight successfully? The term is "fakt";
there's nothing built in (and, indeed, there couldn't be) to un-phoneticize it
into "fact" and apply that to the Content field. The whole point of
phonetic processing is to do a lossy translation from the word into some
variant, losing precision all the way...


So this behavior is unsurprising...

Best
Erick




On Tue, Mar 26, 2013 at 7:28 AM, Soumyanayan Kar <soumyanayan....@rebaca.com
> wrote:

> When we issue a query with Phonetic Search, it returns the correct
> documents but does not return the highlights. When we use Stemming or
> Synonym searches, we get the proper highlights.
>
>
>
> For example, when we execute a phonetic query for the term
> "fakt" (ContentSearchPhonetic:fakt) in the Solr Admin interface, it returns
> two documents containing the term "fact" (the phonetic token equivalent),
> but the list of highlights is empty, as shown in the response below.
>
>
>
>     <response>
>
>     <lst name="responseHeader">
>
>     <int name="status">0</int>
>
>     <int name="QTime">16</int>
>
>     <lst name="params">
>
>       <str name="q">ContentSearchPhonetic:fakt</str>
>
>       <str name="wt">xml</str>
>
>     </lst>
>
>   </lst>
>
>     <result name="response" numFound="2" start="0">
>
>         <doc>
>
>           <long name="DocId">1</long>
>
>           <str name="DocTitle">Doc 1</str>
>
>           <str name="Content">Anyway, this game was excellent and was well
> worth the time.  The graphics are truly amazing and the sound track was
> pretty pleasant also. The  preacher was in  fact a thief.</str>
>
>           <long name="_version_">1430480998833848320</long>
>
>         </doc>
>
>         <doc>
>
>           <long name="DocId">2</long>
>
>           <str name="DocTitle">Doc 2</str>
>
>           <str name="Content">stunning. The  preacher was in  fact an
> excellent thief who  had stolen the original manuscript of Hamlet  from an
> exhibit on the  Riviera, where  he also  acquired his remarkable and
> tan.</str>
>
>           <long name="_version_">1430480998841188352</long>
>
>         </doc>
>
>       </result>
>
>       <lst name="highlighting">
>
>         <lst name="1"/>
>
>         <lst name="2"/>
>
>       </lst>
>
>     </response>
>
>
>
> Relevant section of Solr schema:
>
>
>
>     <field name="DocId" type="long" indexed="true" stored="true"
> required="true"/>
>
>     <field name="DocTitle" type="string" indexed="false" stored="true"
> required="true"/>
>
>     <field name="Content" type="text_general" indexed="false" stored="true"
> required="true"/>
>
>
>
>     <field name="ContentSearch" type="text_general" indexed="true"
> stored="false" multiValued="true"/>
>
>     <field name="ContentSearchStemming" type="text_stem" indexed="true"
> stored="false" multiValued="true"/>
>
>     <field name="ContentSearchPhonetic" type="text_phonetic" indexed="true"
> stored="false" multiValued="true"/>
>
>     <field name="ContentSearchSynonym" type="text_synonym" indexed="true"
> stored="false" multiValued="true"/>
>
>
>
>     <uniqueKey>DocId</uniqueKey>
>
>     <copyField source="Content" dest="ContentSearch"/>
>
>     <copyField source="Content" dest="ContentSearchStemming"/>
>
>     <copyField source="Content" dest="ContentSearchPhonetic"/>
>
>     <copyField source="Content" dest="ContentSearchSynonym"/>
>
>
>
>     <fieldType name="text_stem" class="solr.TextField" >
>
>       <analyzer>
>
>          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>
>          <filter class="solr.SnowballPorterFilterFactory"/>
>
>       </analyzer>
>
>     </fieldType>
>
>
>
>     <fieldType name="text_phonetic" class="solr.TextField" >
>
>       <analyzer>
>
>          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>
>          <filter class="solr.PhoneticFilterFactory"
> encoder="DoubleMetaphone" inject="false"/>
>
>       </analyzer>
>
>     </fieldType>
>
>
>
>     <fieldType name="text_synonym" class="solr.TextField" >
>
>     <analyzer>
>
>       <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>
>       <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
> ignoreCase="true" expand="true"/>
>
>     </analyzer>
>
>     </fieldType>
>
>
>
> Relevant section of Solr config:
>
>
>
>     <requestHandler name="/select" class="solr.SearchHandler">
>
>     <!-- default values for query parameters can be specified, these
>
>          will be overridden by parameters in the request
>
>       -->
>
>      <lst name="defaults">
>
>        <str name="echoParams">explicit</str>
>
>        <int name="rows">100</int>
>
>        <str name="df">ContentSearch</str>
>
>      <bool name="hl">true</bool>
>
>     <str name="hl.fl">Content</str>
>
>     <str name="f.Content.hl.fragsize">150</str>
>
>       <str name="f.Content.hl.snippets">40</str>
>
>      </lst>
>
>     </requestHandler>
>
>     <searchComponent class="solr.HighlightComponent" name="highlight">
>
>     <highlighting>
>
>     <!-- Configure the standard fragmenter -->
>
>     <!-- This could most likely be commented out in the "default" case -->
>
>     <fragmenter name="gap"
>
>                 default="true"
>
>                 class="solr.highlight.GapFragmenter">
>
>       <lst name="defaults">
>
>         <int name="hl.fragsize">100</int>
>
>       </lst>
>
>     </fragmenter>
>
>
>
>     <!-- A regular-expression-based fragmenter
>
>          (for sentence extraction)
>
>       -->
>
>     <fragmenter name="regex"
>
>                 class="solr.highlight.RegexFragmenter">
>
>       <lst name="defaults">
>
>         <!-- slightly smaller fragsizes work better because of slop -->
>
>         <int name="hl.fragsize">70</int>
>
>         <!-- allow 50% slop on fragment sizes -->
>
>         <float name="hl.regex.slop">0.5</float>
>
>         <!-- a basic sentence pattern -->
>
>         <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
>
>       </lst>
>
>     </fragmenter>
>
>
>
> Has anyone experienced this kind of behaviour before? We need some
> direction for troubleshooting.
>
>
>
> Soumya.
>
>
>
>
>
>

Re: Solr Phonetic Search Highlight issue in search results

Reply via email to