Is there any chance that you were experimenting with an ngram filter for the field? If you were, and merely changed the field type without reindexing, this behavior makes sense. In other words, you appear to have had some filter that broke words into one and two-character terms.

Separate from that, the analyzer for a spellchecker should be very simple and preserve the structure of the term rather than decompose it, as WordDelimiterFilter does. So, be sure to use an analyzer that is very simple, such as StandardTokenizer and a lowercase filter, but nothing else. In general, use a separate field, like textSpell, that has the simple analyzer, and do a copyField from the original text field, which can still have a richer analyzer.

-- Jack Krupansky

-----Original Message----- From: Dixline
Sent: Friday, January 25, 2013 6:30 AM
To: solr-user@lucene.apache.org
Subject: Issue with spellcheck and autosuggest

Hi,

this is my spellcheck/autosuggest dictionary field and field type,

<!-- Field used as the spellcheck/autosuggest dictionary source. -->
<field name="searchText"
       type="spelltext"
       indexed="true"
       stored="true"
       multiValued="true"
       default="JulyMSO"/>

<!-- Field type behind searchText; index-time and query-time analysis are configured separately. -->
<fieldType name="spelltext" class="solr.TextField" positionIncrementGap="100">

  <!-- Index-time chain: word delimiter also emits catenated words/numbers. -->
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory"
            protected="protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

  <!-- Query-time chain: adds synonym expansion first; word delimiter does not catenate. -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory"
            protected="protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

</fieldType>

And this is my solrconfig.xml,

<!-- Spellcheck component: two dictionaries ("default" and "wordbreak"), both reading the searchText field. -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">

  <!-- Field type whose analyzer is applied to the spellcheck query. -->
  <str name="queryAnalyzerFieldType">spelltext</str>

  <!-- Dictionary "default" -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">searchText</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="buildOnOptimize">true</str>
    <str name="distanceMeasure">internal</str>
    <float name="accuracy">0.1</float>
    <int name="maxEdits">2</int>
    <int name="minPrefix">1</int>
    <int name="maxInspections">5</int>
    <int name="minQueryLength">4</int>
    <float name="maxQueryFrequency">0.01</float>
    <float name="thresholdTokenFrequency">.01</float>
  </lst>

  <!-- Dictionary "wordbreak" -->
  <lst name="spellchecker">
    <str name="name">wordbreak</str>
    <str name="classname">solr.WordBreakSolrSpellChecker</str>
    <str name="field">searchText</str>
    <str name="combineWords">true</str>
    <str name="breakWords">true</str>
    <str name="buildOnOptimize">true</str>
    <int name="maxChanges">10</int>
  </lst>

</searchComponent>
<!-- /spell handler: runs the "spellcheck" component as a last-component with the defaults below. -->
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <str name="df">searchText</str>
    <!-- spellcheck.dictionary is listed twice, once per dictionary name. -->
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck.dictionary">wordbreak</str>
    <str name="spellcheck">true</str>
    <str name="spellcheck.onlyMorePopular">true</str>
    <str name="spellcheck.count">6</str>
    <str name="spellcheck.extendedResults">false</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">false</str>
    <str name="spellcheck.maxCollationTries">3</str>
    <str name="spellcheck.maxCollations">1</str>
  </lst>
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>



  <!-- Autosuggest component: a SpellCheckComponent registered as "suggest", reading the searchText field. -->
  <searchComponent name="suggest" class="solr.SpellCheckComponent">
    <str name="queryAnalyzerFieldType">spelltext</str>
    <lst name="spellchecker">
      <str name="name">suggest</str>
      <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
      <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
      <str name="field">searchText</str>
      <str name="buildOnOptimize">true</str>
      <float name="accuracy">0.1</float>
      <float name="threshold">0.005</float>
    </lst>
  </searchComponent>
 <!-- /suggest handler: its component list contains only the "suggest" component. -->
 <requestHandler name="/suggest"
                 class="org.apache.solr.handler.component.SearchHandler">
   <lst name="defaults">
     <str name="df">searchText</str>
     <str name="spellcheck.dictionary">suggest</str>
     <str name="spellcheck">true</str>
     <str name="spellcheck.onlyMorePopular">false</str>
     <str name="spellcheck.count">6</str>
     <str name="spellcheck.extendedResults">false</str>
     <str name="spellcheck.collate">true</str>
     <str name="spellcheck.collateExtendedResults">false</str>
     <str name="spellcheck.maxCollationTries">3</str>
     <str name="spellcheck.maxCollations">1</str>
   </lst>
   <arr name="components">
     <str>suggest</str>
   </arr>
 </requestHandler>


If I try spellcheck, I'm not getting proper suggestions. For example, there's a
word "yellow" in my Solr document. If I search for yello, I'm getting
suggestions such as yellow, ye ll wo, y e ll ow, ye ll ow. Why is it coming out like
this?

And when I try autosuggest, I'm not getting any suggestions for any query.

Can anyone help me with this?

Thanks in advance.

-Dixline.M






--
View this message in context: http://lucene.472066.n3.nabble.com/Issue-with-spellcheck-and-autosuggest-tp4036208.html Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to