[ 
https://issues.apache.org/jira/browse/SOLR-7250?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Wasim updated SOLR-7250:
------------------------
    Description: 
Original frequency is not matching with suggestion frequency in SOLR

For the query "whs is", the suggestion "who is" is reported with a frequency 
of 73, which differs from its actual original frequency (94).
For your reference, I am attaching two images of the output.

My schema.xml

<field name="gram" type="textSpell" indexed="true" stored="true" 
required="true" multiValued="false"/>
<field name="gram_ci" type="textSpellCi" indexed="true" stored="false" 
multiValued="false"/>

<copyField source="gram" dest="gram_ci"/>

<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
        <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
</fieldType>
<fieldType name="textSpellCi" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
        <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
</fieldType>

solrconfig.xml

<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
        <str name="queryAnalyzerFieldType">textSpellCi</str>
        <lst name="spellchecker">
                <str name="name">default</str>
                <str name="field">gram_ci</str>
                <str name="classname">solr.DirectSolrSpellChecker</str>
                <str name="distanceMeasure">internal</str>
                <float name="accuracy">0.5</float>
                <int name="maxEdits">2</int>
                <int name="minPrefix">0</int>
                <int name="maxInspections">5</int>
                <int name="minQueryLength">2</int>
                <float name="maxQueryFrequency">0.99</float>
                <str name="comparatorClass">freq</str>
                <float name="thresholdTokenFrequency">0.0</float>
        </lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
        <lst name="defaults">
                <str name="df">gram_ci</str>
                <str name="spellcheck.dictionary">default</str>
                <str name="spellcheck">on</str>
                <str name="spellcheck.extendedResults">true</str>
                <str name="spellcheck.count">15</str>
                <str name="spellcheck.alternativeTermCount">10</str>
                <str name="spellcheck.onlyMorePopular">false</str>
        </lst>
        <arr name="last-components">
                <str>spellcheck</str>
        </arr>
</requestHandler>


For more information have a look at this
http://stackoverflow.com/questions/28857915/original-frequency-is-not-matching-with-suggestion-frequency-in-solr

  was:
Original frequency is not matching with suggestion frequency in SOLR

For the query "whs is", the suggestion "who is" is reported with a frequency 
of 73, which differs from its actual original frequency (94).
For your reference, I am attaching two images of the output.

My schema.xml

<field name="gram" type="textSpell" indexed="true" stored="true" 
required="true" multiValued="false"/>
<field name="gram_ci" type="textSpellCi" indexed="true" stored="false" 
multiValued="false"/>

<copyField source="gram" dest="gram_ci"/>

<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
        <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
</fieldType>
<fieldType name="textSpellCi" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
        <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
</fieldType>

solrconfig.xml

<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
        <str name="queryAnalyzerFieldType">textSpellCi</str>
        <lst name="spellchecker">
                <str name="name">default</str>
                <str name="field">gram_ci</str>
                <str name="classname">solr.DirectSolrSpellChecker</str>
                <str name="distanceMeasure">internal</str>
                <float name="accuracy">0.5</float>
                <int name="maxEdits">2</int>
                <int name="minPrefix">0</int>
                <int name="maxInspections">5</int>
                <int name="minQueryLength">2</int>
                <float name="maxQueryFrequency">0.99</float>
                <str name="comparatorClass">freq</str>
                <float name="thresholdTokenFrequency">0.0</float>
        </lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
        <lst name="defaults">
                <str name="df">gram_ci</str>
                <str name="spellcheck.dictionary">default</str>
                <str name="spellcheck">on</str>
                <str name="spellcheck.extendedResults">true</str>
                <str name="spellcheck.count">15</str>
                <str name="spellcheck.alternativeTermCount">10</str>
                <str name="spellcheck.onlyMorePopular">false</str>
        </lst>
        <arr name="last-components">
                <str>spellcheck</str>
        </arr>
</requestHandler>


> In spellcheck.extendedResults=true freq value of suggestion differs from its 
> actual origFreq 
> --------------------------------------------------------------------------------------------
>
>                 Key: SOLR-7250
>                 URL: https://issues.apache.org/jira/browse/SOLR-7250
>             Project: Solr
>          Issue Type: New Feature
>         Environment: solr 4.10.4
>            Reporter: Wasim
>   Original Estimate: 24h
>  Remaining Estimate: 24h
>
> Original frequency is not matching with suggestion frequency in SOLR
> For the query "whs is", the suggestion "who is" is reported with a frequency 
> of 73, which differs from its actual original frequency (94).
> For your reference, I am attaching two images of the output.
> My schema.xml
> <field name="gram" type="textSpell" indexed="true" stored="true" 
> required="true" multiValued="false"/>
> <field name="gram_ci" type="textSpellCi" indexed="true" stored="false" 
> multiValued="false"/>
> <copyField source="gram" dest="gram_ci"/>
> <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
>         <analyzer type="index">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
> minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
>         <analyzer type="query">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
> minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
> </fieldType>
> <fieldType name="textSpellCi" class="solr.TextField" 
> positionIncrementGap="100">
>         <analyzer type="index">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
> minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
>         <analyzer type="query">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" 
> minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
> </fieldType>
> solrconfig.xml
> <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
>         <str name="queryAnalyzerFieldType">textSpellCi</str>
>         <lst name="spellchecker">
>                 <str name="name">default</str>
>                 <str name="field">gram_ci</str>
>                 <str name="classname">solr.DirectSolrSpellChecker</str>
>                 <str name="distanceMeasure">internal</str>
>                 <float name="accuracy">0.5</float>
>                 <int name="maxEdits">2</int>
>                 <int name="minPrefix">0</int>
>                 <int name="maxInspections">5</int>
>                 <int name="minQueryLength">2</int>
>                 <float name="maxQueryFrequency">0.99</float>
>                 <str name="comparatorClass">freq</str>
>                 <float name="thresholdTokenFrequency">0.0</float>
>         </lst>
> </searchComponent>
> <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
>         <lst name="defaults">
>                 <str name="df">gram_ci</str>
>                 <str name="spellcheck.dictionary">default</str>
>                 <str name="spellcheck">on</str>
>                 <str name="spellcheck.extendedResults">true</str>
>                 <str name="spellcheck.count">15</str>
>                 <str name="spellcheck.alternativeTermCount">10</str>
>                 <str name="spellcheck.onlyMorePopular">false</str>
>         </lst>
>         <arr name="last-components">
>                 <str>spellcheck</str>
>         </arr>
> </requestHandler>
> For more information have a look at this
> http://stackoverflow.com/questions/28857915/original-frequency-is-not-matching-with-suggestion-frequency-in-solr



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org
For additional commands, e-mail: dev-h...@lucene.apache.org

Reply via email to