i have the a fieldtype that uses ngramfilter whle indexing. is there a
setting that can force the ngramfilter to index smaller words then the
minGramSize? Mine is set to 3 and the search wont find word that are only
1 or 2 chars long. i would like to not set minGramSize=1 because the
results would be to diverse.
fieldtype:
<fieldType name="text_de" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!-- <filter class="solr.WordDelimiterFilterFactory"
types="at-under-alpha.txt"/> -->
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_de.txt" format="snowball"
enablePositionIncrements="true"/> <!-- remove common words -->
<filter class="solr.GermanNormalizationFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="German"/>
<!-- remove noun/adjective inflections like plural endings -->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
generateNumberParts="1" catenateWords="1" catenateNumbers="1"
catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.NGramFilterFactory" minGramSize="3"
maxGramSize="50"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhiteSpaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_de.txt" format="snowball"
enablePositionIncrements="true"/> <!-- remove common words -->
<filter class="solr.GermanNormalizationFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory"
language="German"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
generateNumberParts="1" catenateWords="1" catenateNumbers="1"
catenateAll="0" splitOnCaseChange="1"/>
</analyzer>
</fieldType>