I'm trying to index some data which often includes domain names. I'd
like to remove the .com TLD, so I have modified the text_en field type
by adding a PatternReplaceFilterFactory filter. However, it doesn't
appear to be working as a search for "text:(mydomain.com)" matches
records but "text:(mydomain)" does not.
<fieldType name="text_en" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true"
ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"
ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory"
pattern="([-a-z])\.com" replacement="$1"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true"
ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"
ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory"
pattern="([-a-z])\.com" replacement="$1"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
The actual field definitions are as follows:
<field name="companyName" type="text_en" indexed="true"
stored="true" required="true" />
<field name="jobTitle" type="text_en" indexed="true"
stored="true" required="true" />
<field name="text" type="text_general" indexed="true"
stored="false" />
<copyField source="companyName" dest="text" />
<copyField source="jobTitle" dest="text" />