Is there a way to tell NGramFilterFactory at index time that numbers should never be split into n-grams? Then the query should be able to find numbers.
Or do I have to change the minimum n-gram size for numbers (not alphabetic tokens) to 1, if that is possible? In other words, index the whole number as a single token instead of all its possible n-grams. The Solr analysis page shows that only WDF produces tokens without the underscore; the other stages keep it. Can I tell the query to search numbers differently with NGTF, WT, LCF or whatever? I also tried <filter class="solr.WordDelimiterFilterFactory" types="at-under-alpha.txt"/> with the type mappings @ => ALPHA _ => ALPHA I have gotten nearly everything to work. There are two queries where I don't get back what I want. "avaloq frage 1" -> only returns results if I set minGramSize=1 while indexing. "yh_cug" -> the query parser doesn't remove "_" but the indexer does (WDF), so there is no match. Is there a way to also query the whole term "avaloq frage 1" without tokenizing it? Fieldtype: <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.WordDelimiterFilterFactory" types="at-under-alpha.txt"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/> <!-- remove common words --> <filter class="solr.GermanNormalizationFilterFactory"/> <filter class="solr.SnowballPorterFilterFactory" language="German"/> <!-- remove noun/adjective inflections like plural endings --> <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> </analyzer> <analyzer type="query"> <tokenizer class="solr.WhiteSpaceTokenizerFactory"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.WordDelimiterFilterFactory" types="at-under-alpha.txt"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" 
enablePositionIncrements="true"/> <!-- remove common words --> <filter class="solr.GermanNormalizationFilterFactory"/> <filter class="solr.SnowballPorterFilterFactory" language="German"/> </analyzer> </fieldType> Solrconfig: > <queryParser name="synonym_edismax" > class="solr.SynonymExpandingExtendedDismaxQParserPlugin"> > <lst name="synonymAnalyzers"> > <lst name="myCoolAnalyzer"> > <lst name="tokenizer"> > <str name="class">standard</str> > </lst> > <lst name="filter"> > <str name="class">shingle</str> > <str name="outputUnigramsIfNoShingles">true</str> > <str name="outputUnigrams">true</str> > <str name="minShingleSize">2</str> > <str name="maxShingleSize">4</str> > </lst> > <lst name="filter"> > <str name="class">synonym</str> > <str name="tokenizerFactory">solr.KeywordTokenizerFactory</str> > <str name="synonyms">synonyms.txt</str> > <str name="expand">true</str> > <str name="ignoreCase">true</str> > </lst> > </lst> > </lst> > </queryParser> > > <requestHandler name="/select2" class="solr.SearchHandler"> > <lst name="defaults"> > <str name="echoParams">explicit</str> > <int name="rows">10</int> > <str name="defType">synonym_edismax</str> > <str name="synonyms">true</str> > <str name="qf">plain_text^10 editorschoice^200 > title^20 h_*^14 > tags^10 thema^15 inhaltstyp^6 breadcrumb^6 doctype^10 > contentmanager^5 links^5 > last_modified^5 url^5 > </str> > <str name="bq">(expiration:[NOW TO *] OR (*:* > -expiration:*))^6</str> > <str name="bf">div(clicks,max(displays,1))^8</str> <!-- tested --> > > <str name="df">text</str> > <str name="fl">*,path,score</str> > <str name="wt">json</str> > <str name="q.op">AND</str> > > <!-- Highlighting defaults --> > <str name="hl">on</str> > <str name="hl.fl">plain_text,title</str> > <str name="hl.fragSize">200</str> > <str name="hl.simple.pre">&lt;b&gt;</str> > <str name="hl.simple.post">&lt;/b&gt;</str> > > <!-- <lst name="invariants"> --> > <str name="facet">on</str> > <str name="facet.mincount">1</str> > <str 
name="facet.field">{!ex=inhaltstyp_s}inhaltstyp_s</str> > <str name="f.inhaltstyp_s.facet.sort">index</str> > <str name="facet.field">{!ex=doctype}doctype</str> > <str name="f.doctype.facet.sort">index</str> > <str name="facet.field">{!ex=thema_f}thema_f</str> > <str name="f.thema_f.facet.sort">index</str> > <str name="facet.field">{!ex=author_s}author_s</str> > <str name="f.author_s.facet.sort">index</str> > <str > name="facet.field">{!ex=sachverstaendiger_s}sachverstaendiger_s</str> > <str name="f.sachverstaendiger_s.facet.sort">index</str> > <str name="facet.field">{!ex=veranstaltung_s}veranstaltung_s</str> > <str name="f.veranstaltung_s.facet.sort">index</str> > <str name="facet.date">{!ex=last_modified}last_modified</str> > <str name="facet.date.gap">+1MONTH</str> > <str name="facet.date.end">NOW/MONTH+1MONTH</str> > <str name="facet.date.start">NOW/MONTH-36MONTHS</str> > <str name="facet.date.other">after</str> > > </lst> > </requestHandler>