Hello, I am experimenting with the word_delimiter token filter and have an example in which a special character is indexed. The character is in the type table for the word delimiter. Analysis of the tokenization looks good, but when I attempt a match query, it doesn't seem to respect the tokenization as expected. The example indexes 'HER2+ Breast Cancer'. The tokenization is 'her2+', 'breast', 'cancer', which is good. However, searching for 'HER2\\+' results in a hit, and so does searching for 'HER2\\-'.
#!/bin/sh
#
# Reproduction script for a word_delimiter experiment against an
# Elasticsearch node.
#
# What it does, in order:
#   1. Creates the "specialchars" index with a custom analyzer
#      (whitespace tokenizer -> lowercase -> word_delimiter whose type_table
#      treats '+' and '-' as ALPHA) and maps field "msg" of mapping type
#      "specialchars" to that analyzer.
#   2. Indexes three sample documents and refreshes the index.
#   3. Prints the analysis of "HER2+ Breast Cancer" for field "msg".
#   4. Runs two match queries ("HER2+" and "HER2-") against "msg".
#   5. Deletes the index again.
#
# Environment:
#   ES_URL  base URL of the Elasticsearch node (default: http://localhost:9200)

ES_URL=${ES_URL:-http://localhost:9200}
INDEX=specialchars
# Mapping type that carries the schar_analyzer mapping for "msg".
TYPE=specialchars

# 1. Index settings, analysis chain and mapping.
curl -XPUT "${ES_URL}/${INDEX}" -d '{
  "settings": {
    "index": {
      "number_of_shards": 1,
      "number_of_replicas": 1
    },
    "analysis": {
      "filter": {
        "special_character_spliter": {
          "type": "word_delimiter",
          "split_on_numerics": false,
          "type_table": ["+ => ALPHA", "- => ALPHA"]
        }
      },
      "analyzer": {
        "schar_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": ["lowercase", "special_character_spliter"]
        }
      }
    }
  },
  "mappings": {
    "specialchars": {
      "properties": {
        "msg": {
          "type": "string",
          "analyzer": "schar_analyzer"
        }
      }
    }
  }
}'

# 2. Sample documents.
# The document URL is /<index>/<type>/<id>. The type segment must be present:
# a path such as /specialchars/1 is read as type "1" with an auto-generated
# id, so the document would get a dynamic mapping for "msg" instead of the
# schar_analyzer mapping defined above.
curl -XPUT "${ES_URL}/${INDEX}/${TYPE}/1" -d '{"msg" : "HER2+ Breast Cancer"}'
curl -XPUT "${ES_URL}/${INDEX}/${TYPE}/2" -d '{"msg" : "Non-Small Cell Lung Cancer"}'
curl -XPUT "${ES_URL}/${INDEX}/${TYPE}/3" -d '{"msg" : "c.2573T>G NSCLC"}'

# Make the documents searchable before querying.
curl -XPOST "${ES_URL}/${INDEX}/_refresh"

# 3. Show how the field analyzer tokenizes the sample text.
curl -XGET "${ES_URL}/${INDEX}/_analyze?field=msg&pretty=1" -d "HER2+ Breast Cancer"
#curl -XGET "${ES_URL}/${INDEX}/_analyze?field=msg&pretty=1" -d "Non-Small Cell Lung Cancer"
#curl -XGET "${ES_URL}/${INDEX}/_analyze?field=msg&pretty=1" -d "c.2573T>G NSCLC"

# 4. Match queries.
# A match query analyzes its text rather than parsing query syntax, so '+'
# and '-' are sent unescaped. Writing "\\+" in the JSON would put a literal
# backslash into the analyzed text, and word_delimiter would split on it.
# The URLs are quoted so the shell never treats '?' as a glob character.
printf '%s\n' 'HER2+'
curl -XGET "${ES_URL}/${INDEX}/_search?pretty" -d '{
  "query": {
    "match": {
      "msg": {
        "query": "HER2+"
      }
    }
  }
}'

printf '%s\n' 'HER2-'
curl -XGET "${ES_URL}/${INDEX}/_search?pretty" -d '{
  "query": {
    "match": {
      "msg": {
        "query": "HER2-"
      }
    }
  }
}'

# 5. Clean up.
curl -X DELETE "${ES_URL}/${INDEX}"

# --
# You received this message because you are subscribed to the Google Groups "elasticsearch" group.
# To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscr...@googlegroups.com.
# To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/becb02b7-72f0-42dd-b347-5f031fa154d3%40googlegroups.com.
# For more options, visit https://groups.google.com/d/optout.