Sebastian Yonekura Baeza created LUCENE-7704:
------------------------------------------------

             Summary: SysnonymGraphFilter doesn't respect ignoreCase parameter
                 Key: LUCENE-7704
                 URL: https://issues.apache.org/jira/browse/LUCENE-7704
             Project: Lucene - Core
          Issue Type: Bug
          Components: modules/analysis
    Affects Versions: 6.4.1
            Reporter: Sebastian Yonekura Baeza
            Priority: Minor


Hi, it seems that SynonymGraphFilter doesn't respect ignoreCase parameter. In 
particular this test doesn't pass:

{code:title=UppercaseSynonymMapTest.java|borderStyle=solid}
package com.mapcity.suggest.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.junit.Test;

import java.io.IOException;

import static 
org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;

/**
 * @author Sebastian Yonekura
 *         Created on 22-02-17
 */
public class UppercaseSynonymMapTest {

    @Test
    public void analyzerTest01() throws IOException {
        // This passes
        testAssertMapping("word", "synonym");
        // this one not
        testAssertMapping("word".toUpperCase(), "synonym");
    }

    private void testAssertMapping(String inputString, String outputString) 
throws IOException {
        SynonymMap.Builder builder = new SynonymMap.Builder(false);
        CharsRef input = SynonymMap.Builder.join(inputString.split(" "), new 
CharsRefBuilder());
        CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new 
CharsRefBuilder());
        builder.add(input, output, true);
        Analyzer analyzer = new CustomAnalyzer(builder.build());
        TokenStream tokenStream = analyzer.tokenStream("field", inputString);
        assertTokenStreamContents(tokenStream, new String[]{
                outputString, inputString
        });
    }

    static class CustomAnalyzer extends Analyzer {
        private SynonymMap synonymMap;

        CustomAnalyzer(SynonymMap synonymMap) {
            this.synonymMap = synonymMap;
        }

        @Override
        protected TokenStreamComponents createComponents(String s) {
            Tokenizer tokenizer = new WhitespaceTokenizer();
            TokenStream tokenStream = new SynonymGraphFilter(tokenizer, 
synonymMap, true); // Ignore case True
            return new TokenStreamComponents(tokenizer, tokenStream);
        }
    }
}

{code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org
For additional commands, e-mail: dev-h...@lucene.apache.org

Reply via email to