Github user alessandrobenedetti commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/482#discussion_r227469439 --- Diff: lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java --- @@ -47,58 +48,65 @@ public void testStopFilt() throws IOException { assertTokenStreamContents(stream, new String[] { "Now", "The" }); } + + private void logStopwords(String name, List<String> stopwords){ + // helper method: converts a list + log(String.format("stopword list \"%s:\"", name)); + for (int i = 0; i < stopwords.size(); i++) { + log(String.format("stopword (%d): %s ", i, stopwords.get(i))); + } + log("----------"); + } /** * Test Position increments applied by StopFilter with and without enabling this option. */ - public void testStopPositons() throws IOException { + public void testStopPositions() throws IOException { + final int NUMBER_OF_TOKENS = 20; StringBuilder sb = new StringBuilder(); - ArrayList<String> a = new ArrayList<>(); - for (int i=0; i<20; i++) { - String w = English.intToEnglish(i).trim(); - sb.append(w).append(" "); - if (i%3 != 0) a.add(w); + List<String> stopwords = new ArrayList<>(NUMBER_OF_TOKENS); + for (int i = 0; i < NUMBER_OF_TOKENS; i++) { + String token = English.intToEnglish(i).trim(); + sb.append(token).append(' '); + if (i%3 != 0) stopwords.add(token); } log(sb.toString()); - String stopWords[] = a.toArray(new String[0]); - for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]); - CharArraySet stopSet = StopFilter.makeStopSet(stopWords); + CharArraySet stopSet = StopFilter.makeStopSet(stopwords); + logStopwords("All stopwords", stopwords); // with increments StringReader reader = new StringReader(sb.toString()); final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false); in.setReader(reader); - StopFilter stpf = new StopFilter(in, stopSet); - doTestStopPositons(stpf); + StopFilter stopfilter = new StopFilter(in, stopSet); + doTestStopwordsPositions(stopfilter); // with increments, 
concatenating two stop filters - ArrayList<String> a0 = new ArrayList<>(); - ArrayList<String> a1 = new ArrayList<>(); - for (int i=0; i<a.size(); i++) { - if (i%2==0) { - a0.add(a.get(i)); + List<String> evenStopwords = new ArrayList<>(stopwords.size()); + List<String> oddStopwords = new ArrayList<>(stopwords.size()); + for (int i=0; i < stopwords.size(); i++) { + if (i%2 == 0) { + evenStopwords.add(stopwords.get(i)); } else { - a1.add(a.get(i)); + oddStopwords.add(stopwords.get(i)); } } - String stopWords0[] = a0.toArray(new String[0]); - for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]); - String stopWords1[] = a1.toArray(new String[0]); - for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]); - CharArraySet stopSet0 = StopFilter.makeStopSet(stopWords0); - CharArraySet stopSet1 = StopFilter.makeStopSet(stopWords1); + CharArraySet evenStopSet = StopFilter.makeStopSet(evenStopwords); + logStopwords("Even stopwords", evenStopwords); + CharArraySet oddStopSet = StopFilter.makeStopSet(oddStopwords); + logStopwords("Odd stopwords", oddStopwords); reader = new StringReader(sb.toString()); final MockTokenizer in1 = new MockTokenizer(MockTokenizer.WHITESPACE, false); in1.setReader(reader); - StopFilter stpf0 = new StopFilter(in1, stopSet0); // first part of the set - StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated! - doTestStopPositons(stpf01); + StopFilter evenStopFilter = new StopFilter(in1, evenStopSet); // first part of the set + StopFilter oddStopFilter = new StopFilter(evenStopFilter, oddStopSet); // two stop filters concatenated! --- End diff -- Maybe rename this to concatenatedStopFilter, because it contains the entire stop filter set through 2 stop filters and not just the odd ones?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org