dsmiley commented on a change in pull request #581 (LUCENE-3041: QueryVisitor). URL: https://github.com/apache/lucene-solr/pull/581#discussion_r260060558
########## File path: lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java ########## @@ -17,180 +17,124 @@ package org.apache.lucene.search.uhighlight; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.List; -import java.util.function.Function; import java.util.function.Predicate; +import java.util.function.Supplier; -import org.apache.lucene.queries.function.FunctionScoreQuery; -import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BoostQuery; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.DisjunctionMaxQuery; -import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.spans.SpanBoostQuery; -import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanNotQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanPositionCheckQuery; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; -import org.apache.lucene.util.automaton.LevenshteinAutomata; import org.apache.lucene.util.automaton.Operations; /** * Support for highlighting multi-term queries. * * @lucene.internal */ -class MultiTermHighlighting { +final class MultiTermHighlighting { private MultiTermHighlighting() { } /** * Extracts MultiTermQueries that match the provided field predicate. * Returns equivalent automata that will match terms. 
*/ - public static CharacterRunAutomaton[] extractAutomata(Query query, - Predicate<String> fieldMatcher, - boolean lookInSpan, - Function<Query, Collection<Query>> preRewriteFunc) { - // TODO Lucene needs a Query visitor API! LUCENE-3041 - - List<CharacterRunAutomaton> list = new ArrayList<>(); - Collection<Query> customSubQueries = preRewriteFunc.apply(query); - if (customSubQueries != null) { - for (Query sub : customSubQueries) { - list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc))); - } - } else if (query instanceof BooleanQuery) { - for (BooleanClause clause : (BooleanQuery) query) { - if (!clause.isProhibited()) { - list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc))); - } - } - } else if (query instanceof ConstantScoreQuery) { - list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, - preRewriteFunc))); - } else if (query instanceof BoostQuery) { - list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, - preRewriteFunc))); - } else if (query instanceof FunctionScoreQuery) { - list.addAll(Arrays.asList(extractAutomata(((FunctionScoreQuery) query).getWrappedQuery(), fieldMatcher, - lookInSpan, preRewriteFunc))); - } else if (query instanceof DisjunctionMaxQuery) { - for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) { - list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc))); - } - } else if (lookInSpan && query instanceof SpanOrQuery) { - for (Query sub : ((SpanOrQuery) query).getClauses()) { - list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc))); + static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan) { + + AutomataCollector collector = new AutomataCollector(fieldMatcher, lookInSpan); + query.visit(collector); + return 
collector.runAutomata.toArray(new CharacterRunAutomaton[0]); + } + + private static class AutomataCollector extends QueryVisitor { + + List<CharacterRunAutomaton> runAutomata = new ArrayList<>(); + final boolean lookInSpan; + final Predicate<String> fieldMatcher; + + private AutomataCollector(Predicate<String> fieldMatcher, boolean lookInSpan) { + this.lookInSpan = lookInSpan; + this.fieldMatcher = fieldMatcher; + } + + @Override + public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) { + if (lookInSpan == false && parent instanceof SpanQuery) { + return null; } - } else if (lookInSpan && query instanceof SpanNearQuery) { - for (Query sub : ((SpanNearQuery) query).getClauses()) { - list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc))); + return super.getSubVisitor(occur, parent); + } + + @Override + public void matchesAutomaton(Query query, String field, boolean isBinary, Supplier<Automaton> automatonSupplier) { + if (fieldMatcher.test(field) == false) { + return; } - } else if (lookInSpan && query instanceof SpanNotQuery) { - list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, - preRewriteFunc))); - } else if (lookInSpan && query instanceof SpanPositionCheckQuery) { - list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, - preRewriteFunc))); - } else if (lookInSpan && query instanceof SpanBoostQuery) { - list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, - preRewriteFunc))); - } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) { - list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), - fieldMatcher, lookInSpan, preRewriteFunc))); - } else if (query instanceof FuzzyQuery) { - final FuzzyQuery fq = (FuzzyQuery) query; - if (fieldMatcher.test(fq.getField())) { - String utf16 = 
fq.getTerm().text(); - int termText[] = new int[utf16.codePointCount(0, utf16.length())]; - for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) { - termText[j++] = cp = utf16.codePointAt(i); - } - int termLength = termText.length; - int prefixLength = Math.min(fq.getPrefixLength(), termLength); - String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength); - LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions()); - String prefix = UnicodeUtil.newString(termText, 0, prefixLength); - Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix); - list.add(new CharacterRunAutomaton(automaton) { + if (isBinary == false) { + runAutomata.add(new CharacterRunAutomaton(automatonSupplier.get()){ @Override public String toString() { - return fq.toString(); + return query.toString(); } }); } - } else if (query instanceof AutomatonQuery) { - final AutomatonQuery aq = (AutomatonQuery) query; - if (fieldMatcher.test(aq.getField())) { - - if (aq.isAutomatonBinary() == false) { // note: is the case for WildcardQuery, RegexpQuery Review comment: might want to retain this comment in the new code ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org