dsmiley commented on a change in pull request #780: SOLR-11866: Support efficient subset matching in query elevation rules URL: https://github.com/apache/lucene-solr/pull/780#discussion_r303721929
########## File path: solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java ########## @@ -1131,4 +1227,333 @@ public int compareTop(int doc) { }; } } + + /** + * Matches a collection of subsets of type <E>. + * <p> + * Given a collection of subsets <code>N</code>, finds all the subsets that are contained (ignoring duplicate elements) + * by a provided set <code>s</code>. + * That is, finds all subsets <code>n</code> in <code>N</code> for which <code>s.containsAll(n)</code> + * (<code>s</code> contains all the elements of <code>n</code>, in any order). + * <p> + * Associates a match value of type <M> to each subset and provides it each time the subset matches (i.e. is + * contained by the provided set). + * + * @param <E> Subset element type. + * @param <M> Subset match value type. + */ + protected interface SubsetMatcher<E, M> { + + /** + * Returns an iterator over all the subsets that are contained by the provided set. + * The returned iterator does not support removal. + * + * @param set The provided set. It can be any {@link Collection} structure, see the implementation for more details. + */ + Iterator<M> findSubsetsMatching(Collection<E> set); + + /** + * Gets the number of subsets in this matcher. + */ + int getSubsetCount(); + + interface Builder<E, M> { + + Builder<E, M> addSubset(Collection<E> subset, M matchValue); + + SubsetMatcher<E, M> build(); + } + } + + /** + * Matches a potentially large collection of subsets with a trie implementation. + * <p> + * This matcher imposes the elements are {@link Comparable}. + * It does not keep the subset insertion order. + * Duplicate subsets stack their match values. + * <p> + * The time complexity of adding a subset is <code>O(n.log(n))</code>, where <code>n</code> is the size of the subset. + * <p> + * The worst case time complexity of the subset matching is <code>O(2^s)</code>, however a more typical case time + * complexity is <code>O(s^3)</code> where s is the size of the set to partially match. + * Note it does not depend on <code>N</code>, the size of the collection of subsets, nor on <code>n</code>, the size of + * a subset. + */ + protected static class TrieSubsetMatcher<E extends Comparable<? super E>, M> implements SubsetMatcher<E, M> { + + /* + Trie structure: Review comment: I love all these comments! ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org