Github user Timothy055 commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/105#discussion_r85613814 --- Diff: lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java --- @@ -65,58 +65,88 @@ public String getField() { */ public abstract List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException; - protected List<OffsetsEnum> createOffsetsEnums(LeafReader leafReader, int doc, TokenStream tokenStream) throws IOException { - List<OffsetsEnum> offsetsEnums = createOffsetsEnumsFromReader(leafReader, doc); - if (automata.length > 0) { - offsetsEnums.add(createOffsetsEnumFromTokenStream(doc, tokenStream)); + protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader leafReader, int doc) throws IOException { + final Terms termsIndex = leafReader.terms(field); + if (termsIndex == null) { + return Collections.emptyList(); } - return offsetsEnums; - } - protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader atomicReader, int doc) throws IOException { // For strict positions, get a Map of term to Spans: // note: ScriptPhraseHelper.NONE does the right thing for these method calls final Map<BytesRef, Spans> strictPhrasesTermToSpans = - strictPhrases.getTermToSpans(atomicReader, doc); + phraseHelper.getTermToSpans(leafReader, doc); // Usually simply wraps terms in a List; but if willRewrite() then can be expanded final List<BytesRef> sourceTerms = - strictPhrases.expandTermsIfRewrite(terms, strictPhrasesTermToSpans); + phraseHelper.expandTermsIfRewrite(terms, strictPhrasesTermToSpans); - final List<OffsetsEnum> offsetsEnums = new ArrayList<>(sourceTerms.size() + 1); + final List<OffsetsEnum> offsetsEnums = new ArrayList<>(sourceTerms.size() + automata.length); - Terms termsIndex = atomicReader == null || sourceTerms.isEmpty() ? 
null : atomicReader.terms(field); - if (termsIndex != null) { + // Handle sourceTerms: + if (!sourceTerms.isEmpty()) { TermsEnum termsEnum = termsIndex.iterator();//does not return null for (BytesRef term : sourceTerms) { - if (!termsEnum.seekExact(term)) { - continue; // term not found - } - PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); - if (postingsEnum == null) { - // no offsets or positions available - throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); - } - if (doc != postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted - continue; + if (termsEnum.seekExact(term)) { + PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); + + if (postingsEnum == null) { + // no offsets or positions available + throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); + } + + if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted + postingsEnum = phraseHelper.filterPostings(term, postingsEnum, strictPhrasesTermToSpans.get(term)); + if (postingsEnum != null) { + offsetsEnums.add(new OffsetsEnum(term, postingsEnum)); + } + } } - postingsEnum = strictPhrases.filterPostings(term, postingsEnum, strictPhrasesTermToSpans.get(term)); - if (postingsEnum == null) { - continue;// completely filtered out + } + } + + // Handle automata + if (automata.length > 0) { + offsetsEnums.addAll(createAutomataOffsetsFromTerms(termsIndex, doc)); + } + + return offsetsEnums; + } + + protected List<OffsetsEnum> createAutomataOffsetsFromTerms(Terms termsIndex, int doc) throws IOException { + Map<CharacterRunAutomaton, List<PostingsEnum>> automataPostings = new IdentityHashMap<>(automata.length); --- End diff -- How about List<List<PostingsEnum>>? That should give better locality without loss of type safety.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org