Github user Timothy055 commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/105#discussion_r85619786
--- Diff:
lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldOffsetStrategy.java
---
@@ -65,58 +65,88 @@ public String getField() {
*/
public abstract List<OffsetsEnum> getOffsetsEnums(IndexReader reader,
int docId, String content) throws IOException;
- protected List<OffsetsEnum> createOffsetsEnums(LeafReader leafReader,
int doc, TokenStream tokenStream) throws IOException {
- List<OffsetsEnum> offsetsEnums =
createOffsetsEnumsFromReader(leafReader, doc);
- if (automata.length > 0) {
- offsetsEnums.add(createOffsetsEnumFromTokenStream(doc, tokenStream));
+ protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader
leafReader, int doc) throws IOException {
+ final Terms termsIndex = leafReader.terms(field);
+ if (termsIndex == null) {
+ return Collections.emptyList();
}
- return offsetsEnums;
- }
- protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader
atomicReader, int doc) throws IOException {
// For strict positions, get a Map of term to Spans:
// note: ScriptPhraseHelper.NONE does the right thing for these
method calls
final Map<BytesRef, Spans> strictPhrasesTermToSpans =
- strictPhrases.getTermToSpans(atomicReader, doc);
+ phraseHelper.getTermToSpans(leafReader, doc);
// Usually simply wraps terms in a List; but if willRewrite() then can
be expanded
final List<BytesRef> sourceTerms =
- strictPhrases.expandTermsIfRewrite(terms,
strictPhrasesTermToSpans);
+ phraseHelper.expandTermsIfRewrite(terms, strictPhrasesTermToSpans);
- final List<OffsetsEnum> offsetsEnums = new
ArrayList<>(sourceTerms.size() + 1);
+ final List<OffsetsEnum> offsetsEnums = new
ArrayList<>(sourceTerms.size() + automata.length);
- Terms termsIndex = atomicReader == null || sourceTerms.isEmpty() ?
null : atomicReader.terms(field);
- if (termsIndex != null) {
+ // Handle sourceTerms:
+ if (!sourceTerms.isEmpty()) {
TermsEnum termsEnum = termsIndex.iterator();//does not return null
for (BytesRef term : sourceTerms) {
- if (!termsEnum.seekExact(term)) {
- continue; // term not found
- }
- PostingsEnum postingsEnum = termsEnum.postings(null,
PostingsEnum.OFFSETS);
- if (postingsEnum == null) {
- // no offsets or positions available
- throw new IllegalArgumentException("field '" + field + "' was
indexed without offsets, cannot highlight");
- }
- if (doc != postingsEnum.advance(doc)) { // now it's positioned,
although may be exhausted
- continue;
+ if (termsEnum.seekExact(term)) {
+ PostingsEnum postingsEnum = termsEnum.postings(null,
PostingsEnum.OFFSETS);
+
+ if (postingsEnum == null) {
+ // no offsets or positions available
+ throw new IllegalArgumentException("field '" + field + "' was
indexed without offsets, cannot highlight");
+ }
+
+ if (doc == postingsEnum.advance(doc)) { // now it's positioned,
although may be exhausted
+ postingsEnum = phraseHelper.filterPostings(term, postingsEnum,
strictPhrasesTermToSpans.get(term));
+ if (postingsEnum != null) {
+ offsetsEnums.add(new OffsetsEnum(term, postingsEnum));
+ }
+ }
}
- postingsEnum = strictPhrases.filterPostings(term, postingsEnum,
strictPhrasesTermToSpans.get(term));
- if (postingsEnum == null) {
- continue;// completely filtered out
+ }
+ }
+
+ // Handle automata
+ if (automata.length > 0) {
+ offsetsEnums.addAll(createAutomataOffsetsFromTerms(termsIndex, doc));
+ }
+
+ return offsetsEnums;
+ }
+
+ protected List<OffsetsEnum> createAutomataOffsetsFromTerms(Terms
termsIndex, int doc) throws IOException {
+ Map<CharacterRunAutomaton, List<PostingsEnum>> automataPostings = new
IdentityHashMap<>(automata.length);
--- End diff --
Pushed that for now to see what you think.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]