Manybubbles has submitted this change and it was merged.

Change subject: Add option to skip highlighting if last matched
......................................................................


Add option to skip highlighting if last matched

How it works from the docs:
The ```skip_if_last_matched``` option can be used to entirely skip highlighting
if the last field matched.  This can be used to form "chains" of fields only one
of which will return a match:
```js
  "highlight": {
    "type": "experimental",
    "fields": {
      "text": {},
      "aux_text": { "options": { "skip_if_last_matched": true } },
      "title": {},
      "redirect": { "options": { "skip_if_last_matched": true } },
      "section_heading": { "options": { "skip_if_last_matched": true } },
      "category": { "options": { "skip_if_last_matched": true } },
    }
  }
```
The above example creates two "chains":
* aux_text will only be highlighted if there isn't a match in text.
-and-
* redirect will only be highlighted if there isn't a match in title.
* section_heading will only be highlighted if there isn't a match in redirect
and title.
* category will only be highlighted if there isn't a match in section_heading,
redirect, or title.

This needed to wait until Elasticsearch 1.3 because in 1.2 the fields were
passed to the highlighter in random order.  In 1.3 they are passed in the
order they are sent.  Yay.

Change-Id: I0326baaf497a305348eb536461cd8d6404795d2c
---
M README.md
M 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
A 
experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
3 files changed, 135 insertions(+), 0 deletions(-)

Approvals:
  BearND: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/README.md b/README.md
index 6aebb4e..becc067 100644
--- a/README.md
+++ b/README.md
@@ -243,6 +243,32 @@
   }
 ```
 
+The ```skip_if_last_matched``` option can be used to entirely skip highlighting
+if the last field matched.  This can be used to form "chains" of fields only 
one
+of which will return a match:
+```js
+  "highlight": {
+    "type": "experimental",
+    "fields": {
+      "text": {},
+      "aux_text": { "options": { "skip_if_last_matched": true } },
+      "title": {},
+      "redirect": { "options": { "skip_if_last_matched": true } },
+      "section_heading": { "options": { "skip_if_last_matched": true } },
+      "category": { "options": { "skip_if_last_matched": true } },
+    }
+  }
+```
+The above example creates two "chains":
+* aux_text will only be highlighted if there isn't a match in text.
+-and-
+* redirect will only be highlighted if there isn't a match in title.
+* section_heading will only be highlighted if there isn't a match in redirect
+and title.
+* category will only be highlighted if there isn't a match in section_heading,
+redirect, or title.
+
+
 Offsets in postings or term vectors
 -----------------------------------
 Since adding offsets to the postings (set ```index_options``` to ```offsets```
diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
index bd3237c..e8000c7 100644
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
@@ -82,6 +82,8 @@
     static class CacheEntry {
         private final Map<QueryCacheKey, BasicQueryWeigher> queryWeighers = 
new HashMap<>();
         private Map<String, AutomatonHitEnum.Factory> 
automatonHitEnumFactories;
+        private boolean lastMatched = false;
+        private int lastDocId = -1;
     }
 
     static class QueryCacheKey {
@@ -138,6 +140,10 @@
         }
 
         HighlightField highlight() throws IOException {
+            if (shouldSkip()) {
+                return null;
+            }
+
             // TODO it might be possible to not build the weigher at all if 
just using regex highlighting
             ensureWeigher();
             scoreMatters = context.field.fieldOptions().scoreOrdered();
@@ -154,8 +160,10 @@
             List<Snippet> snippets = buildChooser().choose(segmenter, 
buildHitEnum(),
                     numberOfSnippets);
             if (snippets.size() != 0) {
+                cache.lastMatched = true;
                 return new HighlightField(context.fieldName, 
formatSnippets(snippets));
             }
+            cache.lastMatched = false;
             int noMatchSize = context.field.fieldOptions().noMatchSize();
             if (noMatchSize <= 0) {
                 return null;
@@ -169,6 +177,17 @@
             return new HighlightField(context.fieldName, new Text[] 
{fragment});
         }
 
+        private boolean shouldSkip() {
+            // Maintain lastMatched - it should be false if we shift to a new 
doc.
+            if (cache.lastDocId != context.hitContext.docId()) {
+                cache.lastMatched = false;
+                cache.lastDocId = context.hitContext.docId();
+            }
+
+            Boolean skipIfLastMatched = 
(Boolean)getOption("skip_if_last_matched");
+            return skipIfLastMatched == null ? false : skipIfLastMatched && 
cache.lastMatched;
+        }
+
         void cleanup() throws Exception {
             Exception lastCaught = null;
             try {
diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
 
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
new file mode 100644
index 0000000..c1b7246
--- /dev/null
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/test/java/org/wikimedia/highlighter/experimental/elasticsearch/integration/SkipTest.java
@@ -0,0 +1,90 @@
+package org.wikimedia.highlighter.experimental.elasticsearch.integration;
+
+import static org.elasticsearch.index.query.QueryBuilders.termQuery;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
+import static org.hamcrest.Matchers.equalTo;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.search.highlight.HighlightBuilder;
+import org.junit.Test;
+import 
org.wikimedia.highlighter.experimental.elasticsearch.AbstractExperimentalHighlighterIntegrationTestBase;
+
+/**
+ * Tests for skipping highlighting.
+ */
+public class SkipTest extends 
AbstractExperimentalHighlighterIntegrationTestBase {
+    @Test
+    public void skipIfLastMatched() throws IOException {
+        buildIndex();
+        indexTestData();
+
+        Map<String, Object> skipIf = new HashMap<>();
+        skipIf.put("skip_if_last_matched", true);
+
+        // A single chain and a single extra highlight not in the chain
+        SearchRequestBuilder search = testSearch(termQuery("a", 
"test")).setSize(1000)
+                .addHighlightedField(new 
HighlightBuilder.Field("a").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("b").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("c").options(skipIf))
+                .addHighlightedField(new HighlightBuilder.Field("d"));
+        SearchResponse response = search.get();
+        assertHighlight(response, 0, "a", 0, equalTo("<em>test</em> a"));
+        assertNotHighlighted(response, 0, "b");
+        assertNotHighlighted(response, 0, "c");
+        assertHighlight(response, 0, "d", 0, equalTo("<em>test</em> foo d"));
+
+        assertHighlight(response, 1, "a", 0, equalTo("<em>test</em> a"));
+        assertNotHighlighted(response, 1, "b");
+        assertNotHighlighted(response, 1, "c");
+        assertHighlight(response, 1, "d", 0, equalTo("<em>test</em> foo d"));
+
+        // Support for multiple "chains"
+        search = testSearch(termQuery("b", "foo")).setSize(1000)
+                .addHighlightedField(new 
HighlightBuilder.Field("a").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("b").options(skipIf))
+                .addHighlightedField(new HighlightBuilder.Field("c"))
+                .addHighlightedField(new 
HighlightBuilder.Field("d").options(skipIf));
+        response = search.get();
+        assertNotHighlighted(response, 0, "a");
+        assertHighlight(response, 0, "b", 0, equalTo("test <em>foo</em> b"));
+        assertNotHighlighted(response, 0, "c");
+        assertHighlight(response, 0, "d", 0, equalTo("test <em>foo</em> d"));
+
+        assertNotHighlighted(response, 1, "a");
+        assertHighlight(response, 1, "b", 0, equalTo("test <em>foo</em> b"));
+        assertNotHighlighted(response, 1, "c");
+        assertHighlight(response, 1, "d", 0, equalTo("test <em>foo</em> d"));
+
+        // Everything is a single chain
+        search = testSearch(termQuery("a", "test")).setSize(1000)
+                .addHighlightedField(new 
HighlightBuilder.Field("a").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("b").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("c").options(skipIf))
+                .addHighlightedField(new 
HighlightBuilder.Field("d").options(skipIf));
+        response = search.get();
+        assertHighlight(response, 0, "a", 0, equalTo("<em>test</em> a"));
+        assertNotHighlighted(response, 0, "b");
+        assertNotHighlighted(response, 0, "c");
+        assertNotHighlighted(response, 0, "d");
+
+        assertHighlight(response, 1, "a", 0, equalTo("<em>test</em> a"));
+        assertNotHighlighted(response, 1, "b");
+        assertNotHighlighted(response, 1, "c");
+        assertNotHighlighted(response, 0, "d");
+    }
+
+    protected void indexTestData() {
+        client().prepareIndex("test", "test", "1")
+                .setSource("a", "test a", "b", "test foo b", "c", "test c", 
"d", "test foo d")
+                .get();
+        client().prepareIndex("test", "test", "2")
+                .setSource("a", "test a", "b", "test foo b", "c", "test c", 
"d", "test foo d")
+                .get();
+        refresh();
+    }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/150013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I0326baaf497a305348eb536461cd8d6404795d2c
Gerrit-PatchSet: 6
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
Gerrit-Reviewer: BearND <[email protected]>
Gerrit-Reviewer: BryanDavis <[email protected]>
Gerrit-Reviewer: Chad <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to