Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/133262

Change subject: WIP:  Add support for skipping some fields
......................................................................

WIP:  Add support for skipping some fields

This won't work properly right now because we can't control the execution
order of the fields.

Change-Id: I70255c89ed9b18a197066068cda158633abcae5a
---
M 
experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
1 file changed, 70 insertions(+), 40 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/search/highlighter 
refs/changes/62/133262/1

diff --git 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
index 3fc545c..a568736 100644
--- 
a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
+++ 
b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
@@ -2,7 +2,6 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -10,6 +9,8 @@
 
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.hppc.ObjectObjectMap;
+import org.elasticsearch.common.hppc.ObjectObjectOpenHashMap;
 import org.elasticsearch.common.text.StringAndBytesText;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.common.text.Text;
@@ -50,65 +51,57 @@
     @Override
     public HighlightField highlight(HighlighterContext context) {
         try {
-            CacheEntry entry = (CacheEntry) 
context.hitContext.cache().get(CACHE_KEY);
-            if (entry == null) {
-                entry = new CacheEntry();
-                context.hitContext.cache().put(CACHE_KEY, entry);
-            }
-            BasicQueryWeigher weigher = 
entry.queryWeighers.get(context.query.originalQuery());
-            if (weigher == null) {
-                // TODO recycle. But addReleasable doesn't seem to close it
-                // properly later. I believe this is fixed in later
-                // Elasticsearch versions.
-                BytesRefHashTermInfos infos = new 
BytesRefHashTermInfos(BigArrays.NON_RECYCLING_INSTANCE);
-//                context.context.addReleasable(infos);
-                boolean phraseAsTerms = false;
-                if (context.field.fieldOptions().options() != null) {
-                    Boolean phraseAsTermsOption = (Boolean) 
context.field.fieldOptions().options()
-                            .get("phrase_as_terms");
-                    if (phraseAsTermsOption != null) {
-                        phraseAsTerms = phraseAsTermsOption;
-                    }
-                }
-                weigher = new BasicQueryWeigher(
-                        new ElasticsearchQueryFlattener(100, phraseAsTerms), 
infos,
-                        context.hitContext.topLevelReader(), 
context.query.originalQuery());
-                // Build the QueryWeigher with the top level reader to get all
-                // the frequency information
-                entry.queryWeighers.put(context.query.originalQuery(), 
weigher);
-            }
-            HighlightExecutionContext executionContext = new 
HighlightExecutionContext(context,
-                    weigher);
+            HighlightExecutionContext executionContext = new 
HighlightExecutionContext(context);
             try {
+                executionContext.setup();
                 return executionContext.highlight();
             } finally {
                 executionContext.cleanup();
             }
         } catch (Exception e) {
+            e.printStackTrace();
             throw new FetchPhaseExecutionException(context.context, "Failed to 
highlight field ["
                     + context.fieldName + "]", e);
         }
     }
-
+    
     static class CacheEntry {
-        private final Map<Query, BasicQueryWeigher> queryWeighers = new 
HashMap<Query, BasicQueryWeigher>();
+        // Note that the magic numbers in the constructors for the HashMaps are
+        // wild guesses as to efficient initial capacities
+        private final ObjectObjectMap<Query, BasicQueryWeigher> queryWeighers 
= new ObjectObjectOpenHashMap<Query, BasicQueryWeigher>(1);
+        private final ObjectObjectMap<String, Object> forwarded = new 
ObjectObjectOpenHashMap<String, Object>(10);
+        private int docId;
     }
 
     static class HighlightExecutionContext {
         private final HighlighterContext context;
-        private final BasicQueryWeigher weigher;
+        private BasicQueryWeigher weigher;
+        private CacheEntry cacheEntry;
         private FieldWrapper defaultField;
         private List<FieldWrapper> extraFields;
         private SegmenterFactory segmenterFactory;
         private DelayedSegmenter segmenter;
 
-        HighlightExecutionContext(HighlighterContext context, 
BasicQueryWeigher weigher) {
+        HighlightExecutionContext(HighlighterContext context) {
             this.context = context;
-            this.weigher = weigher;
-            defaultField = new FieldWrapper(this, context, weigher);
+            cacheEntry = (CacheEntry) 
context.hitContext.cache().get(CACHE_KEY);
+            if (cacheEntry == null) {
+                cacheEntry = new CacheEntry();
+                context.hitContext.cache().put(CACHE_KEY, cacheEntry);
+                cacheEntry.docId = context.hitContext.docId();
+            } else {
+                if (cacheEntry.docId != context.hitContext.docId()) {
+                    cacheEntry.forwarded.clear();
+                    cacheEntry.docId = context.hitContext.docId();
+                }
+            }
         }
 
         HighlightField highlight() throws IOException {
+            if (skip()) {
+                return null;
+            }
+            setup();
             int numberOfSnippets = 
context.field.fieldOptions().numberOfFragments();
             if (numberOfSnippets == 0) {
                 numberOfSnippets = 1;
@@ -117,6 +110,7 @@
             List<Snippet> snippets = buildChooser().choose(segmenter, 
buildHitEnum(),
                     numberOfSnippets);
             if (snippets.size() != 0) {
+                cacheEntry.forwarded.put(context.fieldName + "_matched", 
Boolean.TRUE);
                 return new HighlightField(context.fieldName, 
formatSnippets(snippets));
             }
             int noMatchSize = context.field.fieldOptions().noMatchSize();
@@ -134,10 +128,12 @@
 
         void cleanup() throws Exception {
             Exception lastCaught = null;
-            try {
-                defaultField.cleanup();
-            } catch (Exception e) {
-                lastCaught = e;
+            if (defaultField != null) {
+                try {
+                    defaultField.cleanup();
+                } catch (Exception e) {
+                    lastCaught = e;
+                }
             }
             if (extraFields != null) {
                 for (FieldWrapper extra : extraFields) {
@@ -366,5 +362,39 @@
             throw new IllegalArgumentException("Unknown fragmenter:  '" + 
options.fragmenter()
                     + "'.  Options are 'scan' or 'sentence'.");
         }
+        
+        private boolean skip() {
+            String unless = (String) getOption("unless");
+            if (unless == null) {
+                return false;
+            }
+            return cacheEntry.forwarded.containsKey(unless);
+        }
+
+        private void setup() {
+            weigher = 
cacheEntry.queryWeighers.get(context.query.originalQuery());
+            if (weigher == null) {
+            // TODO recycle. But addReleasable doesn't seem to close it
+                // properly later. I believe this is fixed in later
+                // Elasticsearch versions.
+                BytesRefHashTermInfos infos = new 
BytesRefHashTermInfos(BigArrays.NON_RECYCLING_INSTANCE);
+//                context.context.addReleasable(infos);
+                boolean phraseAsTerms = false;
+                if (context.field.fieldOptions().options() != null) {
+                    Boolean phraseAsTermsOption = (Boolean) 
context.field.fieldOptions().options()
+                            .get("phrase_as_terms");
+                    if (phraseAsTermsOption != null) {
+                        phraseAsTerms = phraseAsTermsOption;
+                    }
+                }
+                weigher = new BasicQueryWeigher(
+                        new ElasticsearchQueryFlattener(100, phraseAsTerms), 
infos,
+                        context.hitContext.topLevelReader(), 
context.query.originalQuery());
+                // Build the QueryWeigher with the top level reader to get all
+                // the frequency information
+                cacheEntry.queryWeighers.put(context.query.originalQuery(), 
weigher);
+            }
+            defaultField = new FieldWrapper(this, context, weigher);
+        }
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/133262
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I70255c89ed9b18a197066068cda158633abcae5a
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to