Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/133262
Change subject: WIP: Add support for skipping some fields
......................................................................

WIP: Add support for skipping some fields

This won't work properly right now because we can't control the
execution order of the fields.

Change-Id: I70255c89ed9b18a197066068cda158633abcae5a
---
M experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
1 file changed, 70 insertions(+), 40 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/search/highlighter refs/changes/62/133262/1

diff --git a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
index 3fc545c..a568736 100644
--- a/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
+++ b/experimental-highlighter-elasticsearch-plugin/src/main/java/org/elasticsearch/search/highlight/ExperimentalHighlighter.java
@@ -2,7 +2,6 @@
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -10,6 +9,8 @@
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.hppc.ObjectObjectMap;
+import org.elasticsearch.common.hppc.ObjectObjectOpenHashMap;
 import org.elasticsearch.common.text.StringAndBytesText;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.common.text.Text;
@@ -50,65 +51,57 @@
     @Override
     public HighlightField highlight(HighlighterContext context) {
         try {
-            CacheEntry entry = (CacheEntry) context.hitContext.cache().get(CACHE_KEY);
-            if (entry == null) {
-                entry = new CacheEntry();
-                context.hitContext.cache().put(CACHE_KEY, entry);
-            }
-            BasicQueryWeigher weigher = entry.queryWeighers.get(context.query.originalQuery());
-            if (weigher == null) {
-                // TODO recycle. But addReleasable doesn't seem to close it
-                // properly later. I believe this is fixed in later
-                // Elasticsearch versions.
-                BytesRefHashTermInfos infos = new BytesRefHashTermInfos(BigArrays.NON_RECYCLING_INSTANCE);
-// context.context.addReleasable(infos);
-                boolean phraseAsTerms = false;
-                if (context.field.fieldOptions().options() != null) {
-                    Boolean phraseAsTermsOption = (Boolean) context.field.fieldOptions().options()
-                            .get("phrase_as_terms");
-                    if (phraseAsTermsOption != null) {
-                        phraseAsTerms = phraseAsTermsOption;
-                    }
-                }
-                weigher = new BasicQueryWeigher(
-                        new ElasticsearchQueryFlattener(100, phraseAsTerms), infos,
-                        context.hitContext.topLevelReader(), context.query.originalQuery());
-                // Build the QueryWeigher with the top level reader to get all
-                // the frequency information
-                entry.queryWeighers.put(context.query.originalQuery(), weigher);
-            }
-            HighlightExecutionContext executionContext = new HighlightExecutionContext(context,
-                    weigher);
+            HighlightExecutionContext executionContext = new HighlightExecutionContext(context);
             try {
+                executionContext.setup();
                 return executionContext.highlight();
             } finally {
                 executionContext.cleanup();
             }
         } catch (Exception e) {
+            e.printStackTrace();
             throw new FetchPhaseExecutionException(context.context, "Failed to highlight field ["
                     + context.fieldName + "]", e);
         }
     }
-
+
     static class CacheEntry {
-        private final Map<Query, BasicQueryWeigher> queryWeighers = new HashMap<Query, BasicQueryWeigher>();
+        // Note that the magic numbers in the constructors for the HashMaps are
+        // wild guesses as to efficient initial capacities
+        private final ObjectObjectMap<Query, BasicQueryWeigher> queryWeighers = new ObjectObjectOpenHashMap<Query, BasicQueryWeigher>(1);
+        private final ObjectObjectMap<String, Object> forwarded = new ObjectObjectOpenHashMap<String, Object>(10);
+        private int docId;
     }

     static class HighlightExecutionContext {
         private final HighlighterContext context;
-        private final BasicQueryWeigher weigher;
+        private BasicQueryWeigher weigher;
+        private CacheEntry cacheEntry;
         private FieldWrapper defaultField;
         private List<FieldWrapper> extraFields;
         private SegmenterFactory segmenterFactory;
         private DelayedSegmenter segmenter;

-        HighlightExecutionContext(HighlighterContext context, BasicQueryWeigher weigher) {
+        HighlightExecutionContext(HighlighterContext context) {
             this.context = context;
-            this.weigher = weigher;
-            defaultField = new FieldWrapper(this, context, weigher);
+            cacheEntry = (CacheEntry) context.hitContext.cache().get(CACHE_KEY);
+            if (cacheEntry == null) {
+                cacheEntry = new CacheEntry();
+                context.hitContext.cache().put(CACHE_KEY, cacheEntry);
+                cacheEntry.docId = context.hitContext.docId();
+            } else {
+                if (cacheEntry.docId != context.hitContext.docId()) {
+                    cacheEntry.forwarded.clear();
+                    cacheEntry.docId = context.hitContext.docId();
+                }
+            }
         }

         HighlightField highlight() throws IOException {
+            if (skip()) {
+                return null;
+            }
+            setup();
             int numberOfSnippets = context.field.fieldOptions().numberOfFragments();
             if (numberOfSnippets == 0) {
                 numberOfSnippets = 1;
@@ -117,6 +110,7 @@
             List<Snippet> snippets = buildChooser().choose(segmenter, buildHitEnum(),
                     numberOfSnippets);
             if (snippets.size() != 0) {
+                cacheEntry.forwarded.put(context.fieldName + "_matched", Boolean.TRUE);
                 return new HighlightField(context.fieldName, formatSnippets(snippets));
             }
             int noMatchSize = context.field.fieldOptions().noMatchSize();
@@ -134,10 +128,12 @@
         void cleanup() throws Exception {
             Exception lastCaught = null;
-            try {
-                defaultField.cleanup();
-            } catch (Exception e) {
-                lastCaught = e;
+            if (defaultField != null) {
+                try {
+                    defaultField.cleanup();
+                } catch (Exception e) {
+                    lastCaught = e;
+                }
             }
             if (extraFields != null) {
                 for (FieldWrapper extra : extraFields) {
@@ -366,5 +362,39 @@
             throw new IllegalArgumentException("Unknown fragmenter: '"
                     + options.fragmenter() + "'. Options are 'scan' or 'sentence'.");
         }
+
+        private boolean skip() {
+            String unless = (String) getOption("unless");
+            if (unless == null) {
+                return false;
+            }
+            return cacheEntry.forwarded.containsKey(unless);
+        }
+
+        private void setup() {
+            weigher = cacheEntry.queryWeighers.get(context.query.originalQuery());
+            if (weigher == null) {
+                // TODO recycle. But addReleasable doesn't seem to close it
+                // properly later. I believe this is fixed in later
+                // Elasticsearch versions.
+                BytesRefHashTermInfos infos = new BytesRefHashTermInfos(BigArrays.NON_RECYCLING_INSTANCE);
+// context.context.addReleasable(infos);
+                boolean phraseAsTerms = false;
+                if (context.field.fieldOptions().options() != null) {
+                    Boolean phraseAsTermsOption = (Boolean) context.field.fieldOptions().options()
+                            .get("phrase_as_terms");
+                    if (phraseAsTermsOption != null) {
+                        phraseAsTerms = phraseAsTermsOption;
+                    }
+                }
+                weigher = new BasicQueryWeigher(
+                        new ElasticsearchQueryFlattener(100, phraseAsTerms), infos,
+                        context.hitContext.topLevelReader(), context.query.originalQuery());
+                // Build the QueryWeigher with the top level reader to get all
+                // the frequency information
+                cacheEntry.queryWeighers.put(context.query.originalQuery(), weigher);
+            }
+            defaultField = new FieldWrapper(this, context, weigher);
+        }
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/133262
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I70255c89ed9b18a197066068cda158633abcae5a
Gerrit-PatchSet: 1
Gerrit-Project: search/highlighter
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>
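
For readers following the patch, here is a minimal, self-contained sketch of the skipping mechanism it introduces. The class and method names (SkipSketch, highlightField) are hypothetical and not part of the patch; only the mechanism is taken from the diff: each field that produces snippets records a "<fieldName>_matched" key in a per-document forwarded map, and a later field configured with the "unless" option is skipped when that key is already present. As the commit message notes, this only behaves as intended if the fields execute in a predictable order.

    import java.util.HashMap;
    import java.util.Map;

    // Hypothetical stand-alone model of the patch's "unless"/forwarded mechanism.
    public class SkipSketch {
        // Per-document state shared between fields, like CacheEntry.forwarded.
        private final Map<String, Object> forwarded = new HashMap<String, Object>();

        // Highlights one field; returns null when the field is skipped or has no match.
        String highlightField(String fieldName, String unlessOption, boolean matches) {
            // skip(): bail out if the field named by "unless" already matched.
            if (unlessOption != null && forwarded.containsKey(unlessOption)) {
                return null;
            }
            if (matches) {
                // Record the match so later fields can skip via their "unless" option.
                forwarded.put(fieldName + "_matched", Boolean.TRUE);
                return "<em>snippet from " + fieldName + "</em>";
            }
            return null;
        }

        public static void main(String[] args) {
            SkipSketch doc = new SkipSketch();
            // "title" matches first, so "content", configured with
            // unless=title_matched, is skipped even though it also matches.
            System.out.println(doc.highlightField("title", null, true));
            System.out.println(doc.highlightField("content", "title_matched", true));
        }
    }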