[
https://issues.apache.org/jira/browse/SOLR-9708?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15662930#comment-15662930
]
ASF GitHub Bot commented on SOLR-9708:
--------------------------------------
Github user dsmiley commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/107#discussion_r87738726
--- Diff:
solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java ---
@@ -0,0 +1,366 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import java.io.IOException;
+import java.text.BreakIterator;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
+import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
+import org.apache.lucene.search.uhighlight.PassageFormatter;
+import org.apache.lucene.search.uhighlight.PassageScorer;
+import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
+import org.apache.solr.common.params.HighlightParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.PluginInfo;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.RTimerTree;
+import org.apache.solr.util.plugin.PluginInfoInitialized;
+
+/*
+ * TODO: The HighlightComponent should not call rewrite on the query; it
should be up to the
+ * SolrHighlighter to do if needed. Furthermore this arrangement is odd
-- why are these abstractions separate?
+ */
+
+/**
+ * Highlighter impl that uses {@link UnifiedHighlighter}
+ * <p>
+ * Example configuration with default values:
+ * <pre class="prettyprint">
+ * <requestHandler name="standard"
class="solr.StandardRequestHandler">
+ * <lst name="defaults">
+ * <int name="hl.snippets">1</int>
+ * <str name="hl.tag.pre">&lt;em&gt;</str>
+ * <str name="hl.tag.post">&lt;/em&gt;</str>
+ * <str name="hl.tag.ellipsis">... </str>
+ * <bool name="hl.defaultSummary">true</bool>
+ * <str name="hl.encoder">simple</str>
+ * <float name="hl.score.k1">1.2</float>
+ * <float name="hl.score.b">0.75</float>
+ * <float name="hl.score.pivot">87</float>
+ * <str name="hl.bs.language"></str>
+ * <str name="hl.bs.country"></str>
+ * <str name="hl.bs.variant"></str>
+ * <str name="hl.bs.type">SENTENCE</str>
+ * <int name="hl.maxAnalyzedChars">10000</int>
+ * <bool name="hl.highlightMultiTerm">true</bool>
+ * </lst>
+ * </requestHandler>
+ * </pre>
+ * ...
+ * <pre class="prettyprint">
+ * <searchComponent class="solr.HighlightComponent" name="highlight">
+ * <highlighting
class="org.apache.solr.highlight.UnifiedSolrHighlighter"/>
+ * </searchComponent>
+ * </pre>
+ * <p>
+ * Notes:
+ * <ul>
+ * <li>hl.q (string) can specify the query
+ * <li>hl.fl (string) specifies the field list.
+ * <li>hl.snippets (int) specifies how many snippets to return.
+ * <li>hl.tag.pre (string) specifies text which appears before a
highlighted term.
+ * <li>hl.tag.post (string) specifies text which appears after a
highlighted term.
+ * <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent
passages. The default is to retain each
+ * value in a list without joining them.
+ * <li>hl.defaultSummary (bool) specifies if a field should have a default
summary of the leading text.
+ * <li>hl.encoder (string) can be 'html' (html escapes content) or
'simple' (no escaping).
+ * <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
+ * <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
+ * <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
+ * <li>hl.bs.type (string) specifies how to divide text into passages:
[SENTENCE, LINE, WORD, CHAR, WHOLE]
+ * <li>hl.bs.language (string) specifies language code for BreakIterator.
default is empty string (root locale)
+ * <li>hl.bs.country (string) specifies country code for BreakIterator.
default is empty string (root locale)
+ * <li>hl.bs.variant (string) specifies variant code for BreakIterator.
default is empty string (root locale)
+ * <li>hl.maxAnalyzedChars specifies how many characters at most will be
processed in a document for any one field.
+ * <li>hl.highlightMultiTerm enables highlighting for
range/wildcard/fuzzy/prefix queries at some cost.
+ * <li>hl.usePhraseHighlighter (bool) enables strict highlighting of phrases and
some other queries, at some cost.</li>
+ * </ul>
+ * TODO add hl.method, hl.cacheFieldValCharsThreshold
+ *
+ * @lucene.experimental
+ */
+public class UnifiedSolrHighlighter extends SolrHighlighter implements
PluginInfoInitialized {
+
+ protected static final String SNIPPET_SEPARATOR = "\u0000";
+ private static final String[] ZERO_LEN_STR_ARRAY = new String[0];
+
+ //TODO move to Solr HighlightParams
--- End diff --
These TODOs should be addressed (and note the corresponding docs TODO on line
109).
> Expose UnifiedHighlighter in Solr
> ---------------------------------
>
> Key: SOLR-9708
> URL: https://issues.apache.org/jira/browse/SOLR-9708
> Project: Solr
> Issue Type: New Feature
> Security Level: Public(Default Security Level. Issues are Public)
> Components: highlighter
> Reporter: Timothy M. Rodriguez
> Assignee: David Smiley
> Fix For: 6.4
>
>
> This ticket is for creating a Solr plugin that can utilize the new
> UnifiedHighlighter which was initially committed in
> https://issues.apache.org/jira/browse/LUCENE-7438
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]