This is an automated email from the ASF dual-hosted git repository.

mkhl pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 8171a5367c3 SOLR-16682:  transfer MLT Component queries via {!bool} (#1408)
8171a5367c3 is described below

commit 8171a5367c3aae579660d5f4eb0c6cf3dc1ee175
Author: Mikhail Khludnev <mkhlud...@users.noreply.github.com>
AuthorDate: Tue Feb 28 18:50:03 2023 +0300

    SOLR-16682:  transfer MLT Component queries via {!bool} (#1408)
    
    transfer MLT Component queries via {!bool} (#1260)
    
    Co-authored-by: David Smiley <dsmi...@apache.org>
---
 solr/CHANGES.txt                                   |   3 +
 .../apache/solr/handler/MoreLikeThisHandler.java   |  93 ++++++---------
 .../handler/component/MoreLikeThisComponent.java   | 126 +++++++++++++--------
 .../component/DistributedMLTComponentTest.java     |  28 ++++-
 4 files changed, 142 insertions(+), 108 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e708dff7561..c8855eb016f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -201,6 +201,9 @@ Bug Fixes
 
 * SOLR-16679: Fix solr.jetty.ssl.verifyClientHostName logging (Kevin Risden)
 
+* SOLR-16682: MoreLikeThis Component fails with SyntaxError: Cannot parse if document terms contains symbols from query parser syntax 
+  (Mikhail Khludnev)
+
 Build
 ---------------------
 * Upgrade forbiddenapis to 3.4 (Uwe Schindler)
diff --git 
a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java 
b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
index 2192aa9cabe..47ca8e3108b 100644
--- a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
@@ -52,7 +52,6 @@ import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.request.SimpleFacets;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
@@ -130,8 +129,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase 
{
 
       // Hold on to the interesting terms if relevant
       TermStyle termStyle = 
TermStyle.get(params.get(MoreLikeThisParams.INTERESTING_TERMS));
-      List<InterestingTerm> interesting =
-          (termStyle == TermStyle.NONE) ? null : new 
ArrayList<>(mlt.mlt.getMaxQueryTerms());
 
       DocListAndSet mltDocs = null;
 
@@ -159,7 +156,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase 
{
         // Find documents MoreLikeThis - either with a reader or a query
         // 
--------------------------------------------------------------------------------
         if (reader != null) {
-          mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters, 
interesting, flags);
+          mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters, flags);
         } else if (q != null) {
           // Matching options
           boolean includeMatch = 
params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
@@ -177,7 +174,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase 
{
           if (iterator.hasNext()) {
             // do a MoreLikeThis query for each document in results
             int id = iterator.nextDoc();
-            mltDocs = mlt.getMoreLikeThis(id, start, rows, filters, 
interesting, flags);
+            mltDocs = mlt.getMoreLikeThis(id, start, rows, filters, flags);
           }
         } else {
           throw new SolrException(
@@ -195,7 +192,9 @@ public class MoreLikeThisHandler extends RequestHandlerBase 
{
       }
       rsp.addResponse(mltDocs.docList);
 
-      if (interesting != null) {
+      if (termStyle != TermStyle.NONE) {
+        final List<InterestingTerm> interesting =
+            mlt.getInterestingTerms(mlt.getBoostedMLTQuery(), 
mlt.mlt.getMaxQueryTerms());
         if (termStyle == TermStyle.DETAILS) {
           NamedList<Float> it = new NamedList<>();
           for (InterestingTerm t : interesting) {
@@ -351,14 +350,14 @@ public class MoreLikeThisHandler extends 
RequestHandlerBase {
     }
 
     private Query rawMLTQuery;
-    private Query boostedMLTQuery;
+    private BooleanQuery boostedMLTQuery;
     private BooleanQuery realMLTQuery;
 
     public Query getRawMLTQuery() {
       return rawMLTQuery;
     }
 
-    public Query getBoostedMLTQuery() {
+    public BooleanQuery getBoostedMLTQuery() {
       return boostedMLTQuery;
     }
 
@@ -366,7 +365,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase 
{
       return realMLTQuery;
     }
 
-    private Query getBoostedQuery(Query mltquery) {
+    private BooleanQuery getBoostedQuery(Query mltquery) {
       BooleanQuery boostedQuery = (BooleanQuery) mltquery;
       if (boostFields.size() > 0) {
         BooleanQuery.Builder newQ = new BooleanQuery.Builder();
@@ -392,18 +391,13 @@ public class MoreLikeThisHandler extends 
RequestHandlerBase {
     }
 
     public DocListAndSet getMoreLikeThis(
-        int id, int start, int rows, List<Query> filters, 
List<InterestingTerm> terms, int flags)
-        throws IOException {
+        int id, int start, int rows, List<Query> filters, int flags) throws 
IOException {
       Document doc = reader.document(id);
-      rawMLTQuery = mlt.like(id);
-      boostedMLTQuery = getBoostedQuery(rawMLTQuery);
-      if (terms != null) {
-        fillInterestingTermsFromMLTQuery(boostedMLTQuery, terms);
-      }
+      final Query boostedQuery = getBoostedMLTQuery(id);
 
       // exclude current document from results
       BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
-      realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
+      realMLTQuery.add(boostedQuery, BooleanClause.Occur.MUST);
       realMLTQuery.add(
           new TermQuery(
               new Term(
@@ -423,14 +417,15 @@ public class MoreLikeThisHandler extends 
RequestHandlerBase {
       return results;
     }
 
+    /** Sets {@link #boostedMLTQuery} and returns it */
+    public BooleanQuery getBoostedMLTQuery(int docNum) throws IOException {
+      rawMLTQuery = mlt.like(docNum);
+      boostedMLTQuery = getBoostedQuery(rawMLTQuery);
+      return boostedMLTQuery;
+    }
+
     public DocListAndSet getMoreLikeThis(
-        Reader reader,
-        int start,
-        int rows,
-        List<Query> filters,
-        List<InterestingTerm> terms,
-        int flags)
-        throws IOException {
+        Reader reader, int start, int rows, List<Query> filters, int flags) 
throws IOException {
       // SOLR-5351: if only check against a single field, use the reader directly. Otherwise we
       // repeat the stream's content for multiple fields so that query terms can be pulled from any
       // of those fields.
@@ -450,14 +445,9 @@ public class MoreLikeThisHandler extends 
RequestHandlerBase {
         for (String field : fields) {
           multifieldDoc.put(field, streamValue);
         }
-
         rawMLTQuery = mlt.like(multifieldDoc);
       }
-
       boostedMLTQuery = getBoostedQuery(rawMLTQuery);
-      if (terms != null) {
-        fillInterestingTermsFromMLTQuery(boostedMLTQuery, terms);
-      }
       DocListAndSet results = new DocListAndSet();
       if (this.needDocSet) {
         results = searcher.getDocListAndSet(boostedMLTQuery, filters, null, 
start, rows, flags);
@@ -466,37 +456,19 @@ public class MoreLikeThisHandler extends 
RequestHandlerBase {
       }
       return results;
     }
-
-    public NamedList<BooleanQuery> getMoreLikeTheseQuery(DocList docs) throws 
IOException {
-      IndexSchema schema = searcher.getSchema();
-      NamedList<BooleanQuery> result = new NamedList<>();
-      DocIterator iterator = docs.iterator();
-      while (iterator.hasNext()) {
-        int id = iterator.nextDoc();
-        String uniqueId = schema.printableUniqueKey(reader.document(id));
-
-        BooleanQuery mltquery = (BooleanQuery) mlt.like(id);
-        if (mltquery.clauses().size() == 0) {
-          return result;
-        }
-        mltquery = (BooleanQuery) getBoostedQuery(mltquery);
-
-        // exclude current document from results
-        BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
-        mltQuery.add(mltquery, BooleanClause.Occur.MUST);
-
-        mltQuery.add(
-            new TermQuery(new Term(uniqueKeyField.getName(), uniqueId)),
-            BooleanClause.Occur.MUST_NOT);
-        result.add(uniqueId, mltQuery.build());
-      }
-
-      return result;
-    }
-
-    private void fillInterestingTermsFromMLTQuery(Query query, 
List<InterestingTerm> terms) {
-      Collection<BooleanClause> clauses = ((BooleanQuery) query).clauses();
+    /**
+     * Yields terms with boosts from the boosted MLT query.
+     *
+     * @param maxTerms how many terms to return, a negative value means all terms are returned
+     */
+    public List<InterestingTerm> getInterestingTerms(BooleanQuery 
boostedMLTQuery, int maxTerms) {
+      assert boostedMLTQuery != null : "strictly expecting it's set";
+      Collection<BooleanClause> clauses = boostedMLTQuery.clauses();
+      List<InterestingTerm> output = new ArrayList<>(maxTerms < 0 ? 
clauses.size() : maxTerms);
       for (BooleanClause o : clauses) {
+        if (maxTerms > -1 && output.size() >= maxTerms) {
+          break;
+        }
         Query q = o.getQuery();
         float boost = 1f;
         if (q instanceof BoostQuery) {
@@ -507,10 +479,11 @@ public class MoreLikeThisHandler extends 
RequestHandlerBase {
         InterestingTerm it = new InterestingTerm();
         it.boost = boost;
         it.term = ((TermQuery) q).getTerm();
-        terms.add(it);
+        output.add(it);
       }
       // alternatively we could use
       // mltquery.extractTerms( terms );
+      return output;
     }
 
     public MoreLikeThis getMoreLikeThis() {
diff --git 
a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
 
b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
index 0f220e0d99a..e272c646116 100644
--- 
a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
+++ 
b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
@@ -23,13 +23,16 @@ import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.TreeMap;
-import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.params.CommonParams;
@@ -95,24 +98,24 @@ public class MoreLikeThisComponent extends SearchComponent {
             rb.rsp.add("moreLikeThis", new NamedList<DocList>());
             return;
           }
-
           MoreLikeThisHandler.MoreLikeThisHelper mlt =
               new MoreLikeThisHandler.MoreLikeThisHelper(params, searcher);
-
-          NamedList<BooleanQuery> bQuery = 
mlt.getMoreLikeTheseQuery(rb.getResults().docList);
-
-          NamedList<String> temp = new NamedList<>();
-          Iterator<Entry<String, BooleanQuery>> idToQueryIt = 
bQuery.iterator();
-
-          while (idToQueryIt.hasNext()) {
-            Entry<String, BooleanQuery> idToQuery = idToQueryIt.next();
-            String s = idToQuery.getValue().toString();
-
-            log.debug("MLT Query:{}", s);
-            temp.add(idToQuery.getKey(), idToQuery.getValue().toString());
+          NamedList<NamedList<?>> mltQueryByDocKey = new NamedList<>();
+          for (DocIterator results = rb.getResults().docList.iterator(); 
results.hasNext(); ) {
+            int docId = results.nextDoc();
+            final List<MoreLikeThisHandler.InterestingTerm> interestingTerms =
+                mlt.getInterestingTerms(mlt.getBoostedMLTQuery(docId), -1);
+            if (interestingTerms.isEmpty()) {
+              continue;
+            }
+            final String uniqueKey = 
rb.req.getSchema().getUniqueKeyField().getName();
+            final Document document = rb.req.getSearcher().doc(docId);
+            final String uniqueVal = 
rb.req.getSchema().printableUniqueKey(document);
+            final NamedList<String> mltQ =
+                mltViaQueryParams(rb.req.getSchema(), interestingTerms, 
uniqueKey, uniqueVal);
+            mltQueryByDocKey.add(uniqueVal, mltQ);
           }
-
-          rb.rsp.add("moreLikeThis", temp);
+          rb.rsp.add("moreLikeThis", mltQueryByDocKey);
         } else {
           NamedList<DocList> sim =
               getMoreLikeThese(rb, rb.req.getSearcher(), 
rb.getResults().docList, flags);
@@ -127,6 +130,53 @@ public class MoreLikeThisComponent extends SearchComponent 
{
     }
   }
 
+  private static NamedList<String> mltViaQueryParams(
+      IndexSchema schema,
+      List<MoreLikeThisHandler.InterestingTerm> terms,
+      String uniqueField,
+      String uniqueVal) {
+    final NamedList<String> mltQ = new NamedList<>();
+    StringBuilder q = new StringBuilder("{!bool");
+    q.append(" must_not=$");
+    int cnt = 0;
+    String param = "mltq" + (cnt++);
+    q.append(param);
+    mltQ.add(param, "{!field f=" + uniqueField + "}" + uniqueVal);
+    final StringBuilder reuseStr = new StringBuilder();
+    final CharsRefBuilder reuseChar = new CharsRefBuilder();
+    for (MoreLikeThisHandler.InterestingTerm term : terms) {
+      param = "mltq" + (cnt++);
+      q.append(" should=$");
+      q.append(param);
+      mltQ.add(param, toParserParam(schema, term.term, term.boost, reuseStr, 
reuseChar));
+    }
+    q.append("}");
+    mltQ.add(CommonParams.Q, q.toString());
+    return mltQ;
+  }
+
+  private static String toParserParam(
+      IndexSchema schema,
+      Term term1,
+      float boost,
+      StringBuilder reuseStr,
+      CharsRefBuilder reuseChar) {
+    reuseStr.setLength(0);
+    if (boost != 1f) {
+      reuseStr.append("{!boost b=");
+      reuseStr.append(boost);
+      reuseStr.append("}");
+    }
+    final String field = term1.field();
+    final CharsRef val =
+        schema.getField(field).getType().indexedToReadable(term1.bytes(), 
reuseChar);
+    reuseStr.append("{!term f=");
+    reuseStr.append(ClientUtils.encodeLocalParamVal(field));
+    reuseStr.append("}");
+    reuseStr.append(val);
+    return reuseStr.toString();
+  }
+
   @Override
   public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
     if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0
@@ -139,17 +189,18 @@ public class MoreLikeThisComponent extends 
SearchComponent {
           // This should only happen in case of using shards.tolerant=true. 
Omit this ShardResponse
           continue;
         }
-        NamedList<?> moreLikeThisReponse =
-            (NamedList<?>) 
r.getSolrResponse().getResponse().get("moreLikeThis");
+        @SuppressWarnings("unchecked")
+        NamedList<NamedList<String>> moreLikeThisReponse =
+            (NamedList<NamedList<String>>) 
r.getSolrResponse().getResponse().get("moreLikeThis");
         if (log.isDebugEnabled()) {
           log.debug("ShardRequest.response.shard: {}", r.getShard());
         }
         if (moreLikeThisReponse != null) {
-          for (Entry<String, ?> entry : moreLikeThisReponse) {
+          for (Entry<String, NamedList<String>> entry : moreLikeThisReponse) {
             if (log.isDebugEnabled()) {
               log.debug("id: '{}' Query: '{}'", entry.getKey(), 
entry.getValue());
             }
-            ShardRequest s = buildShardQuery(rb, (String) entry.getValue(), 
entry.getKey());
+            ShardRequest s = buildShardQuery(rb, entry.getValue(), 
entry.getKey());
             rb.addRequest(this, s);
           }
         }
@@ -309,7 +360,7 @@ public class MoreLikeThisComponent extends SearchComponent {
     return result;
   }
 
-  ShardRequest buildShardQuery(ResponseBuilder rb, String q, String key) {
+  ShardRequest buildShardQuery(ResponseBuilder rb, NamedList<String> q, String 
key) {
     ShardRequest s = new ShardRequest();
     s.params = new ModifiableSolrParams(rb.req.getParams());
     s.purpose |= ShardRequest.PURPOSE_GET_MLT_RESULTS;
@@ -337,24 +388,9 @@ public class MoreLikeThisComponent extends SearchComponent 
{
     s.params.set(CommonParams.FL, "score," + id);
     s.params.set(SORT, "score desc");
     // MLT Query is submitted as normal query to shards.
-    s.params.set(CommonParams.Q, q);
-
-    return s;
-  }
-
-  ShardRequest buildMLTQuery(ResponseBuilder rb, String q) {
-    ShardRequest s = new ShardRequest();
-    s.params = new ModifiableSolrParams();
-
-    s.params.set(CommonParams.START, 0);
-
-    String id = rb.req.getSchema().getUniqueKeyField().getName();
-
-    s.params.set(CommonParams.FL, "score," + id);
-    // MLT Query is submitted as normal query to shards.
-    s.params.set(CommonParams.Q, q);
+    s.params.remove(CommonParams.Q);
+    q.forEach((k, v) -> s.params.add(k, v));
 
-    s.shards = ShardRequest.ALL_SHARDS;
     return s;
   }
 
@@ -375,12 +411,8 @@ public class MoreLikeThisComponent extends SearchComponent 
{
     SimpleOrderedMap<Object> interestingTermsResponse = null;
     MoreLikeThisParams.TermStyle interestingTermsConfig =
         
MoreLikeThisParams.TermStyle.get(p.get(MoreLikeThisParams.INTERESTING_TERMS));
-    List<MoreLikeThisHandler.InterestingTerm> interestingTerms =
-        (interestingTermsConfig == MoreLikeThisParams.TermStyle.NONE)
-            ? null
-            : new ArrayList<>(mltHelper.getMoreLikeThis().getMaxQueryTerms());
 
-    if (interestingTerms != null) {
+    if (interestingTermsConfig != MoreLikeThisParams.TermStyle.NONE) {
       interestingTermsResponse = new SimpleOrderedMap<>();
     }
 
@@ -388,8 +420,7 @@ public class MoreLikeThisComponent extends SearchComponent {
       int id = iterator.nextDoc();
       int rows = p.getInt(MoreLikeThisParams.DOC_COUNT, 5);
 
-      DocListAndSet similarDocuments =
-          mltHelper.getMoreLikeThis(id, 0, rows, null, interestingTerms, 
flags);
+      DocListAndSet similarDocuments = mltHelper.getMoreLikeThis(id, 0, rows, 
null, flags);
       String name = schema.printableUniqueKey(searcher.doc(id));
       mltResponse.add(name, similarDocuments.docList);
 
@@ -410,6 +441,9 @@ public class MoreLikeThisComponent extends SearchComponent {
       }
 
       if (interestingTermsResponse != null) {
+        List<MoreLikeThisHandler.InterestingTerm> interestingTerms =
+            mltHelper.getInterestingTerms(
+                mltHelper.getBoostedMLTQuery(), 
mltHelper.getMoreLikeThis().getMaxQueryTerms());
         if (interestingTermsConfig == MoreLikeThisParams.TermStyle.DETAILS) {
           SimpleOrderedMap<Float> interestingTermsWithScore = new 
SimpleOrderedMap<>();
           for (MoreLikeThisHandler.InterestingTerm interestingTerm : 
interestingTerms) {
diff --git 
a/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
 
b/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
index bf803e3b2b1..2d5de7e3bd3 100644
--- 
a/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
+++ 
b/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
@@ -91,7 +91,7 @@ public class DistributedMLTComponentTest extends 
BaseDistributedSearchTestCase {
         id,
         "9",
         "lowerfilt",
-        "The quick red fox jumped over the lazy big and large brown dogs.",
+        "The quick red:fox jumped over the lazy big and large brown dogs.",
         "lowerfilt1",
         "x");
     index(id, "10", "lowerfilt", "blue", "lowerfilt1", "x");
@@ -100,7 +100,7 @@ public class DistributedMLTComponentTest extends 
BaseDistributedSearchTestCase {
         id,
         "13",
         "lowerfilt",
-        "The quote red fox jumped over the lazy brown dogs.",
+        "The quote RED)FOX jumped over the lazy brown dogs.",
         "lowerfilt1",
         "y");
     index(
@@ -389,5 +389,29 @@ public class DistributedMLTComponentTest extends 
BaseDistributedSearchTestCase {
       Long actual = ((SolrDocumentList) entry.getValue()).getNumFound();
       assertEquals("MLT mismatch for id=" + key, expected, actual);
     }
+    // test boost mlt.qf
+    query(
+        "q",
+        "lowerfilt:moon",
+        "fl",
+        id,
+        MoreLikeThisParams.MIN_TERM_FREQ,
+        2,
+        MoreLikeThisParams.MIN_DOC_FREQ,
+        1,
+        "sort",
+        "id_i1 desc",
+        "mlt",
+        "true",
+        "mlt.fl",
+        "lowerfilt1,lowerfilt",
+        "mlt.qf",
+        "lowerfilt1^1.2 lowerfilt^3.4",
+        "qt",
+        requestHandlerName,
+        "shards.qt",
+        requestHandlerName,
+        "mlt.count",
+        "20");
   }
 }

Reply via email to