dsmiley commented on code in PR #4277:
URL: https://github.com/apache/solr/pull/4277#discussion_r3077120573
##########
solr/core/src/java/org/apache/solr/handler/component/combine/QueryAndResponseCombiner.java:
##########
@@ -87,14 +129,47 @@ public static QueryResult simpleCombine(List<QueryResult>
queryResults) {
combinedResultsLength,
combinedResultsDocIds,
combinedResultScores,
- Math.max(combinedResultsLength, totalMatches),
+ Math.max(combinedResultsLength, totalMatches - removedByCollapse),
combinedResultScores.length > 0 ? combinedResultScores[0] : 0,
TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO);
combinedQueryResults.setDocList(combinedResultSlice);
combinedQueryResults.setDocSet(combinedDocSet);
return combinedQueryResults;
}
+ /**
+ * Removes collapsed duplicates by delegating to SolrIndexSearcher with the
CollapsingPostFilter.
+ * Uses a PrecomputedScoreQuery to preserve original scores from combined
sub-queries, then
+ * applies the collapse filter to determine surviving doc IDs.
+ *
+ * <p>This leverages Solr's native collapse infrastructure instead of
manually reading DocValues.
Review Comment:
very clever; well done!
##########
solr/core/src/java/org/apache/solr/handler/component/combine/QueryAndResponseCombiner.java:
##########
@@ -127,4 +202,155 @@ public static QueryAndResponseCombiner getImplementation(
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " +
algorithm);
}
+
+ /**
+ * A query that returns pre-computed scores for specific doc IDs. Used to
preserve original scores
+ * from combined sub-queries when delegating collapse to the searcher.
+ */
+ private static class PrecomputedScoreQuery extends Query {
+ private final Map<Integer, Float> docScores;
+
+ PrecomputedScoreQuery(Map<Integer, Float> docScores) {
+ this.docScores = docScores;
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode,
float boost) {
+ return new PrecomputedScoreWeight(this, docScores, boost);
+ }
+
+ @Override
+ public String toString(String field) {
+ return "PrecomputedScoreQuery(docs=" + docScores.size() + ")";
+ }
+
+ @Override
+ public void visit(QueryVisitor visitor) {
+ visitor.visitLeaf(this);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof PrecomputedScoreQuery otherQuery
+ && docScores.equals(otherQuery.docScores);
+ }
+
+ @Override
+ public int hashCode() {
+ return classHash() * 31 + docScores.hashCode();
+ }
+ }
+
+ private static class PrecomputedScoreWeight extends Weight {
+ private final Map<Integer, Float> docScores;
+ private final float boost;
+
+ PrecomputedScoreWeight(Query query, Map<Integer, Float> docScores, float
boost) {
+ super(query);
+ this.docScores = docScores;
+ this.boost = boost;
+ }
+
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) {
+ int globalDoc = context.docBase + doc;
+ Float score = docScores.get(globalDoc);
+ if (score != null) {
+ return Explanation.match(score * boost, "precomputed score");
+ }
+ return Explanation.noMatch("no precomputed score");
+ }
+
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) {
+ List<Integer> localDocs = getLocalDocs(context);
+ if (localDocs.isEmpty()) {
+ return null;
+ }
+ Scorer scorer = new PrecomputedScoreScorer(localDocs, context.docBase,
docScores, boost);
+ return new SolrDefaultScorerSupplier(scorer);
+ }
+
+ private List<Integer> getLocalDocs(LeafReaderContext context) {
Review Comment:
the word "local" is not a word I've seen to describe the docs for a segment.
I suggest you use the word "segment" or "leaf"
##########
solr/core/src/java/org/apache/solr/handler/component/combine/QueryAndResponseCombiner.java:
##########
@@ -127,4 +202,155 @@ public static QueryAndResponseCombiner getImplementation(
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " +
algorithm);
}
+
+ /**
+ * A query that returns pre-computed scores for specific doc IDs. Used to
preserve original scores
+ * from combined sub-queries when delegating collapse to the searcher.
+ */
+ private static class PrecomputedScoreQuery extends Query {
+ private final Map<Integer, Float> docScores;
+
+ PrecomputedScoreQuery(Map<Integer, Float> docScores) {
+ this.docScores = docScores;
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode,
float boost) {
+ return new PrecomputedScoreWeight(this, docScores, boost);
+ }
+
+ @Override
+ public String toString(String field) {
+ return "PrecomputedScoreQuery(docs=" + docScores.size() + ")";
+ }
+
+ @Override
+ public void visit(QueryVisitor visitor) {
+ visitor.visitLeaf(this);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof PrecomputedScoreQuery otherQuery
+ && docScores.equals(otherQuery.docScores);
+ }
+
+ @Override
+ public int hashCode() {
+ return classHash() * 31 + docScores.hashCode();
+ }
+ }
+
+ private static class PrecomputedScoreWeight extends Weight {
+ private final Map<Integer, Float> docScores;
+ private final float boost;
+
+ PrecomputedScoreWeight(Query query, Map<Integer, Float> docScores, float
boost) {
+ super(query);
+ this.docScores = docScores;
+ this.boost = boost;
+ }
+
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) {
+ int globalDoc = context.docBase + doc;
+ Float score = docScores.get(globalDoc);
+ if (score != null) {
+ return Explanation.match(score * boost, "precomputed score");
+ }
+ return Explanation.noMatch("no precomputed score");
+ }
+
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) {
+ List<Integer> localDocs = getLocalDocs(context);
+ if (localDocs.isEmpty()) {
+ return null;
+ }
+ Scorer scorer = new PrecomputedScoreScorer(localDocs, context.docBase,
docScores, boost);
+ return new SolrDefaultScorerSupplier(scorer);
+ }
+
+ private List<Integer> getLocalDocs(LeafReaderContext context) {
Review Comment:
This overall algorithm has a concerning algorithmic complexity cost --
basically numScoredDocs * numSegments. But moreover I think this can be
replaced as I indicated above (i.e. custom DoubleValuesSource)
##########
solr/core/src/java/org/apache/solr/handler/component/combine/QueryAndResponseCombiner.java:
##########
@@ -127,4 +202,155 @@ public static QueryAndResponseCombiner getImplementation(
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " +
algorithm);
}
+
+ /**
+ * A query that returns pre-computed scores for specific doc IDs. Used to
preserve original scores
+ * from combined sub-queries when delegating collapse to the searcher.
+ */
+ private static class PrecomputedScoreQuery extends Query {
Review Comment:
I suggest replacing this query with the following:
DocSetQuery from a SortedIntDocSet. Then secondly wrap this with
`FunctionScoreQuery.boostByValue` and a custom DoubleValuesSource. The DVS
will have some code to it but much simpler and less code than a custom Query
that you have here.
##########
solr/core/src/java/org/apache/solr/handler/component/combine/QueryAndResponseCombiner.java:
##########
@@ -127,4 +202,155 @@ public static QueryAndResponseCombiner getImplementation(
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " +
algorithm);
}
+
+ /**
+ * A query that returns pre-computed scores for specific doc IDs. Used to
preserve original scores
+ * from combined sub-queries when delegating collapse to the searcher.
+ */
+ private static class PrecomputedScoreQuery extends Query {
+ private final Map<Integer, Float> docScores;
Review Comment:
Consider a HPPC map here
##########
solr/core/src/java/org/apache/solr/handler/component/combine/QueryAndResponseCombiner.java:
##########
@@ -127,4 +202,155 @@ public static QueryAndResponseCombiner getImplementation(
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " +
algorithm);
}
+
+ /**
+ * A query that returns pre-computed scores for specific doc IDs. Used to
preserve original scores
+ * from combined sub-queries when delegating collapse to the searcher.
+ */
+ private static class PrecomputedScoreQuery extends Query {
Review Comment:
I was thinking the same but I've never seen one like this and my attempt to
find one now in Lucene turned up nothing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]