benwtrent commented on code in PR #15667:
URL: https://github.com/apache/lucene/pull/15667#discussion_r2771264110
##########
lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java:
##########
@@ -310,21 +310,47 @@ private boolean isWorstNonDiverse(
float minAcceptedSimilarity = scores.get(candidateIndex);
if (candidateIndex == uncheckedIndexes[uncheckedCursor]) {
// the candidate itself is unchecked
- for (int i = candidateIndex - 1; i >= 0; i--) {
- float neighborSimilarity = scorer.score(nodes.get(i));
+ int numNodesToCheck = candidateIndex;
+ if (numNodesToCheck == 0) {
+ return false;
+ }
+
+ // Allocate a temporary buffer for scores.
+ // NeighborArray size is typically small (M=16 or 32), so this
allocation is acceptable
+ // and keeps the change localized to this class.
+ float[] neighborScores = new float[numNodesToCheck];
+
+ // Bulk score all neighbors.
+ // The default implementation in RandomVectorScorer handles the fallback
if needed.
+ scorer.bulkScore(nodes.buffer, neighborScores, numNodesToCheck);
+
+ for (int i = 0; i < numNodesToCheck; i++) {
// candidate node is too similar to node i given its score relative to
the base node
- if (neighborSimilarity >= minAcceptedSimilarity) {
+ if (neighborScores[i] >= minAcceptedSimilarity) {
return true;
}
}
} else {
// else we just need to make sure candidate does not violate diversity
with the (newly
// inserted) unchecked nodes
assert candidateIndex > uncheckedIndexes[uncheckedCursor];
- for (int i = uncheckedCursor; i >= 0; i--) {
- float neighborSimilarity =
scorer.score(nodes.get(uncheckedIndexes[i]));
+ int numNodesToCheck = uncheckedCursor + 1;
+
+ // Allocate a temporary buffer for scores.
+ float[] neighborScores = new float[numNodesToCheck];
+
+ // Create a temporary array with only the nodes we need to check
+ int[] nodesToCheck = new int[numNodesToCheck];
+ for (int i = 0; i <= uncheckedCursor; i++) {
+ nodesToCheck[i] = nodes.get(uncheckedIndexes[i]);
+ }
+
+ // Bulk score all unchecked neighbors
+ scorer.bulkScore(nodesToCheck, neighborScores, numNodesToCheck);
Review Comment:
Why can't we use the max score returned from bulk score?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]