This is an automated email from the ASF dual-hosted git repository.

abenedetti pushed a commit to branch branch_10_0
in repository https://gitbox.apache.org/repos/asf/solr.git

commit eee4ad9953b1d37b1410d779340a33768d2cf687
Author: Ilaria Petreti <[email protected]>
AuthorDate: Mon Oct 20 15:23:57 2025 +0200

    SOLR-17813: Add support for SeededKnnVectorQuery in vector search (#3705)
    
    * support Lucene's (proposed) HNSW search seeding feature
    
    Co-authored-by: Christine Poerschke <[email protected]>
    
    (cherry picked from commit 4f17deeee503e6f42af59704dbce7a7a6782ff32)
---
 solr/CHANGES.txt                                   |   4 +
 .../org/apache/solr/schema/DenseVectorField.java   |  94 +++++++++++------
 .../org/apache/solr/search/neural/KnnQParser.java  |  24 ++++-
 .../apache/solr/search/neural/KnnQParserTest.java  | 116 +++++++++++++++++++++
 .../query-guide/pages/dense-vector-search.adoc     |  36 +++++--
 5 files changed, 231 insertions(+), 43 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index cd796c07cfa..bb9d4448590 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -29,6 +29,10 @@ New Features
 
 * SOLR-17814: Add support for PatienceKnnVectorQuery. (Ilaria Petreti via 
Alessandro Benedetti)
 
+* SOLR-17948: Support indexing primitive float[] values for DenseVectorField 
via JavaBin (Puneet Ahuja, Noble Paul)
+
+* SOLR-17813: Add support for SeededKnnVectorQuery (Ilaria Petreti via 
Alessandro Benedetti)
+
 Improvements
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java 
b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
index 22d0add817c..771d11c5635 100644
--- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
@@ -41,6 +41,7 @@ import org.apache.lucene.search.KnnByteVectorQuery;
 import org.apache.lucene.search.KnnFloatVectorQuery;
 import org.apache.lucene.search.PatienceKnnVectorQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SeededKnnVectorQuery;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.hnsw.HnswGraph;
@@ -377,43 +378,35 @@ public class DenseVectorField extends FloatPointField {
       String vectorToSearch,
       int topK,
       Query filterQuery,
+      Query seedQuery,
       EarlyTerminationParams earlyTermination) {
 
     DenseVectorParser vectorBuilder =
         getVectorBuilder(vectorToSearch, DenseVectorParser.BuilderPhase.QUERY);
 
-    switch (vectorEncoding) {
-      case FLOAT32:
-        KnnFloatVectorQuery knnFloatVectorQuery =
-            new KnnFloatVectorQuery(fieldName, vectorBuilder.getFloatVector(), 
topK, filterQuery);
-        if (earlyTermination.isEnabled()) {
-          return (earlyTermination.getSaturationThreshold() != null
-                  && earlyTermination.getPatience() != null)
-              ? PatienceKnnVectorQuery.fromFloatQuery(
-                  knnFloatVectorQuery,
-                  earlyTermination.getSaturationThreshold(),
-                  earlyTermination.getPatience())
-              : PatienceKnnVectorQuery.fromFloatQuery(knnFloatVectorQuery);
-        }
-        return knnFloatVectorQuery;
-      case BYTE:
-        KnnByteVectorQuery knnByteVectorQuery =
-            new KnnByteVectorQuery(fieldName, vectorBuilder.getByteVector(), 
topK, filterQuery);
-        if (earlyTermination.isEnabled()) {
-          return (earlyTermination.getSaturationThreshold() != null
-                  && earlyTermination.getPatience() != null)
-              ? PatienceKnnVectorQuery.fromByteQuery(
-                  knnByteVectorQuery,
-                  earlyTermination.getSaturationThreshold(),
-                  earlyTermination.getPatience())
-              : PatienceKnnVectorQuery.fromByteQuery(knnByteVectorQuery);
-        }
-        return knnByteVectorQuery;
-      default:
-        throw new SolrException(
-            SolrException.ErrorCode.SERVER_ERROR,
-            "Unexpected state. Vector Encoding: " + vectorEncoding);
-    }
+    final Query knnQuery =
+        switch (vectorEncoding) {
+          case FLOAT32 -> new KnnFloatVectorQuery(
+              fieldName, vectorBuilder.getFloatVector(), topK, filterQuery);
+          case BYTE -> new KnnByteVectorQuery(
+              fieldName, vectorBuilder.getByteVector(), topK, filterQuery);
+        };
+
+    final boolean seedEnabled = (seedQuery != null);
+    final boolean earlyTerminationEnabled =
+        (earlyTermination != null && earlyTermination.isEnabled());
+
+    int caseNumber = (seedEnabled ? 1 : 0) + (earlyTerminationEnabled ? 2 : 0);
+    return switch (caseNumber) {
+        // 0: no seed, no early termination -> knnQuery
+      default -> knnQuery;
+        // 1: only seed -> Seeded(knnQuery)
+      case 1 -> getSeededQuery(knnQuery, seedQuery);
+        // 2: only early termination -> Patience(knnQuery)
+      case 2 -> getEarlyTerminationQuery(knnQuery, earlyTermination);
+        // 3: seed + early termination -> Patience(Seeded(knnQuery))
+      case 3 -> getEarlyTerminationQuery(getSeededQuery(knnQuery, seedQuery), 
earlyTermination);
+    };
   }
 
   /**
@@ -446,4 +439,41 @@ public class DenseVectorField extends FloatPointField {
     throw new SolrException(
         SolrException.ErrorCode.BAD_REQUEST, "Cannot sort on a Dense Vector 
field");
   }
+
+  private Query getSeededQuery(Query knnQuery, Query seed) {
+    return switch (knnQuery) {
+      case KnnFloatVectorQuery knnFloatQuery -> 
SeededKnnVectorQuery.fromFloatQuery(
+          knnFloatQuery, seed);
+      case KnnByteVectorQuery knnByteQuery -> 
SeededKnnVectorQuery.fromByteQuery(
+          knnByteQuery, seed);
+      default -> throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, "Invalid type of knn query");
+    };
+  }
+
+  private Query getEarlyTerminationQuery(Query knnQuery, 
EarlyTerminationParams earlyTermination) {
+    final boolean useExplicitParams =
+        (earlyTermination.getSaturationThreshold() != null
+            && earlyTermination.getPatience() != null);
+    return switch (knnQuery) {
+      case KnnFloatVectorQuery knnFloatQuery -> useExplicitParams
+          ? PatienceKnnVectorQuery.fromFloatQuery(
+              knnFloatQuery,
+              earlyTermination.getSaturationThreshold(),
+              earlyTermination.getPatience())
+          : PatienceKnnVectorQuery.fromFloatQuery(knnFloatQuery);
+      case KnnByteVectorQuery knnByteQuery -> useExplicitParams
+          ? PatienceKnnVectorQuery.fromByteQuery(
+              knnByteQuery,
+              earlyTermination.getSaturationThreshold(),
+              earlyTermination.getPatience())
+          : PatienceKnnVectorQuery.fromByteQuery(knnByteQuery);
+      case SeededKnnVectorQuery seedQuery -> useExplicitParams
+          ? PatienceKnnVectorQuery.fromSeededQuery(
+              seedQuery, earlyTermination.getSaturationThreshold(), 
earlyTermination.getPatience())
+          : PatienceKnnVectorQuery.fromSeededQuery(seedQuery);
+      default -> throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, "Invalid type of knn query");
+    };
+  }
 }
diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java 
b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
index 189069805cd..664e6f341c9 100644
--- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
@@ -23,6 +23,7 @@ import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.DenseVectorField;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.QParser;
 import org.apache.solr.search.SyntaxError;
 
 public class KnnQParser extends AbstractVectorQParserBase {
@@ -30,10 +31,10 @@ public class KnnQParser extends AbstractVectorQParserBase {
   // retrieve the top K results based on the distance similarity function
   protected static final String TOP_K = "topK";
   protected static final int DEFAULT_TOP_K = 10;
+  protected static final String SEED_QUERY = "seedQuery";
 
   // parameters for PatienceKnnVectorQuery, a version of knn vector query that 
exits early when HNSW
-  // queue
-  // saturates over a {@code #saturationThreshold} for more than {@code 
#patience} times.
+  // queue saturates over a {@code #saturationThreshold} for more than {@code 
#patience} times.
   protected static final String EARLY_TERMINATION = "earlyTermination";
   protected static final boolean DEFAULT_EARLY_TERMINATION = false;
   protected static final String SATURATION_THRESHOLD = "saturationThreshold";
@@ -88,6 +89,18 @@ public class KnnQParser extends AbstractVectorQParserBase {
     return new EarlyTerminationParams(enabled, saturationThreshold, patience);
   }
 
+  protected Query getSeedQuery() throws SolrException, SyntaxError {
+    String seed = localParams.get(SEED_QUERY);
+    if (seed == null) return null;
+    if (seed.isBlank()) {
+      throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST,
+          "'seedQuery' parameter is present but is blank: please provide a 
valid query");
+    }
+    final QParser seedParser = subQuery(seed, null);
+    return seedParser.getQuery();
+  }
+
   @Override
   public Query parse() throws SyntaxError {
     final SchemaField schemaField = 
req.getCore().getLatestSchema().getField(getFieldName());
@@ -96,6 +109,11 @@ public class KnnQParser extends AbstractVectorQParserBase {
     final int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
 
     return denseVectorType.getKnnVectorQuery(
-        schemaField.getName(), vectorToSearch, topK, getFilterQuery(), 
getEarlyTerminationParams());
+        schemaField.getName(),
+        vectorToSearch,
+        topK,
+        getFilterQuery(),
+        getSeedQuery(),
+        getEarlyTerminationParams());
   }
 }
diff --git 
a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java 
b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
index fe417165197..cfa5d91da69 100644
--- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
@@ -1198,4 +1198,120 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
                 vectorToSearch)),
         SolrException.ErrorCode.BAD_REQUEST);
   }
+
+  @Test
+  public void knnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() {
+    // Test to verify that when the seedQuery parameter is provided, the 
SeededKnnVectorQuery is
+    // executed (float).
+    String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+    assertQ(
+        req(
+            CommonParams.Q,
+            "{!knn f=vector topK=4 seedQuery='id:(1 4 7 8 9)'}" + 
vectorToSearch,
+            "fl",
+            "id",
+            "debugQuery",
+            "true"),
+        "//result[@numFound='4']",
+        
"//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1
 id:4 id:7 id:8 id:9, seedWeight=null, 
delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']");
+  }
+
+  @Test
+  public void byteKnnQueryWithSeedQuery_shouldPerformSeededKnnVectorQuery() {
+    // Test to verify that when the seedQuery parameter is provided, the 
SeededKnnVectorQuery is
+    // executed (byte).
+
+    String vectorToSearch = "[2, 2, 1, 3]";
+
+    // BooleanQuery
+    assertQ(
+        req(
+            CommonParams.Q,
+            "{!knn f=vector_byte_encoding topK=4 seedQuery='id:(1 4 7 8 9)'}" 
+ vectorToSearch,
+            "fl",
+            "id",
+            "debugQuery",
+            "true"),
+        "//result[@numFound='4']",
+        
"//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=id:1
 id:4 id:7 id:8 id:9, seedWeight=null, 
delegate=KnnByteVectorQuery:vector_byte_encoding[2,...][4]})']");
+  }
+
+  @Test
+  public void knnQueryWithBlankSeed_shouldThrowException() {
+    // Test to verify that when the seedQuery parameter is provided but blank, 
Solr throws a
+    // BAD_REQUEST exception.
+    String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+    assertQEx(
+        "Blank seed query should throw Exception",
+        "'seedQuery' parameter is present but is blank: please provide a valid 
query",
+        req(CommonParams.Q, "{!knn f=vector topK=4 seedQuery=''}" + 
vectorToSearch),
+        SolrException.ErrorCode.BAD_REQUEST);
+  }
+
+  @Test
+  public void knnQueryWithInvalidSeedQuery_shouldThrowException() {
+    // Test to verify that when the seedQuery parameter is provided with an 
invalid value, Solr
+    // throws a BAD_REQUEST exception.
+    String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+    assertQEx(
+        "Invalid seed query should throw Exception",
+        "Cannot parse 'id:'",
+        req(CommonParams.Q, "{!knn f=vector topK=4 seedQuery='id:'}" + 
vectorToSearch),
+        SolrException.ErrorCode.BAD_REQUEST);
+  }
+
+  @Test
+  public void knnQueryWithKnnSeedQuery_shouldPerformSeededKnnVectorQuery() {
+    // Test to verify that when the seedQuery parameter itself is a knn query, 
it is correctly
+    // parsed and applied as the seed for the main knn query.
+    String mainVectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+    String seedVectorToSearch = "[0.1, 0.2, 0.3, 0.4]";
+
+    assertQ(
+        req(
+            CommonParams.Q,
+            "{!knn f=vector topK=4 seedQuery=$seedQuery}" + mainVectorToSearch,
+            "seedQuery",
+            "{!knn f=vector topK=4}" + seedVectorToSearch,
+            "fl",
+            "id",
+            "debugQuery",
+            "true"),
+        "//result[@numFound='4']",
+        
"//str[@name='parsedquery'][.='SeededKnnVectorQuery(SeededKnnVectorQuery{seed=KnnFloatVectorQuery:vector[0.1,...][4],
 seedWeight=null, delegate=KnnFloatVectorQuery:vector[1.0,...][4]})']");
+  }
+
+  @Test
+  public void
+      
knnQueryWithBothSeedAndEarlyTermination_shouldPerformPatienceKnnVectorQueryFromSeeded()
 {
+    // Test to verify that when both the seed and the early termination 
parameters are provided, the
+    // PatienceKnnVectorQuery is executed using the SeededKnnVectorQuery.
+    String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+    assertQ(
+        req(
+            CommonParams.Q,
+            "{!knn f=vector topK=4 seedQuery='id:(1 4 7 8 9)' 
earlyTermination=true}"
+                + vectorToSearch,
+            "fl",
+            "id",
+            "debugQuery",
+            "true"),
+        // Verify that 4 documents are returned
+        "//result[@numFound='4']",
+        // Verify that the parsed query is a nested PatienceKnnVectorQuery 
wrapping a
+        // SeededKnnVectorQuery
+        
"//str[@name='parsedquery'][contains(.,'PatienceKnnVectorQuery(PatienceKnnVectorQuery{saturationThreshold=0.995,
 patience=7, delegate=SeededKnnVectorQuery{')]",
+        // Verify that the seed query contains the expected document IDs
+        "//str[@name='parsedquery'][contains(.,'seed=id:1 id:4 id:7 id:8 
id:9')]",
+        // Verify that a seedWeight field is present — its value 
(BooleanWeight@<hash>) includes a
+        // hash code that changes on each run, so it cannot be asserted 
explicitly
+        "//str[@name='parsedquery'][contains(.,'seedWeight=')]",
+        // Verify that the final delegate is a KnnFloatVectorQuery with the 
expected vector and topK
+        // value
+        
"//str[@name='parsedquery'][contains(.,'delegate=KnnFloatVectorQuery:vector[1.0,...][4]')]");
+  }
 }
diff --git 
a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc 
b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
index 942999f88c6..82ae6648a05 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
@@ -47,7 +47,7 @@ The strategy implemented in Apache Lucene and used by Apache 
Solr is based on Na
 
 It provides efficient approximate nearest neighbor search for high dimensional 
vectors.
 
-See https://doi.org/10.1016/j.is.2013.10.006[Approximate nearest neighbor 
algorithm based on navigable small world graphs [2014]] and 
https://arxiv.org/abs/1603.09320[Efficient and robust approximate nearest 
neighbor search using Hierarchical Navigable Small World graphs [2018]] for 
details.
+See https://doi.org/10.1016/j.is.2013.10.006[Approximate nearest neighbor 
algorithm based on navigable small world graphs (2014)] and 
https://arxiv.org/abs/1603.09320[Efficient and robust approximate nearest 
neighbor search using Hierarchical Navigable Small World graphs (2018)] for 
details.
 
 
 == Index Time
@@ -416,7 +416,7 @@ The search results retrieved are the k=10 nearest documents 
to the vector in inp
 |Optional |Default: `false`
 |===
 +
-Early termination is an HNSW optimization. Solr relies on the Lucene’s 
implementation of early termination for kNN queries, based on 
https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf[Patience
 in Proximity: A Simple Early Termination Strategy for HNSW Graph Traversal in 
Approximate k-Nearest Neighbor Search].
+Early termination is an HNSW optimization. Solr relies on Lucene’s 
implementation of early termination for kNN queries, based on 
https://cs.uwaterloo.ca/~jimmylin/publications/Teofili_Lin_ECIR2025.pdf[Patience
 in Proximity: A Simple Early Termination Strategy for HNSW Graph Traversal in 
Approximate k-Nearest Neighbor Search (2025)].
 +
 When enabled (true), the search may exit early when the HNSW candidate queue 
remains saturated over a threshold (saturationThreshold) for more than a given 
number of iterations (patience). Refer to the two parameters below for more 
details.
 +
@@ -457,6 +457,26 @@ Here's an example of a `knn` search using the early 
termination with input param
 [source,text]
 ?q={!knn f=vector topK=10 earlyTermination=true saturationThreshold=0.989 
patience=10}[1.0, 2.0, 3.0, 4.0]
 
+`seedQuery`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+A seed query that initiates the vector search: the documents it matches are used as entry points in the HNSW 
graph exploration. Solr relies on Lucene’s implementation of 
{lucene-javadocs}/core/org/apache/lucene/search/SeededKnnVectorQuery.html[SeededKnnVectorQuery]
 based on https://arxiv.org/pdf/2307.16779[Lexically-Accelerated Dense 
Retrieval (2023)].
++
+The seedQuery is primarily intended to be a lexical query, guiding the vector 
search in a hybrid-like way through traditional query logic. Although a knn 
query can also be used as a seed — which might make sense in specific scenarios 
and has been verified by a dedicated test — this approach is not considered a 
best practice.
++
+The seedQuery can also be used in combination with earlyTermination.
+
+Here is an example of a `knn` search using a `seedQuery`:
+
+[source,text]
+?q={!knn f=vector topK=10 seedQuery='id:(1 4 10)'}[1.0, 2.0, 3.0, 4.0]
+
+The search results retrieved are the k=10 nearest documents to the vector in 
input `[1.0, 2.0, 3.0, 4.0]`. Documents matching the query `id:(1 4 10)` are 
used as entry points for the ANN search. If no documents match the seed, Solr 
falls back to a regular knn search without seeding, starting instead from 
random entry points.
+
 === knn_text_to_vector Query Parser
 
 The `knn_text_to_vector` query parser encode a textual query to a vector using 
a dedicated Large Language Model(fine tuned for the task of encoding text to 
vector for sentence similarity) and matches k-nearest neighbours documents to 
such query vector.
@@ -824,7 +844,7 @@ cat > cuvs_configset/conf/solrconfig.xml << 'EOF'
     <luceneMatchVersion>10.0.0</luceneMatchVersion>
     <dataDir>${solr.data.dir:}</dataDir>
     <directoryFactory name="DirectoryFactory" 
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
-    
+
     <updateHandler class="solr.DirectUpdateHandler2">
         <updateLog>
             <str name="dir">${solr.ulog.dir:}</str>
@@ -853,7 +873,7 @@ cat > cuvs_configset/conf/solrconfig.xml << 'EOF'
             <int name="rows">10</int>
         </lst>
     </requestHandler>
-    
+
     <requestHandler name="/update" class="solr.UpdateRequestHandler" />
 </config>
 EOF
@@ -865,16 +885,16 @@ cat > cuvs_configset/conf/managed-schema << 'EOF'
 <?xml version="1.0" ?>
 <schema name="schema-densevector" version="1.7">
     <fieldType name="string" class="solr.StrField" multiValued="true"/>
-    <fieldType name="knn_vector" class="solr.DenseVectorField" 
-               vectorDimension="8" 
-               knnAlgorithm="cagra_hnsw" 
+    <fieldType name="knn_vector" class="solr.DenseVectorField"
+               vectorDimension="8"
+               knnAlgorithm="cagra_hnsw"
                similarityFunction="cosine" />
     <fieldType name="plong" class="solr.LongPointField" 
useDocValuesAsStored="false"/>
 
     <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
     <field name="article_vector" type="knn_vector" indexed="true" 
stored="true"/>
     <field name="_version_" type="plong" indexed="true" stored="true" 
multiValued="false" />
-    
+
     <uniqueKey>id</uniqueKey>
 </schema>
 EOF

Reply via email to