This is an automated email from the ASF dual-hosted git repository.
ishan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 25d07610f4f SOLR-17892, SOLR-17991: Cuvs module - Modify how
parameters are passed (#3872)
25d07610f4f is described below
commit 25d07610f4f0529de8329027b72d528d85f460ed
Author: Vivek Narang <[email protected]>
AuthorDate: Fri Nov 28 06:00:27 2025 -0500
SOLR-17892, SOLR-17991: Cuvs module - Modify how parameters are passed
(#3872)
---
.../org/apache/solr/schema/DenseVectorField.java | 82 ++++++++++++++++++++++
.../src/java/org/apache/solr/cuvs/CuVSCodec.java | 82 +++++++++-------------
.../test-files/solr/collection1/conf/schema.xml | 2 +-
.../solr/collection1/conf/solrconfig.xml | 9 +--
.../apache/solr/cuvs/TestCuVSCodecSupportIT.java | 2 -
.../query-guide/pages/dense-vector-search.adoc | 50 ++++++-------
6 files changed, 140 insertions(+), 87 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
index 0500d7fcbbe..f1831c1ad9c 100644
--- a/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DenseVectorField.java
@@ -69,6 +69,7 @@ import org.slf4j.LoggerFactory;
public class DenseVectorField extends FloatPointField {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String HNSW_ALGORITHM = "hnsw";
+ public static final String CAGRA_HNSW_ALGORITHM = "cagra_hnsw";
public static final String DEFAULT_KNN_ALGORITHM = HNSW_ALGORITHM;
static final String KNN_VECTOR_DIMENSION = "vectorDimension";
static final String KNN_ALGORITHM = "knnAlgorithm";
@@ -78,6 +79,20 @@ public class DenseVectorField extends FloatPointField {
static final VectorEncoding DEFAULT_VECTOR_ENCODING = VectorEncoding.FLOAT32;
static final String KNN_SIMILARITY_FUNCTION = "similarityFunction";
static final VectorSimilarityFunction DEFAULT_SIMILARITY =
VectorSimilarityFunction.EUCLIDEAN;
+
+ static final String CUVS_WRITER_THREADS = "cuvsWriterThreads";
+ static final String CUVS_INT_GRAPH_DEGREE = "cuvsIntGraphDegree";
+ static final String CUVS_GRAPH_DEGREE = "cuvsGraphDegree";
+ static final String CUVS_HNSW_LAYERS = "cuvsHnswLayers";
+ static final String CUVS_HNSW_MAX_CONNECTIONS = "cuvsHnswM";
+ static final String CUVS_HNSW_EF_CONSTRUCTION = "cuvsHNSWEfConstruction";
+ static final int DEFAULT_CUVS_WRITER_THREADS = 32;
+ static final int DEFAULT_CUVS_INT_GRAPH_DEGREE = 128;
+ static final int DEFAULT_CUVS_GRAPH_DEGREE = 64;
+ static final int DEFAULT_CUVS_HNSW_LAYERS = 1;
+ static final int DEFAULT_CUVS_HNSW_MAX_CONNECTIONS = 16;
+ static final int DEFAULT_CUVS_HNSW_EF_CONSTRUCTION = 100;
+
private int dimension;
private VectorSimilarityFunction similarityFunction;
private String knnAlgorithm;
@@ -100,6 +115,13 @@ public class DenseVectorField extends FloatPointField {
*/
private VectorEncoding vectorEncoding;
+ private int cuvsWriterThreads;
+ private int cuvsIntGraphDegree;
+ private int cuvsGraphDegree;
+ private int cuvsHnswLayers;
+ private int cuvsHnswM;
+ private int cuvsHNSWEfConstruction;
+
public DenseVectorField() {
super();
}
@@ -155,6 +177,42 @@ public class DenseVectorField extends FloatPointField {
ofNullable(args.get(HNSW_BEAM_WIDTH)).map(Integer::parseInt).orElse(DEFAULT_BEAM_WIDTH);
args.remove(HNSW_BEAM_WIDTH);
+ this.cuvsWriterThreads =
+ ofNullable(args.get(CUVS_WRITER_THREADS))
+ .map(Integer::parseInt)
+ .orElse(DEFAULT_CUVS_WRITER_THREADS);
+ args.remove(CUVS_WRITER_THREADS);
+
+ this.cuvsIntGraphDegree =
+ ofNullable(args.get(CUVS_INT_GRAPH_DEGREE))
+ .map(Integer::parseInt)
+ .orElse(DEFAULT_CUVS_INT_GRAPH_DEGREE);
+ args.remove(CUVS_INT_GRAPH_DEGREE);
+
+ this.cuvsGraphDegree =
+ ofNullable(args.get(CUVS_GRAPH_DEGREE))
+ .map(Integer::parseInt)
+ .orElse(DEFAULT_CUVS_GRAPH_DEGREE);
+ args.remove(CUVS_GRAPH_DEGREE);
+
+ this.cuvsHnswLayers =
+ ofNullable(args.get(CUVS_HNSW_LAYERS))
+ .map(Integer::parseInt)
+ .orElse(DEFAULT_CUVS_HNSW_LAYERS);
+ args.remove(CUVS_HNSW_LAYERS);
+
+ this.cuvsHnswM =
+ ofNullable(args.get(CUVS_HNSW_MAX_CONNECTIONS))
+ .map(Integer::parseInt)
+ .orElse(DEFAULT_CUVS_HNSW_MAX_CONNECTIONS);
+ args.remove(CUVS_HNSW_MAX_CONNECTIONS);
+
+ this.cuvsHNSWEfConstruction =
+ ofNullable(args.get(CUVS_HNSW_EF_CONSTRUCTION))
+ .map(Integer::parseInt)
+ .orElse(DEFAULT_CUVS_HNSW_EF_CONSTRUCTION);
+ args.remove(CUVS_HNSW_EF_CONSTRUCTION);
+
this.properties &= ~MULTIVALUED;
this.properties &= ~UNINVERTIBLE;
@@ -185,6 +243,30 @@ public class DenseVectorField extends FloatPointField {
return vectorEncoding;
}
+ public int getCuvsWriterThreads() {
+ return cuvsWriterThreads;
+ }
+
+ public int getCuvsIntGraphDegree() {
+ return cuvsIntGraphDegree;
+ }
+
+ public int getCuvsGraphDegree() {
+ return cuvsGraphDegree;
+ }
+
+ public int getCuvsHnswLayers() {
+ return cuvsHnswLayers;
+ }
+
+ public int getCuvsHnswMaxConn() {
+ return cuvsHnswM;
+ }
+
+ public int getCuvsHnswEfConstruction() {
+ return cuvsHNSWEfConstruction;
+ }
+
@Override
protected boolean enableDocValuesByDefault() {
return false;
diff --git a/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java
b/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java
index a1423f8c954..2c82c3e37df 100644
--- a/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java
+++ b/solr/modules/cuvs/src/java/org/apache/solr/cuvs/CuVSCodec.java
@@ -40,59 +40,14 @@ import org.slf4j.LoggerFactory;
public class CuVSCodec extends FilterCodec {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private static final int DEFAULT_CUVS_WRITER_THREADS = 32;
- private static final int DEFAULT_INT_GRAPH_DEGREE = 128;
- private static final int DEFAULT_GRAPH_DEGREE = 64;
- private static final int DEFAULT_HNSW_LAYERS = 1;
- private static final int DEFAULT_MAX_CONN = 16;
- private static final int DEFAULT_BEAM_WIDTH = 100;
-
- private static final String CAGRA_HNSW = "cagra_hnsw";
private static final String FALLBACK_CODEC = "Lucene103";
-
private final SolrCore core;
private final Lucene103Codec fallbackCodec;
- private final Lucene99AcceleratedHNSWVectorsFormat cuvsHNSWVectorsFormat;
public CuVSCodec(SolrCore core, Lucene103Codec fallback, NamedList<?> args) {
super(FALLBACK_CODEC, fallback);
this.core = core;
this.fallbackCodec = fallback;
-
- String cwt = args._getStr("cuvsWriterThreads");
- int cuvsWriterThreads = cwt != null ? Integer.parseInt(cwt) :
DEFAULT_CUVS_WRITER_THREADS;
- String igd = args._getStr("intGraphDegree");
- int intGraphDegree = igd != null ? Integer.parseInt(igd) :
DEFAULT_INT_GRAPH_DEGREE;
- String gd = args._getStr("graphDegree");
- int graphDegree = gd != null ? Integer.parseInt(gd) : DEFAULT_GRAPH_DEGREE;
- String hl = args._getStr("hnswLayers");
- int hnswLayers = hl != null ? Integer.parseInt(hl) : DEFAULT_HNSW_LAYERS;
- String mc = args._getStr("maxConn");
- int maxConn = mc != null ? Integer.parseInt(mc) : DEFAULT_MAX_CONN;
- String bw = args._getStr("beamWidth");
- int beamWidth = bw != null ? Integer.parseInt(bw) : DEFAULT_BEAM_WIDTH;
-
- assert cuvsWriterThreads > 0 : "cuvsWriterThreads cannot be less then or
equal to 0";
- assert intGraphDegree > 0 : "intGraphDegree cannot be less then or equal
to 0";
- assert graphDegree > 0 : "graphDegree cannot be less then or equal to 0";
- assert hnswLayers > 0 : "hnswLayers cannot be less then or equal to 0";
- assert maxConn > 0 : "max connections cannot be less then or equal to 0";
- assert beamWidth > 0 : "beam width cannot be less then or equal to 0";
-
- cuvsHNSWVectorsFormat =
- new Lucene99AcceleratedHNSWVectorsFormat(
- cuvsWriterThreads, intGraphDegree, graphDegree, hnswLayers,
maxConn, beamWidth);
-
- if (log.isInfoEnabled()) {
- log.info(
- "Lucene99AcceleratedHNSWVectorsFormat initialized with parameter
values: cuvsWriterThreads {}, intGraphDegree {}, graphDegree {}, hnswLayers {},
maxConn {}, beamWidth {}",
- cuvsWriterThreads,
- intGraphDegree,
- graphDegree,
- hnswLayers,
- maxConn,
- beamWidth);
- }
}
@Override
@@ -108,8 +63,41 @@ public class CuVSCodec extends FilterCodec {
FieldType fieldType = (schemaField == null ? null :
schemaField.getType());
if (fieldType instanceof DenseVectorField vectorType) {
String knnAlgorithm = vectorType.getKnnAlgorithm();
- if (CAGRA_HNSW.equals(knnAlgorithm)) {
- return cuvsHNSWVectorsFormat;
+ if (DenseVectorField.CAGRA_HNSW_ALGORITHM.equals(knnAlgorithm)) {
+
+ int cuvsWriterThreads = vectorType.getCuvsWriterThreads();
+ int cuvsIntGraphDegree = vectorType.getCuvsIntGraphDegree();
+ int cuvsGraphDegree = vectorType.getCuvsGraphDegree();
+ int cuvsHnswLayers = vectorType.getCuvsHnswLayers();
+ int cuvsHnswM = vectorType.getCuvsHnswMaxConn();
+ int cuvsHNSWEfConstruction =
vectorType.getCuvsHnswEfConstruction();
+
+ assert cuvsWriterThreads > 0 : "cuvsWriterThreads cannot be less
then or equal to 0";
+ assert cuvsIntGraphDegree > 0
+ : "cuvsIntGraphDegree cannot be less then or equal to 0";
+ assert cuvsGraphDegree > 0 : "cuvsGraphDegree cannot be less
then or equal to 0";
+ assert cuvsHnswLayers > 0 : "cuvsHnswLayers cannot be less then
or equal to 0";
+ assert cuvsHnswM > 0 : "cuvsHnswM cannot be less then or equal
to 0";
+ assert cuvsHNSWEfConstruction > 0
+ : "cuvsHNSWEfConstruction cannot be less then or equal to 0";
+
+ if (log.isInfoEnabled()) {
+ log.info(
+ "Initializing Lucene99AcceleratedHNSWVectorsFormat with
parameter values: cuvsWriterThreads {}, cuvsIntGraphDegree {}, cuvsGraphDegree
{}, cuvsHnswLayers {}, cuvsHnswM {}, cuvsHNSWEfConstruction {}",
+ cuvsWriterThreads,
+ cuvsIntGraphDegree,
+ cuvsGraphDegree,
+ cuvsHnswLayers,
+ cuvsHnswM,
+ cuvsHNSWEfConstruction);
+ }
+ return new Lucene99AcceleratedHNSWVectorsFormat(
+ cuvsWriterThreads,
+ cuvsIntGraphDegree,
+ cuvsGraphDegree,
+ cuvsHnswLayers,
+ cuvsHnswM,
+ cuvsHNSWEfConstruction);
} else if (DenseVectorField.HNSW_ALGORITHM.equals(knnAlgorithm)) {
return fallbackCodec.getKnnVectorsFormatForField(field);
} else {
diff --git a/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml
b/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml
index 122eed5c464..e9aea48d6ab 100644
--- a/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/modules/cuvs/src/test-files/solr/collection1/conf/schema.xml
@@ -20,7 +20,7 @@
<schema name="schema-densevector" version="1.7">
<fieldType name="string" class="solr.StrField" multiValued="true"/>
- <fieldType name="knn_vector1" class="solr.DenseVectorField"
vectorDimension="8" knnAlgorithm="cagra_hnsw" similarityFunction="cosine"/>
+ <fieldType name="knn_vector1" class="solr.DenseVectorField"
vectorDimension="8" knnAlgorithm="cagra_hnsw" similarityFunction="cosine"
cuvsWriterThreads="32" cuvsIntGraphDegree="128" cuvsGraphDegree="64"
cuvsHnswLayers="1" cuvsHnswM="16" cuvsHNSWEfConstruction="100"/>
<fieldType name="knn_vector2" class="solr.DenseVectorField"
vectorDimension="8" similarityFunction="cosine"/>
<fieldType name="plong" class="solr.LongPointField"
useDocValuesAsStored="false"/>
diff --git
a/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml
b/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml
index 38551201f6f..38117a9e7a6 100644
--- a/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml
+++ b/solr/modules/cuvs/src/test-files/solr/collection1/conf/solrconfig.xml
@@ -28,14 +28,7 @@
<dataDir>${solr.data.dir:}</dataDir>
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.MockDirectoryFactory}"/>
- <codecFactory name="CuVSCodecFactory"
class="org.apache.solr.cuvs.CuVSCodecFactory">
- <str name="cuvsWriterThreads">32</str>
- <str name="intGraphDegree">128</str>
- <str name="graphDegree">64</str>
- <str name="hnswLayers">1</str>
- <str name="maxConn">16</str>
- <str name="beamWidth">100</str>
- </codecFactory>
+ <codecFactory name="CuVSCodecFactory"
class="org.apache.solr.cuvs.CuVSCodecFactory"/>
<requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
</config>
diff --git
a/solr/modules/cuvs/src/test/org/apache/solr/cuvs/TestCuVSCodecSupportIT.java
b/solr/modules/cuvs/src/test/org/apache/solr/cuvs/TestCuVSCodecSupportIT.java
index c00be259e65..0da5bdd86b0 100644
---
a/solr/modules/cuvs/src/test/org/apache/solr/cuvs/TestCuVSCodecSupportIT.java
+++
b/solr/modules/cuvs/src/test/org/apache/solr/cuvs/TestCuVSCodecSupportIT.java
@@ -39,7 +39,6 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -50,7 +49,6 @@ import org.slf4j.LoggerFactory;
*
* @since 10.0.0
*/
-@Ignore("https://issues.apache.org/jira/browse/SOLR-17938")
public class TestCuVSCodecSupportIT extends SolrTestCaseJ4 {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git
a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
index 856c63329ff..26bbcff1670 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
@@ -718,36 +718,29 @@ Define the `fieldType` in the schema, with knnAlgorithm
set to `cagra_hnsw`:
[source,xml]
----
-<fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="8"
knnAlgorithm="cagra_hnsw" similarityFunction="cosine" />
-----
-
-Define the xref:configuration-guide:codec-factory.adoc[codecFactory] in
xref:configuration-guide:configuring-solrconfig-xml.adoc[solrconfig.xml]
-
-[source,xml]
-----
-<codecFactory name="CuVSCodecFactory"
class="org.apache.solr.cuvs.CuVSCodecFactory">
- <str name="cuvsWriterThreads">8</str>
- <str name="intGraphDegree">128</str>
- <str name="graphDegree">64</str>
- <str name="hnswLayers">1</str>
- <str name="maxConn">16</str>
- <str name="beamWidth">100</str>
-</codecFactory>
+<fieldType name="knn_vector" class="solr.DenseVectorField" vectorDimension="8"
knnAlgorithm="cagra_hnsw" similarityFunction="cosine" cuvsWriterThreads="32"
cuvsIntGraphDegree="128" cuvsGraphDegree="64" cuvsHnswLayers="1" cuvsHnswM="16"
cuvsHNSWEfConstruction="100"/>
----
Where:
* `cuvsWriterThreads` - number of threads to use
-* `intGraphDegree` - Intermediate graph degree for building the CAGRA index
+* `cuvsIntGraphDegree` - Intermediate graph degree for building the CAGRA index
-* `graphDegree` - Graph degree for building the CAGRA index
+* `cuvsGraphDegree` - Graph degree for building the CAGRA index
-* `hnswLayers` - Number of HNSW graph layers to construct while building the
HNSW index
+* `cuvsHnswLayers` - Number of HNSW graph layers to construct while building
the HNSW index
-* `maxConn` - Max connections parameter passed to the fallback
Lucene99HnswVectorsWriter
+* `cuvsHnswM` - Max connections parameter passed to the fallback
Lucene99HnswVectorsWriter
-* `beamWidth` - Beam width parameter passed to the fallback
Lucene99HnswVectorsWriter
+* `cuvsHNSWEfConstruction` - Beam width parameter passed to the fallback
Lucene99HnswVectorsWriter
+
+Define the xref:configuration-guide:codec-factory.adoc[codecFactory] in
xref:configuration-guide:configuring-solrconfig-xml.adoc[solrconfig.xml]
+
+[source,xml]
+----
+<codecFactory name="CuVSCodecFactory"
class="org.apache.solr.cuvs.CuVSCodecFactory"/>
+----
=== Example
@@ -880,14 +873,7 @@ cat > cuvs_configset/conf/solrconfig.xml << 'EOF'
</autoSoftCommit>
</updateHandler>
- <codecFactory name="CuVSCodecFactory"
class="org.apache.solr.cuvs.CuVSCodecFactory">
- <str name="cuvsWriterThreads">32</str>
- <str name="intGraphDegree">128</str>
- <str name="graphDegree">64</str>
- <str name="hnswLayers">1</str>
- <str name="maxConn">16</str>
- <str name="beamWidth">100</str>
- </codecFactory>
+ <codecFactory name="CuVSCodecFactory"
class="org.apache.solr.cuvs.CuVSCodecFactory"/>
<requestHandler name="/select" class="solr.SearchHandler">
<lst name="defaults">
@@ -910,7 +896,13 @@ cat > cuvs_configset/conf/managed-schema << 'EOF'
<fieldType name="knn_vector" class="solr.DenseVectorField"
vectorDimension="8"
knnAlgorithm="cagra_hnsw"
- similarityFunction="cosine" />
+ similarityFunction="cosine"
+ cuvsWriterThreads="32"
+ cuvsIntGraphDegree="128"
+ cuvsGraphDegree="64"
+ cuvsHnswLayers="1"
+ cuvsHnswM="16"
+ cuvsHNSWEfConstruction="100"/>
<fieldType name="plong" class="solr.LongPointField"
useDocValuesAsStored="false"/>
<field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>