This is an automated email from the ASF dual-hosted git repository.

abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f454a658ce [SOLR-17812] Add support for 
BinaryQuantizedDenseVectorField (#3468)
7f454a658ce is described below

commit 7f454a658cea143275c7c2ed0fb1153b816af851
Author: Kevin Liang <[email protected]>
AuthorDate: Tue Sep 23 06:35:38 2025 -0400

    [SOLR-17812] Add support for BinaryQuantizedDenseVectorField (#3468)
    
    * added binary quantisation + documentation and tests
    ---------
    
    Co-authored-by: kliang78 <[email protected]>
    Co-authored-by: Alessandro Benedetti <[email protected]>
---
 solr/CHANGES.txt                                   |  2 +
 .../schema/BinaryQuantizedDenseVectorField.java    | 28 ++++++++++++
 .../collection1/conf/schema-densevector-bq.xml     | 50 ++++++++++++++++++++++
 .../BinaryQuantizedDenseVectorFieldTest.java       | 47 ++++++++++++++++++++
 .../query-guide/pages/dense-vector-search.adoc     | 25 +++++++++++
 5 files changed, 152 insertions(+)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index abe7b8f5563..60a2db6c5f1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -21,6 +21,8 @@ New Features
 
 * SOLR-17780: Add support for scalar quantized dense vectors (Kevin Liang via 
Alessandro Benedetti)
 
+* SOLR-17812: Add support for binary quantized dense vectors (Kevin Liang via 
Alessandro Benedetti)
+
 * SOLR-17023: Use Modern NLP Models from Apache OpenNLP with Solr (Jeff 
Zemerick, Eric Pugh)
 
 * SOLR-17814: Add support for PatienceKnnVectorQuery. (Ilaria Petreti via 
Alessandro Benedetti)
diff --git 
a/solr/core/src/java/org/apache/solr/schema/BinaryQuantizedDenseVectorField.java
 
b/solr/core/src/java/org/apache/solr/schema/BinaryQuantizedDenseVectorField.java
new file mode 100644
index 00000000000..4293e9a6d8b
--- /dev/null
+++ 
b/solr/core/src/java/org/apache/solr/schema/BinaryQuantizedDenseVectorField.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import 
org.apache.lucene.codecs.lucene102.Lucene102HnswBinaryQuantizedVectorsFormat;
+
+public class BinaryQuantizedDenseVectorField extends DenseVectorField {
+
+  @Override
+  public KnnVectorsFormat buildKnnVectorsFormat() {
+    return new Lucene102HnswBinaryQuantizedVectorsFormat(getHnswMaxConn(), 
getHnswBeamWidth());
+  }
+}
diff --git 
a/solr/core/src/test-files/solr/collection1/conf/schema-densevector-bq.xml 
b/solr/core/src/test-files/solr/collection1/conf/schema-densevector-bq.xml
new file mode 100644
index 00000000000..056e0928060
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-densevector-bq.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for DenseVectorField types -->
+
+<schema name="schema-densevector-bbq" version="1.0">
+  <fieldType name="string" class="solr.StrField" multiValued="true"/>
+  <fieldType name="plong" class="solr.LongPointField" 
useDocValuesAsStored="false"/>
+
+  <!-- Binary Bit Quantized vectors -->
+  <fieldType name="knn_vector_binary_quantized" 
class="solr.BinaryQuantizedDenseVectorField" vectorDimension="4"/>
+
+  <field name="v_bq" type="knn_vector_binary_quantized" indexed="true" 
stored="true" />
+
+  <field name="string_field" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <field name="id" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <field name="_version_" type="plong" indexed="true" stored="true" 
multiValued="false" />
+  <field name="_text_" type="text_general" indexed="true" stored="false" 
multiValued="true"/>
+  <copyField source="*" dest="_text_"/>
+  <fieldType name="text_general" class="solr.TextField" 
positionIncrementGap="100" multiValued="true">
+    <analyzer type="index">
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.StopFilterFactory" words="stopwords.txt" 
ignoreCase="true"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+    <analyzer type="query">
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.StopFilterFactory" words="stopwords.txt" 
ignoreCase="true"/>
+      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" 
ignoreCase="true" expand="true"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <uniqueKey>id</uniqueKey>
+</schema>
diff --git 
a/solr/core/src/test/org/apache/solr/schema/BinaryQuantizedDenseVectorFieldTest.java
 
b/solr/core/src/test/org/apache/solr/schema/BinaryQuantizedDenseVectorFieldTest.java
new file mode 100644
index 00000000000..de08d3f7ed2
--- /dev/null
+++ 
b/solr/core/src/test/org/apache/solr/schema/BinaryQuantizedDenseVectorFieldTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import org.apache.solr.core.AbstractBadConfigTestBase;
+import org.junit.Test;
+
+public class BinaryQuantizedDenseVectorFieldTest extends 
AbstractBadConfigTestBase {
+  @Test
+  public void fieldDefinition_correctConfiguration_shouldLoadSchemaField() 
throws Exception {
+    try {
+      initCore("solrconfig-basic.xml", "schema-densevector-bq.xml");
+      IndexSchema schema = h.getCore().getLatestSchema();
+
+      SchemaField vector = schema.getField("v_bq");
+      assertNotNull(vector);
+
+      BinaryQuantizedDenseVectorField type = (BinaryQuantizedDenseVectorField) 
vector.getType();
+      assertEquals(4, type.getDimension());
+      assertTrue(vector.indexed());
+      assertTrue(vector.stored());
+    } finally {
+      deleteCore();
+    }
+  }
+
+  // There are no major interface differences between 
BinaryQuantizedDenseVectorField and
+  // DenseVectorField,
+  // so we can rely on the DenseVectorField tests for validation cases.
+  //
+  // as for behavior, there are no externally visible state differences. 
Internal implementation
+  // is tested at Lucene level
+}
diff --git 
a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc 
b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
index d596fbaf985..c52fa7ffb08 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
@@ -316,6 +316,31 @@ preserved when `stored` is true.
 +
 Accepted values: `BOOLEAN`
 
+=== BinaryQuantizedDenseVectorField
+
+Binary quantization is a quantization technique that extends scalar 
quantization and is even more aggressive in its compression:
+it can reduce the in-memory representation of each vector dimension from a 32-bit 
float down to a single bit.
+This is done by normalizing each dimension of a vector relative to a centroid 
(mid-point pre-calculated against all vectors in the index),
+with the stored bit representing whether the actual value is "above" or 
"below" the centroid's value. A further "corrective factor" is also computed
+and stored to help compensate for the accuracy lost in the estimated distance. At query 
time, asymmetric quantization is applied to the query
+vector (reducing its dimension values down to 4 bits each) while still allowing 
comparison with the stored binary quantized vector via bit arithmetic.
+
+This implementation builds on LVQ, proposed in 
https://arxiv.org/abs/2304.04759[Similarity Search in the Blink of an Eye With 
Compressed Indices]
+by Cecilia Aguerrebere et al., previous work on globally optimized scalar 
quantization in Apache Lucene, and ideas from
+https://arxiv.org/abs/1908.10396[Accelerating Large-Scale Inference with 
Anisotropic Vector Quantization] by Ruiqi Guo et al.
+
+This vector type is best utilized for data sets consisting of large numbers of 
high-dimensional vectors.
+
+Here is how a BinaryQuantizedDenseVectorField can be defined in the schema:
+
+[source,xml]
+<fieldType name="binary_quantized_vector" 
class="solr.BinaryQuantizedDenseVectorField" vectorDimension="4"/>
+<field name="vector" type="binary_quantized_vector" indexed="true" 
stored="true"/>
+
+BinaryQuantizedDenseVectorField accepts the same parameters as 
`DenseVectorField` with the only notable exception being
+`similarityFunction`. Bit quantization uses its own distance calculation and 
so neither requires nor uses the `similarityFunction`
+param.
+
 == Query Time
 
 Apache Solr provides three query parsers that work with dense vector fields, 
that each support different ways of matching documents based on vector 
similarity: The `knn` query parser, the `vectorSimilarity` query parser and the 
`knn_text_to_vector` query parser.

Reply via email to