alessandrobenedetti commented on code in PR #2523: URL: https://github.com/apache/solr/pull/2523#discussion_r1644014991
########## solr/core/src/java/org/apache/solr/search/neural/VecSimQParserPlugin.java: ########## @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.neural; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QParserPlugin; + +/** A neural query parser to run min-similarity search on Dense Vector fields. */ +public class VecSimQParserPlugin extends QParserPlugin { Review Comment: same here, maybe "VectorSimilarityQParserPlugin" ########## solr/core/src/java/org/apache/solr/search/neural/VecSimQParser.java: ########## @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.neural; + +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.search.ByteVectorSimilarityQuery; +import org.apache.lucene.search.FloatVectorSimilarityQuery; +import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.util.vector.DenseVectorParser; + +public class VecSimQParser extends AbstractVectorQParserBase { Review Comment: Why not "VectorSimilarityQParser"? In general I am always in favour of explicit names, as I have realised over the years that what seems obvious to someone may complicate readability for others in the future ########## solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc: ########## @@ -237,14 +237,12 @@ client.add(Arrays.asList(d1, d2)); -- == Query Time -This is the Apache Solr query approach designed to support dense vector search: -=== knn Query Parser -The `knn` k-nearest neighbors query parser allows to find the k-nearest documents to the target vector according to indexed dense vectors in the given field. The set of documents can be Pre-Filtered to reduce the number of vector distance calculations that must be computed, and ensure the best `topK` are returned. 
+Apache Solr provides two query parsers that work with dense vector fields, that each support differnet ways of matching documents based on vector similarity: The `knn` query parser, and the `vecSim` query parser. -The score for a retrieved document is the approximate distance to the target vector(defined by the similarityFunction configured at indexing time). +Both parsers return scores for retrieved documents that is the approximate distance to the target vector (defined by the similarityFunction configured at indexing time) and both support "Pre-Filtering" the document graph to reduce the number of candidate vectors evaluated (with out needing to compute their vector similarity distances). Review Comment: typo? -> Both parsers return **a score** for retrieved documents that **is**... ########## solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc: ########## @@ -293,22 +282,73 @@ Indicates that only `fq` filters with the specified `tag` should be considered f Indicates that `fq` filters with the specified `tag` should be excluded from consideration for implicit Pre-Filtering. Must not be combined with `preFilter`. -Here's how to run a simple KNN search: +=== knn Query Parser + +The `knn` k-nearest neighbors query parser matches k-nearest documents to the target vector. + +In addition to the common parameters described above, it takes the following parameters: + +`topK`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: 10 +|=== ++ +How many k-nearest results to return. + +Here's an example of a simple `knn` search: [source,text] ?q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0] The search results retrieved are the k=10 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`, ranked by the `similarityFunction` configured at indexing time. +=== vecSim Query Parser + +The `vecSim` vector similarity query parser matches documents whose similarity with the target vector is a above a minimum threshold. 
-==== Explicit KNN Pre-Filtering +In addition to the common parameters described above, it takes the following parameters: -The `knn` query parser's `preFilter` parameter can be specified to reduce the number of candidate documents evaluated for the k-nearest distance calculation: + +`minReturn`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +Minimum similarity threshold of nodes in the graph to be returned as matches + +`minTraverse`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: -Infinity +|=== ++ +Minimum similarity of nodes in the graph to continue traversal of their neighbors + +Here's an example of a simple `vecSim` search: + +[source,text] +?q={!vecSim f=vector minReturn=0.7}[1.0, 2.0, 3.0, 4.0] + +The search results retrieved are all documents whose similarity with the input vector `[1.0, 2.0, 3.0, 4.0]` is at least `0.7` based on the `similarityFunction` configured at indexing time + + +=== Graph Pre-Filtering + +Pre-Filtering the set of candidate documents considered when walking the graph can be specified either explicitly, or implicitly (based on existing `fq` params) depending on how and when these dense vector query parsers are used. + +==== Explicit Pre-Filtering + +The `preFilter` parameter can be specified explicitly to reduce the number of candidate documents evaluated for the distance calculation: [source,text] -?q={!knn f=vector topK=10 preFilter=inStock:true}[1.0, 2.0, 3.0, 4.0] +?q={!vecSim f=vector minReturn=0.7 preFilter=inStock:true}[1.0, 2.0, 3.0, 4.0] -In the above example, only documents matching the Pre-Filter `inStock:true` will be candidates for consideration when evaluating the k-nearest search against the specified vector. +In the above example, only documents matching the Pre-Filter `inStock:true` will be candidates for consideration when evaluating the `knn` search against the specified vector. Review Comment: We mention here 'knn' search, but the example above is now a 'vectorSimilarity' query? 
########## solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc: ########## @@ -237,14 +237,12 @@ client.add(Arrays.asList(d1, d2)); -- == Query Time -This is the Apache Solr query approach designed to support dense vector search: -=== knn Query Parser -The `knn` k-nearest neighbors query parser allows to find the k-nearest documents to the target vector according to indexed dense vectors in the given field. The set of documents can be Pre-Filtered to reduce the number of vector distance calculations that must be computed, and ensure the best `topK` are returned. +Apache Solr provides two query parsers that work with dense vector fields, that each support differnet ways of matching documents based on vector similarity: The `knn` query parser, and the `vecSim` query parser. Review Comment: typo -> ", that each support different..." ########## solr/core/src/java/org/apache/solr/search/neural/VecSimQParser.java: ########## @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search.neural; + +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.search.ByteVectorSimilarityQuery; +import org.apache.lucene.search.FloatVectorSimilarityQuery; +import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.util.vector.DenseVectorParser; + +public class VecSimQParser extends AbstractVectorQParserBase { + + // retrieve the top results based on the distance similarity function thresholds + static final String MIN_RETURN = "minReturn"; + static final String MIN_TRAVERSE = "minTraverse"; + + static final float DEFAULT_MIN_TRAVERSE = Float.NEGATIVE_INFINITY; + + public VecSimQParser( + String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + super(qstr, localParams, params, req); + } + + @Override + public Query parse() throws SyntaxError { + final String fieldName = getFieldName(); + final SchemaField schemaField = req.getCore().getLatestSchema().getField(fieldName); + final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); + final String vectorToSearch = getVectorToSearch(); + final float minT = localParams.getFloat(MIN_TRAVERSE, DEFAULT_MIN_TRAVERSE); Review Comment: same here, maybe "minTraverse" and "minReturn" ########## solr/core/src/java/org/apache/solr/search/neural/VecSimQParserPlugin.java: ########## @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.neural; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QParserPlugin; + +/** A neural query parser to run min-similarity search on Dense Vector fields. */ +public class VecSimQParserPlugin extends QParserPlugin { + public static final String NAME = "vecSim"; Review Comment: same here "vectorSimilarity" ########## solr/core/src/java/org/apache/solr/search/neural/VecSimQParser.java: ########## @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search.neural; + +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.search.ByteVectorSimilarityQuery; +import org.apache.lucene.search.FloatVectorSimilarityQuery; +import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.util.vector.DenseVectorParser; + +public class VecSimQParser extends AbstractVectorQParserBase { + + // retrieve the top results based on the distance similarity function thresholds + static final String MIN_RETURN = "minReturn"; + static final String MIN_TRAVERSE = "minTraverse"; + + static final float DEFAULT_MIN_TRAVERSE = Float.NEGATIVE_INFINITY; + + public VecSimQParser( + String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + super(qstr, localParams, params, req); + } + + @Override + public Query parse() throws SyntaxError { + final String fieldName = getFieldName(); + final SchemaField schemaField = req.getCore().getLatestSchema().getField(fieldName); + final DenseVectorField denseVectorType = getCheckedFieldType(schemaField); + final String vectorToSearch = getVectorToSearch(); + final float minT = localParams.getFloat(MIN_TRAVERSE, DEFAULT_MIN_TRAVERSE); + final Float minR = localParams.getFloat(MIN_RETURN); + if (null == minR) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + MIN_RETURN + " is requried to use Vector Similarity QParser"); Review Comment: typo "required" -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org