This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 39f73d5d87 OAK-11721: experimental and inferenceConfig implementation
should be … (#2294)
39f73d5d87 is described below
commit 39f73d5d87ee1e507f3b8dbeb570244f1a1f4fa2
Author: Mohit Kataria <[email protected]>
AuthorDate: Thu May 15 01:55:42 2025 +0530
OAK-11721: experimental and inferenceConfig implementation should be …
(#2294)
* OAK-11721: experimental and inferenceConfig implementation should be
backward compatible + resolve default config implementation
* OAK-11719: vector query should not break normal queries
* OAK-11721: added compatibility flag
* OAK-11719: added review comments
* OAK-11719: vector query should not break normal queries- removed
experimental inference testing
* OAK-11721: hybrid test passes in compatibility mode
* OAK-11721: compatibility mode test
* OAK-11719: compatibility mode test
* OAK-11719: compatibility mode test
---
.../oak/query/ast/FullTextSearchImpl.java | 8 +-
.../jackrabbit/oak/query/AbstractQueryTest.java | 1 -
.../plugins/index/lucene/LucenePropertyIndex.java | 9 +++
.../index/lucene/LuceneFullTextIndexTest.java | 10 +++
.../index/lucene/LuceneTestRepositoryBuilder.java | 2 +
.../oak/spi/query/fulltext/VectorQuery.java | 86 ++++++++++++----------
.../fulltext/VectorQueryCompatibilityModeTest.java | 63 ++++++++++++++++
.../oak/spi/query/fulltext/VectorQueryTest.java | 82 ++++++++++++++++-----
.../index/elastic/query/ElasticRequestHandler.java | 7 +-
.../index/elastic/ElasticFullTextIndexTest.java | 9 +++
.../inference/ElasticInferenceUsingConfigTest.java | 40 +++++++---
.../oak/plugins/index/FullTextIndexCommonTest.java | 81 ++++++++++++--------
12 files changed, 291 insertions(+), 107 deletions(-)
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
index 2f5c88c57c..2b800f7d9a 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
@@ -144,10 +144,10 @@ public class FullTextSearchImpl extends ConstraintImpl {
// e.g. ?{"inferenceModelConfig": "ada-test-model"}?little red fox
// So here we split the query into text part of query and
inferenceConfig part of query.
// Afterwards we only parse text part of query as this part of
query is what we want to search.
- if (query.getSettings().isInferenceEnabled()) {
- VectorQuery vectorQuery = new VectorQuery(rawText);
- queryText = vectorQuery.getQueryText();
- }
+ // We are explicitly removing vectorQueryConfig from rawText
before providing it to FullTextParser.
+ // The full query (rawText) is still passed on later for execution.
+ VectorQuery vectorQuery = new VectorQuery(rawText);
+ queryText = vectorQuery.getQueryText();
FullTextExpression e = FullTextParser.parse(p2, queryText);
return new FullTextContains(p2, rawText, e);
} catch (ParseException e) {
diff --git
a/oak-core/src/test/java/org/apache/jackrabbit/oak/query/AbstractQueryTest.java
b/oak-core/src/test/java/org/apache/jackrabbit/oak/query/AbstractQueryTest.java
index c20748ae43..71c29eb8af 100644
---
a/oak-core/src/test/java/org/apache/jackrabbit/oak/query/AbstractQueryTest.java
+++
b/oak-core/src/test/java/org/apache/jackrabbit/oak/query/AbstractQueryTest.java
@@ -29,7 +29,6 @@ import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
diff --git
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
index be0bcb19fe..4e63da29e6 100644
---
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
+++
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
@@ -83,6 +83,7 @@ import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction;
import org.apache.jackrabbit.oak.spi.query.QueryConstants;
import org.apache.jackrabbit.oak.spi.query.QueryLimits;
+import org.apache.jackrabbit.oak.spi.query.fulltext.VectorQuery;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
import org.apache.lucene.analysis.Analyzer;
@@ -1477,6 +1478,14 @@ public class LucenePropertyIndex extends FulltextIndex {
private boolean visitTerm(String propertyName, String text, String
boost, boolean not) {
String p = getLuceneFieldName(propertyName, pr);
+ // Lucene doesn't support vectorQuery, so we remove the
queryVectorConfig from the complete query text.
+ if (propertyName == null) {
+ // Lucene indexes don't support inference, so we should
remove queryInferenceConfig
+ // from query before evaluating it.
+ VectorQuery vectorQuery = new VectorQuery(text);
+ text = vectorQuery.getQueryText();
+ }
+
Query q = tokenToQuery(text, p, pr, analyzer, augmentor);
if (q == null) {
return false;
diff --git
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java
index fe379982fb..65868b2d56 100644
---
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java
+++
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java
@@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugins.index.lucene;
import org.apache.jackrabbit.oak.api.ContentRepository;
import org.apache.jackrabbit.oak.plugins.index.FullTextIndexCommonTest;
import org.apache.jackrabbit.oak.plugins.index.LuceneIndexOptions;
+import org.apache.jackrabbit.oak.spi.query.fulltext.VectorQuery;
import org.junit.After;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
@@ -45,6 +46,15 @@ public class LuceneFullTextIndexTest extends
FullTextIndexCommonTest {
setTraversalEnabled(false);
}
+ @Override
+ protected String[] getPrefixes() {
+
+ // we are not testing with experimental inferenceImplementation
+ return new String[]{"",
+ VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX
+"{}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX
+ };
+ }
+
@After
public void shutdownExecutor() {
executorService.shutdown();
diff --git
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java
index 48358551ce..21bea835d1 100644
---
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java
+++
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java
@@ -58,6 +58,8 @@ public class LuceneTestRepositoryBuilder extends
TestRepositoryBuilder {
resultCountingIndexProvider = new
ResultCountingIndexProvider(indexProvider);
queryEngineSettings = new QueryEngineSettings();
+ // enabling inference to check impact on all tests.
+ queryEngineSettings.setInferenceEnabled(true);
optionalEditorProvider = new TestUtil.OptionalEditorProvider();
asyncIndexUpdate.setCorruptIndexHandler(trackingCorruptIndexHandler);
}
diff --git
a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQuery.java
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQuery.java
index 7079b73f35..e0709bff53 100644
---
a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQuery.java
+++
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQuery.java
@@ -27,9 +27,10 @@ import org.slf4j.LoggerFactory;
public class VectorQuery {
private static final Logger LOG =
LoggerFactory.getLogger(VectorQuery.class);
private static final String DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX = "?";
- private static final String INFERENCE_QUERY_CONFIG_PREFIX_KEY =
"org.apache.jackrabbit.oak.search.inference.query.prefix";
+ public static final String INFERENCE_QUERY_CONFIG_PREFIX_KEY =
"org.apache.jackrabbit.oak.search.inference.query.prefix";
public static final String INFERENCE_QUERY_CONFIG_PREFIX =
System.getProperty(
INFERENCE_QUERY_CONFIG_PREFIX_KEY,
DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX);
+ public static final String EXPERIMENTAL_COMPATIBILITY_MODE_KEY =
"oak.inference.experimental.compatibility";
private final String queryInferenceConfig;
private final String queryText;
@@ -41,53 +42,56 @@ public class VectorQuery {
}
private String[] parseText(String inputText) {
+ String jsonPart = null;
+ String queryTextPart = null;
String text = inputText.trim();
- // Remove the first delimiter
- if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX) &&
text.charAt(INFERENCE_QUERY_CONFIG_PREFIX.length()) == '{') {
+ if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX)) {
text = text.substring(INFERENCE_QUERY_CONFIG_PREFIX.length());
-
- // Try to find the end of the JSON part by parsing incrementally
- int possibleEndIndex = 0;
- String jsonPart = null;
- String queryTextPart;
- int jsonEndDelimiterIndex = -1;
-
- while (possibleEndIndex < text.length()) {
- possibleEndIndex = text.indexOf(INFERENCE_QUERY_CONFIG_PREFIX,
possibleEndIndex + 1);
- if (possibleEndIndex == -1) {
- // If we reach here, it means we couldn't find a valid
JSON part
- jsonPart = "";
- LOG.warn("Query starts with inference prefix {}, but
without valid json part," +
- " if case this prefix is a valid fulltext
query prefix, please update system property {} with different prefix value",
- INFERENCE_QUERY_CONFIG_PREFIX,
INFERENCE_QUERY_CONFIG_PREFIX_KEY);
- break;
+ if (text.charAt(0) == '{') {
+ // Try to find the end of the JSON part by parsing
incrementally
+ int possibleEndIndex = 0;
+ int jsonEndDelimiterIndex = -1;
+ while (possibleEndIndex < text.length()) {
+ possibleEndIndex =
text.indexOf(INFERENCE_QUERY_CONFIG_PREFIX, possibleEndIndex +
INFERENCE_QUERY_CONFIG_PREFIX.length());
+ if (possibleEndIndex == -1) {
+ // If we reach here, it means we couldn't find a valid
JSON part
+ jsonPart = "{}";
+ // we should now use text string as queryText
+ jsonEndDelimiterIndex = 0;
+ break;
+ }
+ String candidateJson = text.substring(0, possibleEndIndex);
+ // Verify if this is valid JSON using Oak's JsopTokenizer
+ if (JsonUtils.isValidJson(candidateJson, false)) {
+ jsonPart = candidateJson;
+ jsonEndDelimiterIndex = possibleEndIndex;
+ break;
+ }
}
- String candidateJson = text.substring(0, possibleEndIndex);
- // Verify if this is valid JSON using Oak's JsopTokenizer
- if (JsonUtils.isValidJson(candidateJson, false)) {
- jsonPart = candidateJson;
- jsonEndDelimiterIndex = possibleEndIndex;
- break;
+ text = text.substring(jsonEndDelimiterIndex);
+ if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX)) {
+ // Remove the second delimiter
+ text =
text.substring(INFERENCE_QUERY_CONFIG_PREFIX.length());
}
- }
- // If we found a valid JSON part, extract it
- if (jsonPart == null) {
- // If we reach here, it means we couldn't find a valid JSON
part
- jsonPart = "";
queryTextPart = text;
- LOG.warn("Query starts with InferenceQueryPrefix: {}, but
without valid json part," +
- " if case this prefix is a valid fulltext
query prefix, please update {} with different prefix value",
- INFERENCE_QUERY_CONFIG_PREFIX,
INFERENCE_QUERY_CONFIG_PREFIX_KEY);
-
} else {
- // Extract query text part (everything after the JSON part
delimiter)
- queryTextPart = text.substring(jsonEndDelimiterIndex +
1).trim();
-
+ if (isCompatibilityModeEnabled()) {
+ // No JSON part present but starts with prefix
+ // we return "{}" to be compatible with experimental
inference queries
+ jsonPart = "{}";
+ queryTextPart = text;
+ }
+ else {
+ jsonPart = "";
+ queryTextPart = inputText;
+ }
}
- return new String[]{jsonPart, queryTextPart};
} else {
- return new String[]{"", text};
+ // If the text doesn't start with the prefix, return empty config
and the original text
+ jsonPart = "";
+ queryTextPart = text;
}
+ return new String[]{jsonPart, queryTextPart};
}
public String getQueryInferenceConfig() {
@@ -97,4 +101,8 @@ public class VectorQuery {
public String getQueryText() {
return queryText;
}
+
+ private boolean isCompatibilityModeEnabled() {
+ return Boolean.getBoolean(EXPERIMENTAL_COMPATIBILITY_MODE_KEY);
+ }
}
\ No newline at end of file
diff --git
a/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryCompatibilityModeTest.java
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryCompatibilityModeTest.java
new file mode 100644
index 0000000000..b01b63332c
--- /dev/null
+++
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryCompatibilityModeTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.spi.query.fulltext;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class VectorQueryCompatibilityModeTest {
+
+ @Before
+ public void setUp() {
+ // Enable experimental compatibility mode for every test in this class
+ System.setProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY,
"true");
+ }
+
+ @After
+ public void tearDown() {
+ // Clean up any system properties set during the tests
+ System.clearProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY);
+ System.clearProperty(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX_KEY);
+ }
+
+ @Test
+ public void testQueryWithEmptyConfigExperimentalInferenceCompatible() {
+ // Input string: "??query text"
+ String inputString = VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query text";
+ VectorQuery query = new VectorQuery(inputString);
+
+ assertEquals("{}", query.getQueryInferenceConfig());
+ assertEquals(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query text",
query.getQueryText());
+ }
+
+ @Test
+ public void testPrefixOnlyQueryExperimentalInferenceCompatible() {
+ // Input string: "?query text"
+ VectorQuery query = new
VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query text");
+ assertEquals("{}", query.getQueryInferenceConfig());
+ // In compatibility mode the leading prefix is stripped from the
query text
+ assertEquals("query text", query.getQueryText());
+ }
+
+ // We don't need to explicitly enable experimental compatibility mode in
each test anymore
+ // as it's already set in setUp()
+}
\ No newline at end of file
diff --git
a/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryTest.java
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryTest.java
index 9601b7e95a..cc59c04e5a 100644
---
a/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryTest.java
+++
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryTest.java
@@ -18,14 +18,29 @@
*/
package org.apache.jackrabbit.oak.spi.query.fulltext;
+import org.junit.After;
+import org.junit.Before;
import org.junit.Test;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
public class VectorQueryTest {
+ @Before
+ public void setUp() {
+ // Ensure compatibility mode is disabled for these tests
+ System.setProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY,
"false");
+ }
+
+ @After
+ public void tearDown() {
+ // Clean up all system properties set during the tests
+ System.clearProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY);
+ }
+
@Test
public void testBasicQuery() {
+ // Input string: "simple query"
VectorQuery query = new VectorQuery("simple query");
assertEquals("", query.getQueryInferenceConfig());
assertEquals("simple query", query.getQueryText());
@@ -33,60 +48,89 @@ public class VectorQueryTest {
@Test
public void testQueryWithInferenceConfig() {
- VectorQuery query = new VectorQuery("?{\"model\":\"gpt-4\"}?search for
oak trees");
+ // Input string: "?{"model":"gpt-4"}?search for oak trees"
+ VectorQuery query = new
VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "{\"model\":\"gpt-4\"}"
+ VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "search for oak trees");
assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig());
assertEquals("search for oak trees", query.getQueryText());
}
@Test
public void testQueryWithComplexInferenceConfig() {
+ // Input string:
"?{"model":"gpt-4","temperature":0.7,"options":{"filter":true}}?oak trees"
VectorQuery query = new VectorQuery(
-
"?{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}?oak
trees");
-
assertEquals("{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}",
+ VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX +
"{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}" +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "oak trees");
+
assertEquals("{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}",
query.getQueryInferenceConfig());
assertEquals("oak trees", query.getQueryText());
}
@Test
public void testQueryWithQuestionMarksInText() {
- VectorQuery query = new VectorQuery("?{\"model\":\"gpt-4\"}?what are
oak trees?");
+ // Input string: "?{"model":"gpt-4"}?what are oak trees?"
+ VectorQuery query = new
VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "{\"model\":\"gpt-4\"}"
+ VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "what are oak trees?");
assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig());
assertEquals("what are oak trees?", query.getQueryText());
}
@Test
public void testQueryWithoutInferencePrefix() {
- VectorQuery query = new VectorQuery("{\"model\":\"gpt-4\"}?query");
+ // Input string: "{"model":"gpt-4"}?query"
+ VectorQuery query = new VectorQuery("{\"model\":\"gpt-4\"}" +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query");
assertEquals("", query.getQueryInferenceConfig());
- assertEquals("{\"model\":\"gpt-4\"}?query", query.getQueryText());
+ assertEquals("{\"model\":\"gpt-4\"}" +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query", query.getQueryText());
}
@Test
public void testQueryWithInvalidJson() {
- VectorQuery query = new VectorQuery("?{invalid json}?query");
- assertEquals("", query.getQueryInferenceConfig());
- assertEquals("{invalid json}?query", query.getQueryText());
- }
-
- @Test
- public void testQueryWithEmptyConfig() {
- VectorQuery query = new VectorQuery("??query text");
- assertEquals("", query.getQueryInferenceConfig());
- assertEquals("??query text", query.getQueryText());
+ // Input string: "?{invalid json}?query"
+ VectorQuery query = new
VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "{invalid json}" +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query");
+ assertEquals("{}", query.getQueryInferenceConfig());
+ assertEquals("{invalid json}" +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query", query.getQueryText());
}
@Test
public void testQueryWithWhitespace() {
- VectorQuery query = new VectorQuery(" ?{\"model\":\"gpt-4\"}?
search query ");
+ String whiteSpaces = " ";
+ // Input string: " ?{"model":"gpt-4"}? search query "
+ VectorQuery query = new VectorQuery(" " +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "{\"model\":\"gpt-4\"}" +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + whiteSpaces + "search query ");
assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig());
- assertEquals("search query", query.getQueryText());
+ assertEquals(whiteSpaces + "search query", query.getQueryText());
}
@Test
public void testEmptyQuery() {
+ // Input string: ""
VectorQuery query = new VectorQuery("");
assertEquals("", query.getQueryInferenceConfig());
assertEquals("", query.getQueryText());
}
+ @Test
+ public void testNoJsonEndDelimiterQuery() {
+ // Input string: "?{"model":"gpt-4"query text"
+ VectorQuery query = new
VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX +
"{\"model\":\"gpt-4\"query text");
+ assertEquals("{}", query.getQueryInferenceConfig());
+ // No closing delimiter: the leading prefix is stripped and the rest
is used as query text
+ assertEquals("{\"model\":\"gpt-4\"query text", query.getQueryText());
+ }
+
+ @Test
+ public void testQueryWithEmptyConfigExperimentalInferenceNonCompatible() {
+ // Input string: "??query text"
+ String inputString = VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query text";
+ VectorQuery query = new VectorQuery(inputString);
+
+ assertEquals("", query.getQueryInferenceConfig());
+ assertEquals(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX +
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query text", query.getQueryText());
+ }
+
+ @Test
+ public void testPrefixOnlyQueryExperimentalInferenceNonCompatible() {
+ // Input string: "?query text"
+ VectorQuery query = new
VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query text");
+ assertEquals("", query.getQueryInferenceConfig());
+ // When compatibility mode is disabled, the prefix should remain part
of the query text
+ assertEquals(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX + "query text",
query.getQueryText());
+ }
+
}
\ No newline at end of file
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 494dbdf69c..220935327e 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -641,7 +641,7 @@ public class ElasticRequestHandler {
}
// Experimental support for inference queries
else if (elasticIndexDefinition.inferenceDefinition !=
null && elasticIndexDefinition.inferenceDefinition.queries != null) {
- bqBuilder.must(m -> m.bool(b -> inference(b,
propertyName, queryText, pr, includeDynamicBoostedValues)));
+ bqBuilder.must(m -> m.bool(b -> inference(b,
propertyName, text, pr, includeDynamicBoostedValues)));
} else {
QueryStringQuery.Builder qsqBuilder =
fullTextQuery(queryText, getElasticFulltextFieldName(propertyName), pr,
includeDynamicBoostedValues);
bqBuilder.must(m -> m.queryString(qsqBuilder.build()));
@@ -685,19 +685,20 @@ public class ElasticRequestHandler {
inferenceModelConfig.getMinTerms(),
vectorQuery.getQueryText());
return b.must(mm -> mm.queryString(qsqBuilder.build()));
} else if (inferenceModelConfig.isEnabled() &&
inferenceModelConfig.getMinTerms() <=
vectorQuery.getQueryText().split("\\s+").length) {
+ String inferenceModelConfigName =
inferenceModelConfig.getInferenceModelConfigName();
InferenceService inferenceService = InferenceServiceManager
.getInstance(inferenceModelConfig);
List<Float> embeddings =
inferenceService.embeddings(vectorQuery.getQueryText(), (int)
inferenceModelConfig.getTimeoutMillis());
if (embeddings != null) {
KnnQuery.Builder knnQueryBuilder = new
KnnQuery.Builder();
- knnQueryBuilder.field(InferenceConstants.VECTOR_SPACES
+ "." + inferenceQueryModelName + "." + InferenceConstants.VECTOR);
+ knnQueryBuilder.field(InferenceConstants.VECTOR_SPACES
+ "." + inferenceModelConfigName + "." + InferenceConstants.VECTOR);
knnQueryBuilder.numCandidates(inferenceModelConfig.getNumCandidates());
knnQueryBuilder.queryVector(embeddings);
KnnQuery knnQuery = knnQueryBuilder.build();
NestedQuery.Builder nestedQueryBuilder = new
NestedQuery.Builder()
- .path(InferenceConstants.VECTOR_SPACES + "." +
inferenceQueryModelName)
+ .path(InferenceConstants.VECTOR_SPACES + "." +
inferenceModelConfigName)
.query(Query.of(q2 -> q2.knn(knnQuery)));
b.should(s -> s.nested(nestedQueryBuilder.build()));
diff --git
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java
index 457add8e22..98c8417bfc 100644
---
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java
+++
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java
@@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugins.index.elastic;
import org.apache.jackrabbit.oak.api.ContentRepository;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.plugins.index.FullTextIndexCommonTest;
+import org.apache.jackrabbit.oak.spi.query.fulltext.VectorQuery;
import org.junit.ClassRule;
import org.junit.Test;
@@ -70,4 +71,12 @@ public class ElasticFullTextIndexTest extends
FullTextIndexCommonTest {
});
}
+ @Override
+ protected String[] getPrefixes() {
+ // we are not testing experimental inferenceImplementation
+ return new String[]{"",
+ VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX
+"{}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX
+ };
+ }
+
}
diff --git
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
index 5df1eee832..657dbe5ec6 100644
---
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
+++
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
@@ -39,6 +39,7 @@ import
org.apache.jackrabbit.oak.plugins.index.elastic.ElasticAbstractQueryTest;
import
org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.query.fulltext.VectorQuery;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.stats.CounterStats;
import org.apache.jackrabbit.oak.stats.DefaultStatisticsProvider;
@@ -212,9 +213,30 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
}
@Test
- public void hybridSearch() throws Exception {
+ public void testHybridSearchWithVectorQueryConfigJson() throws Exception {
+ // Test hybrid search with inference configuration
+ hybridSearch("?{\"inferenceModelConfig\": \"ada-test-model\"}?");
+ }
+
+ @Test
+ public void testHybridSearchWithEmptyVectorQueryConfigJson() throws
Exception {
+ // Test hybrid search with empty inference configuration
+ hybridSearch("?{}?");
+ }
+
+ @Test
+ public void testHybridSearchWithExperimentalPrefix() throws Exception {
+ enableExperimentalInferenceCompatibility();
+ // Test hybrid search with experimental inference query prefix
+ hybridSearch("?");
+ }
+
+ private void enableExperimentalInferenceCompatibility() {
+ System.setProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY,
"true");
+ }
+
+ private void hybridSearch(String inferenceConfigInQuery) throws Exception {
String jcrIndexName = UUID.randomUUID().toString();
- String inferenceConfigInQuery = "{\"inferenceModelConfig\":
\"ada-test-model\"}";
String inferenceServiceUrl = "http://localhost:" + wireMock.port() +
"/v1/embeddings";
String inferenceModelConfigName = "ada-test-model";
String inferenceModelName = "text-embedding-ada-002";
@@ -449,8 +471,8 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
String expectedPath = entry.getValue();
// Test with inference config
- String queryPath = "select [jcr:path] from [nt:base] where
ISDESCENDANTNODE('/content') and contains(*, '?"
- + inferenceConfigInQuery + "?" + query + "')";
+ String queryPath = "select [jcr:path] from [nt:base] where
ISDESCENDANTNODE('/content') and contains(*, '"
+ + inferenceConfigInQuery + query + "')";
List<String> results = executeQuery(queryPath, SQL2, true, true);
assertEquals(expectedPath, results.get(0));
@@ -465,13 +487,13 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
*/
private void verifyErrorHandling(String jcrIndexName, String
inferenceConfigInQuery) {
// Test server error handling
- String queryPath3 = "select [jcr:path] from [nt:base] where
ISDESCENDANTNODE('/content') and contains(*, '?"
- + inferenceConfigInQuery + "?" + "machine learning')";
+ String queryPath3 = "select [jcr:path] from [nt:base] where
ISDESCENDANTNODE('/content') and contains(*, '"
+ + inferenceConfigInQuery + "machine learning')";
assertQuery(queryPath3, List.of("/content/ml",
"/content/programming"));
// Test timeout handling
- String queryPath4 = "select [jcr:path] from [nt:base] where
ISDESCENDANTNODE('/content') and contains(*, '?"
- + inferenceConfigInQuery + "?" + "farming practices')";
+ String queryPath4 = "select [jcr:path] from [nt:base] where
ISDESCENDANTNODE('/content') and contains(*, '"
+ + inferenceConfigInQuery + "farming practices')";
assertQuery(queryPath4, List.of("/content/farm"));
}
@@ -687,8 +709,8 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
// Add content
Tree content = root.getTree("/").addChild("content");
Tree document = content.addChild("document");
- document.setProperty("title", "Test Document for Reinitialization");
Tree document2 = content.addChild("document2");
+ document.setProperty("title", "Test Document for Reinitialization");
document2.setProperty("title", "Test Document for Reinitialization 2");
root.commit();
diff --git
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
index edc24bcf24..ceef6b0af8 100644
---
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
+++
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
@@ -152,13 +152,15 @@ public abstract class FullTextIndexCommonTest extends
AbstractQueryTest {
c.setProperty("propb", "Hello folks");
test.addChild("d").setProperty("propb", "baz");
root.commit();
-
- assertEventually(() -> {
- assertQuery("//*[jcr:contains(., 'Hello')]", XPATH,
List.of("/test/c", "/test/b", "/test/a"), true, true);
- assertQuery("//*[jcr:contains(., 'Hello')] order by @jcr:score
ascending", XPATH,
+ for (String prefix : getPrefixes()) {
+ assertEventually(() -> {
+ assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]",
XPATH,
+ List.of("/test/c", "/test/b", "/test/a"), true, true);
+ assertQuery("//*[jcr:contains(., '" + prefix + "Hello')] order
by @jcr:score ascending", XPATH,
List.of("/test/a", "/test/b", "/test/c"), true, true);
- assertQuery("//*[jcr:contains(., 'people')]", XPATH,
List.of("/test/c"));
- });
+ assertQuery("//*[jcr:contains(., '" + prefix + "people')]",
XPATH, List.of("/test/c"));
+ });
+ }
}
@Test
@@ -197,11 +199,13 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest {
d.setProperty("b", "world");
root.commit();
- assertEventually(() -> {
- assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded"));
- assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/nodec", "/test/noded"));
- assertQuery("//*[jcr:contains(., 'hello OR world')]", XPATH, List.of("/test/nodea", "/test/nodeb", "/test/nodec", "/test/noded"));
- });
+ for (String prefix : getPrefixes()) {
+ assertEventually(() -> {
+ assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/nodec", "/test/noded"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "hello OR world')]", XPATH, List.of("/test/nodea", "/test/nodeb", "/test/nodec", "/test/noded"));
+ });
+ }
}
@Test
@@ -224,10 +228,12 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest {
d.setProperty("b", "world");
root.commit();
- assertEventually(() -> {
- assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/a", "/test/c", "/test/d"));
- assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/c", "/test/d"));
- });
+ for (String prefix : getPrefixes()) {
+ assertEventually(() -> {
+ assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/a", "/test/c", "/test/d"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/c", "/test/d"));
+ });
+ }
}
@Test
@@ -250,10 +256,12 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest {
d.setProperty("b", "world");
root.commit();
- assertEventually(() -> {
- assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded"));
- assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/nodec", "/test/noded"));
- });
+ for (String prefix : getPrefixes()) {
+ assertEventually(() -> {
+ assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/nodec", "/test/noded"));
+ });
+ }
}
/*
@@ -280,10 +288,12 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest {
d.setProperty("b", "world");
root.commit();
- assertEventually(() -> {
- assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/nodec", "/test/noded"));
- assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/nodec"));
- });
+ for (String prefix : getPrefixes()) {
+ assertEventually(() -> {
+ assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/nodec", "/test/noded"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/nodec"));
+ });
+ }
}
@Test
@@ -321,8 +331,10 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest {
index.setProperty(FulltextIndexConstants.PROP_REFRESH_DEFN, true);
root.commit();
- assertEventually(() ->
- assertQuery("//*[jcr:contains(., 'jpg')]", XPATH, List.of("/test/a")));
+ for (String prefix : getPrefixes()) {
+ assertEventually(() ->
+ assertQuery("//*[jcr:contains(., '" + prefix + "jpg')]", XPATH, List.of("/test/a")));
+ }
}
@Test
@@ -346,14 +358,16 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest {
root.commit();
- assertEventually(() -> {
- assertQuery("//*[jcr:contains(., 'foo')]", XPATH, List.of("/test/a"));
- assertQuery("//*[jcr:contains(., '2025')]", XPATH, List.of("/test/b"));
- assertQuery("//*[jcr:contains(., '123')]", XPATH, List.of("/test/c"));
- assertQuery("//*[jcr:contains(., '456.78')]", XPATH, List.of("/test/d"));
- assertQuery("//*[jcr:contains(., 'true')]", XPATH, List.of("/test/e"));
+ for (String prefix : getPrefixes()) {
+ assertEventually(() -> {
+ assertQuery("//*[jcr:contains(., '" + prefix + "foo')]", XPATH, List.of("/test/a"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "2025')]", XPATH, List.of("/test/b"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "123')]", XPATH, List.of("/test/c"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "456.78')]", XPATH, List.of("/test/d"));
+ assertQuery("//*[jcr:contains(., '" + prefix + "true')]", XPATH, List.of("/test/e"));
}
- );
+ );
+ }
}
protected void assertEventually(Runnable r) {
@@ -396,4 +410,7 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest {
return executeQuery(explain, lang).get(0);
}
+ protected String[] getPrefixes() {
+ return new String[]{""};
+ }
}