This is an automated email from the ASF dual-hosted git repository. mkataria pushed a commit to branch OAK-11694_1 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 963ba66b7e764856c29068ef4e76206198b25ece Author: Mohit Kataria <[email protected]> AuthorDate: Mon May 5 13:51:50 2025 +0530 OAK-11693: Parse inference query and extract out inference config and text query --- .../oak/query/ast/FullTextSearchImpl.java | 1 - .../oak/spi/query/fulltext/InferenceQuery.java | 100 +++++++++++++++++++++ .../query/fulltext/InferenceQueryConfigTest.java | 53 +++++++++++ .../oak/spi/query/fulltext/InferenceQueryTest.java | 92 +++++++++++++++++++ 4 files changed, 245 insertions(+), 1 deletion(-) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java index 2f5c88c57c..9f51672808 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java @@ -30,7 +30,6 @@ import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains; import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression; import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextParser; import org.apache.jackrabbit.oak.spi.query.fulltext.VectorQuery; - import java.text.ParseException; import java.util.Collections; import java.util.Set; diff --git a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQuery.java b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQuery.java new file mode 100644 index 0000000000..317fb86c9d --- /dev/null +++ b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQuery.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.spi.query.fulltext; + +import org.apache.jackrabbit.oak.json.JsonUtils; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class InferenceQuery { + private static final Logger LOG = LoggerFactory.getLogger(InferenceQuery.class); + private static final String DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX = "?"; + private static final String INFERENCE_QUERY_CONFIG_PREFIX_KEY = "org.apache.jackrabbit.oak.search.inference.query.prefix"; + public static final String INFERENCE_QUERY_CONFIG_PREFIX = System.getProperty( + INFERENCE_QUERY_CONFIG_PREFIX_KEY, DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX); + + private final String queryInferenceConfig; + private final String queryText; + + public InferenceQuery(@NotNull String text) { + String[] components = parseText(text); + this.queryInferenceConfig = components[0]; + this.queryText = components[1]; + } + + private String[] parseText(String inputText) { + String text = inputText.trim(); + // A config is recognised only when the prefix is directly followed by '{'; the prefix is then stripped. NOTE(review): charAt throws StringIndexOutOfBoundsException when the trimmed text is exactly the prefix (for example "?") + if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX) && text.charAt(INFERENCE_QUERY_CONFIG_PREFIX.length()) == '{') { + text = text.substring(INFERENCE_QUERY_CONFIG_PREFIX.length()); + + // Grow the candidate JSON up to each later occurrence of the prefix until one candidate validates + int possibleEndIndex = 0; + String jsonPart = null; + String queryTextPart; + int 
jsonEndDelimiterIndex = -1; + + while (possibleEndIndex < text.length()) { + possibleEndIndex = text.indexOf(INFERENCE_QUERY_CONFIG_PREFIX, possibleEndIndex + 1); + if (possibleEndIndex == -1) { + // No further prefix occurrence, so no candidate validated: config stays empty. NOTE(review): jsonEndDelimiterIndex is still -1 here, so the else branch at the end of the method returns the whole prefix-stripped text as query text + jsonPart = ""; + LOG.warn("Query starts with inference prefix {}, but without valid json part," + + " if case this prefix is a valid fulltext query prefix, please update system property {} with different prefix value", + INFERENCE_QUERY_CONFIG_PREFIX, INFERENCE_QUERY_CONFIG_PREFIX_KEY); + break; + } + String candidateJson = text.substring(0, possibleEndIndex); + // Accept the candidate only if JsonUtils.isValidJson accepts it (presumably backed by Oak's JsopTokenizer; confirm) + if (JsonUtils.isValidJson(candidateJson, false)) { + jsonPart = candidateJson; + jsonEndDelimiterIndex = possibleEndIndex; + break; + } + } + // NOTE(review): for a non-empty prefix the loop above always exits via break with jsonPart assigned, so this null branch appears unreachable; confirm before relying on it + if (jsonPart == null) { + // Fallback: no candidate validated, so the prefix-stripped text is used as plain query text + jsonPart = ""; + queryTextPart = text; + LOG.warn("Query starts with InferenceQueryPrefix: {}, but without valid json part," + + " if case this prefix is a valid fulltext query prefix, please update {} with different prefix value", + INFERENCE_QUERY_CONFIG_PREFIX, INFERENCE_QUERY_CONFIG_PREFIX_KEY); + + } else { + // Query text is everything after the closing delimiter, trimmed. NOTE(review): this skips exactly one character, which assumes a one-character prefix although the prefix is configurable via system property + queryTextPart = text.substring(jsonEndDelimiterIndex + 1).trim(); + + } + return new String[]{jsonPart, queryTextPart}; + } else { + return new String[]{"", text}; + } + } + + public String getQueryInferenceConfig() { + return queryInferenceConfig; + } + + public String getQueryText() { + return queryText; + } +} \ No newline at end of file diff --git a/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryConfigTest.java b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryConfigTest.java new file mode 100644 index 0000000000..4ac85ddfd1 --- /dev/null +++ 
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryConfigTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.spi.query.fulltext; + +import org.junit.Test; +import static org.junit.Assert.*; + +public class InferenceQueryConfigTest { /* Pins how InferenceQueryConfig maps a config string to getInferenceModelConfig(). NOTE(review): InferenceQueryConfig is not among the four files listed in this commit's diffstat; confirm it exists on the branch. */ + + @Test + public void testEmptyConfig() { /* an empty config string yields a null model config */ + InferenceQueryConfig config = new InferenceQueryConfig(""); + assertNull(config.getInferenceModelConfig()); + } + + @Test + public void testEmptyJsonConfig() { /* an empty JSON object yields an empty-string model config */ + InferenceQueryConfig config = new InferenceQueryConfig("{}"); + assertEquals("", config.getInferenceModelConfig()); + } + + @Test + public void testValidConfig() { /* the inferenceModelConfig value is returned unchanged */ + InferenceQueryConfig config = new InferenceQueryConfig("{\"inferenceModelConfig\":\"ada-test-model\"}"); + assertEquals("ada-test-model", config.getInferenceModelConfig()); + } + + @Test(expected = RuntimeException.class) + public void testInvalidJsonConfig() { /* malformed JSON is expected to be rejected with a RuntimeException */ + new InferenceQueryConfig("{invalid json}"); + } + + @Test(expected = RuntimeException.class) + public void testMissingTypeConfig() { /* NOTE(review): an object lacking inferenceModelConfig is expected to throw, whereas the empty object above yields an empty string; confirm this asymmetry is intended */ + new InferenceQueryConfig("{\"someOtherField\":\"value\"}"); + } +} diff --git 
a/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryTest.java b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryTest.java new file mode 100644 index 0000000000..3017464b88 --- /dev/null +++ b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.spi.query.fulltext; + +import org.junit.Test; + +import static org.junit.Assert.*; + +public class InferenceQueryTest { /* Pins how InferenceQuery splits raw query text into an inference config and the remaining query text. NOTE(review): no case covers input that is exactly the prefix, for example "?". */ + + @Test + public void testBasicQuery() { /* text without the prefix: empty config, text returned as-is */ + InferenceQuery query = new InferenceQuery("simple query"); + assertEquals("", query.getQueryInferenceConfig()); + assertEquals("simple query", query.getQueryText()); + } + + @Test + public void testQueryWithInferenceConfig() { /* prefix, JSON, prefix, text: JSON becomes the config and the rest the query text */ + InferenceQuery query = new InferenceQuery("?{\"model\":\"gpt-4\"}?search for oak trees"); + assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig()); + assertEquals("search for oak trees", query.getQueryText()); + } + + @Test + public void testQueryWithComplexInferenceConfig() { /* nested JSON objects are captured whole */ + InferenceQuery query = new InferenceQuery( + "?{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}?oak trees"); + assertEquals("{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}", + query.getQueryInferenceConfig()); + assertEquals("oak trees", query.getQueryText()); + } + + @Test + public void testQueryWithQuestionMarksInText() { /* a prefix character later in the query text stays part of the text */ + InferenceQuery query = new InferenceQuery("?{\"model\":\"gpt-4\"}?what are oak trees?"); + assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig()); + assertEquals("what are oak trees?", query.getQueryText()); + } + + @Test + public void testQueryWithoutInferencePrefix() { /* JSON without the leading prefix is treated as plain query text */ + InferenceQuery query = new InferenceQuery("{\"model\":\"gpt-4\"}?query"); + assertEquals("", query.getQueryInferenceConfig()); + assertEquals("{\"model\":\"gpt-4\"}?query", query.getQueryText()); + } + + @Test + public void testQueryWithInvalidJson() { /* invalid JSON: config is empty and only the leading prefix is stripped from the query text */ + InferenceQuery query = new InferenceQuery("?{invalid json}?query"); + assertEquals("", query.getQueryInferenceConfig()); + assertEquals("{invalid json}?query", query.getQueryText()); + } + + @Test + public void testQueryWithEmptyConfig() { /* prefix not followed by an opening brace: the text is returned unchanged */ + InferenceQuery query = new InferenceQuery("??query text"); + assertEquals("", 
query.getQueryInferenceConfig()); + assertEquals("??query text", query.getQueryText()); + } + + @Test + public void testQueryWithWhitespace() { /* surrounding whitespace is trimmed from both the input and the extracted query text */ + InferenceQuery query = new InferenceQuery(" ?{\"model\":\"gpt-4\"}? search query "); + assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig()); + assertEquals("search query", query.getQueryText()); + } + + @Test + public void testEmptyQuery() { /* empty input yields empty config and empty query text */ + InferenceQuery query = new InferenceQuery(""); + assertEquals("", query.getQueryInferenceConfig()); + assertEquals("", query.getQueryText()); + } + +} \ No newline at end of file
