This is an automated email from the ASF dual-hosted git repository.

mkataria pushed a commit to branch OAK-11694_1
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 963ba66b7e764856c29068ef4e76206198b25ece
Author: Mohit Kataria <[email protected]>
AuthorDate: Mon May 5 13:51:50 2025 +0530

    OAK-11693: Parse inference query and extract out inference config and text 
query
---
 .../oak/query/ast/FullTextSearchImpl.java          |   1 -
 .../oak/spi/query/fulltext/InferenceQuery.java     | 100 +++++++++++++++++++++
 .../query/fulltext/InferenceQueryConfigTest.java   |  53 +++++++++++
 .../oak/spi/query/fulltext/InferenceQueryTest.java |  92 +++++++++++++++++++
 4 files changed, 245 insertions(+), 1 deletion(-)

diff --git 
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
index 2f5c88c57c..9f51672808 100644
--- 
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
+++ 
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
@@ -30,7 +30,6 @@ import 
org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
 import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression;
 import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextParser;
 import org.apache.jackrabbit.oak.spi.query.fulltext.VectorQuery;
-
 import java.text.ParseException;
 import java.util.Collections;
 import java.util.Set;
diff --git 
a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQuery.java
 
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQuery.java
new file mode 100644
index 0000000000..317fb86c9d
--- /dev/null
+++ 
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQuery.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.spi.query.fulltext;
+
+import org.apache.jackrabbit.oak.json.JsonUtils;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Splits a raw fulltext search string into an optional inference
+ * configuration (a JSON document) and the remaining query text.
+ * <p>
+ * The recognised shape is {@code <prefix>{json}<prefix>query text}, where
+ * {@code <prefix>} is {@link #INFERENCE_QUERY_CONFIG_PREFIX}. Input that does
+ * not start with the prefix immediately followed by an opening brace is
+ * treated as plain query text with an empty configuration.
+ */
+public class InferenceQuery {
+    private static final Logger LOG =
+            LoggerFactory.getLogger(InferenceQuery.class);
+    private static final String DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX = "?";
+    private static final String INFERENCE_QUERY_CONFIG_PREFIX_KEY =
+            "org.apache.jackrabbit.oak.search.inference.query.prefix";
+    /**
+     * Delimiter expected before and after the JSON configuration. Read from
+     * the system property named by the key above when this class is
+     * initialised; falls back to {@code "?"} when the property is not set.
+     */
+    public static final String INFERENCE_QUERY_CONFIG_PREFIX =
+            System.getProperty(INFERENCE_QUERY_CONFIG_PREFIX_KEY,
+                    DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX);
+
+    private final String queryInferenceConfig;
+    private final String queryText;
+
+    /**
+     * Parses the given search string.
+     *
+     * @param text the raw search string; surrounding whitespace is ignored
+     */
+    public InferenceQuery(@NotNull String text) {
+        String[] components = parseText(text);
+        this.queryInferenceConfig = components[0];
+        this.queryText = components[1];
+    }
+
+    /**
+     * Separates the JSON configuration from the query text.
+     *
+     * @param inputText the raw search string
+     * @return a two element array: the JSON configuration (empty string when
+     *         none could be extracted) followed by the query text
+     */
+    private String[] parseText(String inputText) {
+        String text = inputText.trim();
+        int prefixLength = INFERENCE_QUERY_CONFIG_PREFIX.length();
+        // startsWith(String, int) returns false instead of throwing when the
+        // input ends right after the prefix (e.g. the query is just "?").
+        if (!text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX)
+                || !text.startsWith("{", prefixLength)) {
+            return new String[]{"", text};
+        }
+
+        // Drop the leading delimiter; what is left starts with '{'.
+        String remainder = text.substring(prefixLength);
+
+        // The delimiter may also occur inside the JSON or inside the query
+        // text, so try every occurrence in turn and accept the first one
+        // whose preceding text validates as JSON.
+        int searchFrom = 1;
+        while (searchFrom <= remainder.length()) {
+            int delimiterIndex =
+                    remainder.indexOf(INFERENCE_QUERY_CONFIG_PREFIX, searchFrom);
+            if (delimiterIndex == -1) {
+                break;
+            }
+            String candidateJson = remainder.substring(0, delimiterIndex);
+            if (JsonUtils.isValidJson(candidateJson, false)) {
+                // Skip the whole delimiter, not just a single character, so
+                // that a configured multi-character prefix does not leak
+                // into the query text.
+                String queryTextPart = remainder
+                        .substring(delimiterIndex + prefixLength).trim();
+                return new String[]{candidateJson, queryTextPart};
+            }
+            searchFrom = delimiterIndex + 1;
+        }
+
+        // No candidate validated: keep everything after the leading
+        // delimiter as query text and report the likely misconfiguration.
+        LOG.warn("Query starts with inference prefix {}, but without valid json part,"
+                        + " in case this prefix is a valid fulltext query prefix,"
+                        + " please update system property {} with different prefix value",
+                INFERENCE_QUERY_CONFIG_PREFIX, INFERENCE_QUERY_CONFIG_PREFIX_KEY);
+        return new String[]{"", remainder};
+    }
+
+    /**
+     * @return the extracted JSON configuration, or an empty string when the
+     *         input carried none
+     */
+    public String getQueryInferenceConfig() {
+        return queryInferenceConfig;
+    }
+
+    /**
+     * @return the query text with the configuration part removed
+     */
+    public String getQueryText() {
+        return queryText;
+    }
+}
\ No newline at end of file
diff --git 
a/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryConfigTest.java
 
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryConfigTest.java
new file mode 100644
index 0000000000..4ac85ddfd1
--- /dev/null
+++ 
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryConfigTest.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.spi.query.fulltext;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for constructing an {@link InferenceQueryConfig} from a raw
+ * configuration string.
+ */
+public class InferenceQueryConfigTest {
+
+    /** An empty configuration string yields no inference model config. */
+    @Test
+    public void testEmptyConfig() {
+        assertNull(new InferenceQueryConfig("").getInferenceModelConfig());
+    }
+
+    /** An empty JSON object yields an empty inference model config. */
+    @Test
+    public void testEmptyJsonConfig() {
+        String modelConfig =
+                new InferenceQueryConfig("{}").getInferenceModelConfig();
+        assertEquals("", modelConfig);
+    }
+
+    /** The configured model name is returned as given. */
+    @Test
+    public void testValidConfig() {
+        String json = "{\"inferenceModelConfig\":\"ada-test-model\"}";
+        String modelConfig =
+                new InferenceQueryConfig(json).getInferenceModelConfig();
+        assertEquals("ada-test-model", modelConfig);
+    }
+
+    /** Malformed JSON is expected to be rejected with a runtime exception. */
+    @Test(expected = RuntimeException.class)
+    public void testInvalidJsonConfig() {
+        String malformed = "{invalid json}";
+        new InferenceQueryConfig(malformed);
+    }
+
+    /**
+     * A JSON object that only carries an unrelated field is expected to be
+     * rejected with a runtime exception.
+     */
+    @Test(expected = RuntimeException.class)
+    public void testMissingTypeConfig() {
+        String unrelated = "{\"someOtherField\":\"value\"}";
+        new InferenceQueryConfig(unrelated);
+    }
+}
diff --git 
a/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryTest.java
 
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryTest.java
new file mode 100644
index 0000000000..3017464b88
--- /dev/null
+++ 
b/oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQueryTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.spi.query.fulltext;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for {@link InferenceQuery}: each case feeds one raw search
+ * string and checks the extracted configuration and query text.
+ */
+public class InferenceQueryTest {
+
+    /**
+     * Parses {@code input} and verifies both extracted parts, configuration
+     * first and query text second.
+     */
+    private static void assertParsed(String input, String expectedConfig,
+                                     String expectedText) {
+        InferenceQuery parsed = new InferenceQuery(input);
+        assertEquals(expectedConfig, parsed.getQueryInferenceConfig());
+        assertEquals(expectedText, parsed.getQueryText());
+    }
+
+    /** Plain text without any prefix is returned unchanged. */
+    @Test
+    public void testBasicQuery() {
+        assertParsed("simple query", "", "simple query");
+    }
+
+    /** A flat JSON configuration is split from the query text. */
+    @Test
+    public void testQueryWithInferenceConfig() {
+        assertParsed("?{\"model\":\"gpt-4\"}?search for oak trees",
+                "{\"model\":\"gpt-4\"}",
+                "search for oak trees");
+    }
+
+    /** A nested JSON configuration is extracted as a whole. */
+    @Test
+    public void testQueryWithComplexInferenceConfig() {
+        String json = "{\"model\":\"gpt-4\",\"temperature\":0.7,"
+                + "\"options\":{\"filter\":true}}";
+        assertParsed("?" + json + "?oak trees", json, "oak trees");
+    }
+
+    /** Delimiter characters inside the query text are left in place. */
+    @Test
+    public void testQueryWithQuestionMarksInText() {
+        assertParsed("?{\"model\":\"gpt-4\"}?what are oak trees?",
+                "{\"model\":\"gpt-4\"}",
+                "what are oak trees?");
+    }
+
+    /** JSON that is not introduced by the prefix is plain query text. */
+    @Test
+    public void testQueryWithoutInferencePrefix() {
+        assertParsed("{\"model\":\"gpt-4\"}?query",
+                "",
+                "{\"model\":\"gpt-4\"}?query");
+    }
+
+    /** An invalid JSON part leaves only the leading prefix stripped. */
+    @Test
+    public void testQueryWithInvalidJson() {
+        assertParsed("?{invalid json}?query", "", "{invalid json}?query");
+    }
+
+    /** A prefix not followed by an opening brace is plain query text. */
+    @Test
+    public void testQueryWithEmptyConfig() {
+        assertParsed("??query text", "", "??query text");
+    }
+
+    /** Whitespace around the input and around the query text is trimmed. */
+    @Test
+    public void testQueryWithWhitespace() {
+        assertParsed("   ?{\"model\":\"gpt-4\"}?   search query   ",
+                "{\"model\":\"gpt-4\"}",
+                "search query");
+    }
+
+    /** Empty input yields an empty configuration and empty query text. */
+    @Test
+    public void testEmptyQuery() {
+        assertParsed("", "", "");
+    }
+
+}
\ No newline at end of file

Reply via email to