This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_10x by this push:
new ac4f371b1a6 SOLR-17999: Fix the Managed Model Store Initialization in
the LLM module (#4056)
ac4f371b1a6 is described below
commit ac4f371b1a654e61b1d4b9ef2c81283f752a34f8
Author: Ilaria Petreti <[email protected]>
AuthorDate: Tue Jan 27 12:45:42 2026 +0100
SOLR-17999: Fix the Managed Model Store Initialization in the LLM module
(#4056)
(cherry picked from commit a5e02593f27bcbe5aef497cbba38117dc21c3a41)
---
...SOLR-17999-fix-llm-managed-model-store-init.yml | 9 +++
.../TextToVectorUpdateProcessorFactory.java | 26 ++++++++-
.../solrconfig-language-models-no-components.xml | 53 +++++++++++++++++
...olrconfig-language-models-query-parser-only.xml | 57 ++++++++++++++++++
...nguage-models-update-request-processor-only.xml | 62 ++++++++++++++++++++
.../rest/TestManagedModelStoreInitialization.java | 67 ++++++++++++++++++++++
.../modules/query-guide/pages/text-to-vector.adoc | 35 ++++++++++-
7 files changed, 305 insertions(+), 4 deletions(-)
diff --git
a/changelog/unreleased/SOLR-17999-fix-llm-managed-model-store-init.yml
b/changelog/unreleased/SOLR-17999-fix-llm-managed-model-store-init.yml
new file mode 100644
index 00000000000..524ae95da45
--- /dev/null
+++ b/changelog/unreleased/SOLR-17999-fix-llm-managed-model-store-init.yml
@@ -0,0 +1,9 @@
+# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
+title: Fix the Text to Vector Managed Model Store Initialization in LLM Module
+type: fixed
+authors:
+ - name: Ilaria Petreti
+ - name: Alessandro Benedetti
+links:
+ - name: SOLR-17999
+ url: https://issues.apache.org/jira/browse/SOLR-17999
diff --git
a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
index e524b689d56..f89766337ae 100644
---
a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
+++
b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
@@ -21,16 +21,21 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.RequiredSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
import
org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel;
import
org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceObserver;
import org.apache.solr.schema.DenseVectorField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
+import org.apache.solr.util.plugin.SolrCoreAware;
/**
* Vectorises a textual field value and add the resulting vector to another
field.
@@ -47,10 +52,12 @@ import
org.apache.solr.update.processor.UpdateRequestProcessorFactory;
*
* *
*/
-public class TextToVectorUpdateProcessorFactory extends
UpdateRequestProcessorFactory {
+public class TextToVectorUpdateProcessorFactory extends
UpdateRequestProcessorFactory
+ implements SolrCoreAware, ManagedResourceObserver {
private static final String INPUT_FIELD_PARAM = "inputField";
private static final String OUTPUT_FIELD_PARAM = "outputField";
private static final String MODEL_NAME = "model";
+ private ManagedTextToVectorModelStore modelStore = null;
private String inputField;
private String outputField;
@@ -66,6 +73,23 @@ public class TextToVectorUpdateProcessorFactory extends
UpdateRequestProcessorFa
modelName = required.get(MODEL_NAME);
}
+ @Override
+ public void inform(SolrCore core) {
+ final SolrResourceLoader solrResourceLoader = core.getResourceLoader();
+
ManagedTextToVectorModelStore.registerManagedTextToVectorModelStore(solrResourceLoader,
this);
+ }
+
+ @Override
+ public void onManagedResourceInitialized(NamedList<?> args, ManagedResource
res)
+ throws SolrException {
+ if (res instanceof ManagedTextToVectorModelStore) {
+ modelStore = (ManagedTextToVectorModelStore) res;
+ }
+ if (modelStore != null) {
+ modelStore.loadStoredModels();
+ }
+ }
+
@Override
public UpdateRequestProcessor getInstance(
SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor
next) {
diff --git
a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-no-components.xml
b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-no-components.xml
new file mode 100644
index 00000000000..582af45aa10
--- /dev/null
+++
b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-no-components.xml
@@ -0,0 +1,53 @@
+<?xml version="1.0" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
contributor
+ license agreements. See the NOTICE file distributed with this work for
additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<config>
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <dataDir>${solr.data.dir:}</dataDir>
+ <directoryFactory name="DirectoryFactory"
+ class="${solr.directoryFactory:solr.MockDirectoryFactory}"
/>
+ <schemaFactory class="ClassicIndexSchemaFactory" />
+
+ <requestDispatcher>
+ <requestParsers />
+ </requestDispatcher>
+
+ <query>
+ <filterCache class="solr.CaffeineCache" size="4096"
+ initialSize="2048" autowarmCount="0" />
+ </query>
+ <requestHandler name="/select" class="solr.SearchHandler" />
+
+ <updateHandler class="solr.DirectUpdateHandler2">
+ <autoCommit>
+ <maxTime>15000</maxTime>
+ <openSearcher>false</openSearcher>
+ </autoCommit>
+ <autoSoftCommit>
+ <maxTime>1000</maxTime>
+ </autoSoftCommit>
+ <updateLog>
+ <str name="dir">${solr.data.dir:}</str>
+ </updateLog>
+ </updateHandler>
+
+ <!-- Query request handler with JSON response defaults -->
+ <requestHandler name="/query" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="wt">json</str>
+ <str name="indent">true</str>
+ <str name="df">id</str>
+ </lst>
+ </requestHandler>
+
+</config>
diff --git
a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-query-parser-only.xml
b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-query-parser-only.xml
new file mode 100644
index 00000000000..80054a9775a
--- /dev/null
+++
b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-query-parser-only.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
contributor
+ license agreements. See the NOTICE file distributed with this work for
additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<config>
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <dataDir>${solr.data.dir:}</dataDir>
+ <directoryFactory name="DirectoryFactory"
+ class="${solr.directoryFactory:solr.MockDirectoryFactory}"
/>
+ <schemaFactory class="ClassicIndexSchemaFactory" />
+
+ <requestDispatcher>
+ <requestParsers />
+ </requestDispatcher>
+
+ <!-- Query parser used to run vector search queries-->
+ <queryParser name="knn_text_to_vector"
+
class="org.apache.solr.languagemodels.textvectorisation.search.TextToVectorQParserPlugin"
/>
+
+ <query>
+ <filterCache class="solr.CaffeineCache" size="4096"
+ initialSize="2048" autowarmCount="0" />
+ </query>
+ <requestHandler name="/select" class="solr.SearchHandler" />
+
+ <updateHandler class="solr.DirectUpdateHandler2">
+ <autoCommit>
+ <maxTime>15000</maxTime>
+ <openSearcher>false</openSearcher>
+ </autoCommit>
+ <autoSoftCommit>
+ <maxTime>1000</maxTime>
+ </autoSoftCommit>
+ <updateLog>
+ <str name="dir">${solr.data.dir:}</str>
+ </updateLog>
+ </updateHandler>
+
+ <!-- Query request handler with JSON response defaults -->
+ <requestHandler name="/query" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="wt">json</str>
+ <str name="indent">true</str>
+ <str name="df">id</str>
+ </lst>
+ </requestHandler>
+
+</config>
diff --git
a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-update-request-processor-only.xml
b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-update-request-processor-only.xml
new file mode 100644
index 00000000000..664019200dc
--- /dev/null
+++
b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-language-models-update-request-processor-only.xml
@@ -0,0 +1,62 @@
+<?xml version="1.0" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
contributor
+ license agreements. See the NOTICE file distributed with this work for
additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<config>
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <dataDir>${solr.data.dir:}</dataDir>
+ <directoryFactory name="DirectoryFactory"
+ class="${solr.directoryFactory:solr.MockDirectoryFactory}"
/>
+ <schemaFactory class="ClassicIndexSchemaFactory" />
+
+ <requestDispatcher>
+ <requestParsers />
+ </requestDispatcher>
+
+ <query>
+ <filterCache class="solr.CaffeineCache" size="4096"
+ initialSize="2048" autowarmCount="0" />
+ </query>
+ <requestHandler name="/select" class="solr.SearchHandler" />
+
+ <updateHandler class="solr.DirectUpdateHandler2">
+ <autoCommit>
+ <maxTime>15000</maxTime>
+ <openSearcher>false</openSearcher>
+ </autoCommit>
+ <autoSoftCommit>
+ <maxTime>1000</maxTime>
+ </autoSoftCommit>
+ <updateLog>
+ <str name="dir">${solr.data.dir:}</str>
+ </updateLog>
+ </updateHandler>
+
+ <!-- Query request handler with JSON response defaults -->
+ <requestHandler name="/query" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="wt">json</str>
+ <str name="indent">true</str>
+ <str name="df">id</str>
+ </lst>
+ </requestHandler>
+
+ <updateRequestProcessorChain name="textToVector">
+ <processor
class="solr.languagemodels.textvectorisation.update.processor.TextToVectorUpdateProcessorFactory">
+ <str name="inputField">_text_</str>
+ <str name="outputField">vector</str>
+ <str name="model">dummy-1</str>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory"/>
+ </updateRequestProcessorChain>
+
+</config>
diff --git
a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestManagedModelStoreInitialization.java
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestManagedModelStoreInitialization.java
new file mode 100644
index 00000000000..244094b8764
--- /dev/null
+++
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/textvectorisation/store/rest/TestManagedModelStoreInitialization.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.textvectorisation.store.rest;
+
+import org.apache.solr.languagemodels.TestLanguageModelBase;
+import org.junit.After;
+import org.junit.Test;
+
+public class TestManagedModelStoreInitialization extends TestLanguageModelBase
{
+
+ @After
+ public void cleanUp() throws Exception {
+ afterTest();
+ }
+
+ @Test
+ public void
managedModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized()
+ throws Exception {
+ setupTest(
+ "solrconfig-language-models-update-request-processor-only.xml",
+ "schema-language-models.xml",
+ false,
+ false);
+
+ assertJQ(ManagedTextToVectorModelStore.REST_END_POINT,
"/responseHeader/status==0");
+ assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models==[]");
+ }
+
+ @Test
+ public void
managedModelStore_whenQueryParserComponentConfigured_shouldBeInitialized()
+ throws Exception {
+ setupTest(
+ "solrconfig-language-models-query-parser-only.xml",
+ "schema-language-models.xml",
+ false,
+ false);
+
+ assertJQ(ManagedTextToVectorModelStore.REST_END_POINT,
"/responseHeader/status==0");
+ assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models==[]");
+ }
+
+ @Test
+ public void managedModelStore_whenNoComponents_shouldNotBeInitialized()
throws Exception {
+ setupTest(
+ "solrconfig-language-models-no-components.xml",
"schema-language-models.xml", false, false);
+ assertJQ(
+ ManagedTextToVectorModelStore.REST_END_POINT,
+ "/responseHeader/status==400",
+ "/error/msg=='No REST managed resource registered for path "
+ + ManagedTextToVectorModelStore.REST_END_POINT
+ + "'");
+ }
+}
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc
index ebcaf8c7189..aafafd861fb 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc
@@ -72,15 +72,38 @@ This is provided via the `language-models`
xref:configuration-guide:solr-modules
== Language Model Configuration
-You need to register / configure the plugins provided by the Language Models
module that you want to use. This is done in `solrconfig.xml`.
+Language Models is a module and therefore its plugins must be configured in
`solrconfig.xml`.
-* Declaration of the `knn_text_to_vector` query parser.
+=== Minimum Requirements
+
+* Enable the `language-models` module to make the Language Models classes
available on Solr's classpath.
+See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details.
+
+* At least one of the following components must be declared in
`solrconfig.xml`:
+** the TextToVector Update Processor (index time)
++
+[source,xml]
+----
+<updateRequestProcessorChain name="textToVector">
+ <processor
class="solr.languagemodels.textvectorisation.update.processor.TextToVectorUpdateProcessorFactory">
+ <str name="inputField">_text_</str>
+ <str name="outputField">vector</str>
+ <str name="model">dummy-1</str>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory"/>
+</updateRequestProcessorChain>
+----
+** the TextToVector Query Parser (query time)
+
[source,xml]
----
<queryParser name="knn_text_to_vector"
class="org.apache.solr.languagemodels.textvectorisation.search.TextToVectorQParserPlugin"/>
----
+[NOTE]
+====
+If no component is configured in `solrconfig.xml`, the Text-to-Vector model
store will not be registered and requests to
`/schema/text-to-vector-model-store` will return an error.
+====
== Text to Vector Lifecycle
@@ -122,7 +145,7 @@ Accepted values:
s|Required |Default: none
|===
+
-The identifier of your model, this is used by any component that intends to
use the model (`knn_text_to_vector` query parser).
+The identifier of your model. It is used by any component that intends to
use the model (e.g., the `knn_text_to_vector` query parser or the
`TextToVectorUpdateProcessorFactory` update processor).
`params`::
+
@@ -410,6 +433,12 @@ Faceting or querying on the boolean 'vectorised' field can
also give you a quick
=== Running a Text-to-Vector Query
+Before running a Text-to-Vector query, ensure that the `knn_text_to_vector`
query parser is declared in `solrconfig.xml`:
+[source,xml]
+----
+<queryParser name="knn_text_to_vector"
class="org.apache.solr.languagemodels.textvectorisation.search.TextToVectorQParserPlugin"/>
+----
+
To run a query that vectorises your query text, using a model you previously
uploaded is simple:
[source,text]