This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 8a28b917c63 removing check in TextToVectorUpdateProcessorFactory which
brakes update for new dynamic fields (#3426)
8a28b917c63 is described below
commit 8a28b917c63dd7dcc0fda1b63e493fbcc34f3a3e
Author: Renato Haeberli <[email protected]>
AuthorDate: Mon Jul 14 16:01:35 2025 +0200
removing check in TextToVectorUpdateProcessorFactory which brakes update
for new dynamic fields (#3426)
* distinguish between dynamic field and field, dynamic fields do not have
to exist
---------
Co-authored-by: Alessandro Benedetti <[email protected]>
---
solr/CHANGES.txt | 2 +
.../model/SolrTextToVectorModel.java | 44 ++++++++-------
.../TextToVectorUpdateProcessorFactory.java | 7 ++-
.../test-files/solr/collection1/conf/schema.xml | 2 +
.../TextToVectorUpdateProcessorFactoryTest.java | 64 ++++++++++++++++++++--
.../processor/TextToVectorUpdateProcessorTest.java | 36 ++++++------
6 files changed, 110 insertions(+), 45 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 85b4be69d12..2ca073e8ef8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -374,6 +374,8 @@ Bug Fixes
* SOLR-17792: Fix deadlocks in ParallelHttpShardHandler, re-implement
synchronization in HttpShardHandler (Houston Putman)
+* GITHUB#3426: Improving check in TextToVectorUpdateProcessorFactory, which
breaks update for new dynamic fields. (Renato Haeberli via Alessandro Benedetti)
+
Dependency Upgrades
---------------------
* SOLR-17795: Upgrade Lucene to 9.12.2. (Pierre Salagnac, Christine Poerschke,
Houston Putman)
diff --git
a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java
b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java
index e8c5bcf014a..9a22087b990 100644
---
a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java
+++
b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java
@@ -18,6 +18,7 @@ package org.apache.solr.llm.textvectorisation.model;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.embedding.EmbeddingModel;
+import java.lang.invoke.MethodHandles;
import java.lang.reflect.Method;
import java.time.Duration;
import java.util.ArrayList;
@@ -25,9 +26,12 @@ import java.util.Map;
import java.util.Objects;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.llm.textvectorisation.store.TextToVectorModelException;
import
org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* This object wraps a {@link dev.langchain4j.model.embedding.EmbeddingModel}
to encode text to
@@ -35,6 +39,7 @@ import
org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModel
* ManagedTextToVectorModelStore}
*/
public class SolrTextToVectorModel implements Accountable {
+ private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(SolrTextToVectorModel.class);
private static final String TIMEOUT_PARAM = "timeout";
@@ -80,27 +85,20 @@ public class SolrTextToVectorModel implements Accountable {
* support, some of them may require to be handled in here as
separate switch cases
*/
switch (paramName) {
- case TIMEOUT_PARAM:
+ case TIMEOUT_PARAM -> {
Duration timeOut = Duration.ofSeconds((Long)
params.get(paramName));
builder.getClass().getMethod(paramName,
Duration.class).invoke(builder, timeOut);
- break;
- case MAX_SEGMENTS_PER_BATCH_PARAM:
- builder
- .getClass()
- .getMethod(paramName, Integer.class)
- .invoke(builder, ((Long) params.get(paramName)).intValue());
- break;
- case MAX_RETRIES_PARAM:
- builder
- .getClass()
- .getMethod(paramName, Integer.class)
- .invoke(builder, ((Long) params.get(paramName)).intValue());
- break;
+ }
+ case MAX_SEGMENTS_PER_BATCH_PARAM, MAX_RETRIES_PARAM -> builder
+ .getClass()
+ .getMethod(paramName, Integer.class)
+ .invoke(builder, ((Long) params.get(paramName)).intValue());
+
/*
* For primitive params if there's only one setter available, we
call it.
* If there's choice we default to the string one
*/
- default:
+ default -> {
ArrayList<Method> paramNameMatches = new ArrayList<>();
for (var method : builder.getClass().getMethods()) {
if (paramName.equals(method.getName()) &&
method.getParameterCount() == 1) {
@@ -108,13 +106,19 @@ public class SolrTextToVectorModel implements Accountable
{
}
}
if (paramNameMatches.size() == 1) {
- paramNameMatches.get(0).invoke(builder, params.get(paramName));
+ paramNameMatches.getFirst().invoke(builder,
params.get(paramName));
} else {
- builder
- .getClass()
- .getMethod(paramName, String.class)
- .invoke(builder, params.get(paramName).toString());
+ try {
+ builder
+ .getClass()
+ .getMethod(paramName, String.class)
+ .invoke(builder, params.get(paramName).toString());
+ } catch (NoSuchMethodException e) {
+ log.error("Parameter {} not supported by model {}",
paramName, className);
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
e.getMessage(), e);
+ }
}
+ }
}
}
}
diff --git
a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
index 559ea9a4309..07653df62dd 100644
---
a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
+++
b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java
@@ -70,15 +70,16 @@ public class TextToVectorUpdateProcessorFactory extends
UpdateRequestProcessorFa
public UpdateRequestProcessor getInstance(
SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor
next) {
IndexSchema latestSchema = req.getCore().getLatestSchema();
- if (!latestSchema.hasExplicitField(inputField)) {
+
+ if (!latestSchema.isDynamicField(inputField) &&
!latestSchema.hasExplicitField(inputField)) {
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" +
inputField + "\"");
}
- if (!latestSchema.hasExplicitField(outputField)) {
+
+ if (!latestSchema.isDynamicField(outputField) &&
!latestSchema.hasExplicitField(outputField)) {
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" +
outputField + "\"");
}
-
final SchemaField outputFieldSchema = latestSchema.getField(outputField);
assertIsDenseVectorField(outputFieldSchema);
diff --git a/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml
b/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml
index 92aa0e76591..a574b93294a 100644
--- a/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml
@@ -25,6 +25,8 @@
<fieldType name="high_dimensional_float_knn_vector"
class="solr.DenseVectorField" vectorDimension="2048"
similarityFunction="cosine" vectorEncoding="FLOAT32"/>
<fieldType name="high_dimensional_byte_knn_vector"
class="solr.DenseVectorField" vectorDimension="2048"
similarityFunction="cosine" vectorEncoding="BYTE"/>
<fieldType name="plong" class="solr.LongPointField"
useDocValuesAsStored="false"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
+ <dynamicField name="*_vector1024" type="knn_vector" indexed="true"
stored="true" />
<field name="id" type="string" indexed="true" stored="true"
multiValued="false" required="false"/>
<field name="vector" type="knn_vector" indexed="true" stored="true"/>
diff --git
a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java
b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java
index f01aa187537..c5b7991b3c5 100644
---
a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java
+++
b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java
@@ -21,8 +21,13 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.llm.TestLlmBase;
+import org.apache.solr.llm.textvectorisation.model.SolrTextToVectorModel;
+import
org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore;
import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.junit.After;
import org.junit.AfterClass;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -38,6 +43,18 @@ public class TextToVectorUpdateProcessorFactoryTest extends
TestLlmBase {
afterTest();
}
+ SolrCore collection1;
+
+ @Before
+ public void setup() {
+ collection1 = solrClientTestRule.getCoreContainer().getCore("collection1");
+ }
+
+ @After
+ public void after() {
+ collection1.close();
+ }
+
@Test
public void init_fullArgs_shouldInitAllParams() {
NamedList<String> args = new NamedList<>();
@@ -100,13 +117,11 @@ public class TextToVectorUpdateProcessorFactoryTest
extends TestLlmBase {
TextToVectorUpdateProcessorFactory factoryToTest = new
TextToVectorUpdateProcessorFactory();
ModifiableSolrParams params = new ModifiableSolrParams();
- SolrCore collection1 =
solrClientTestRule.getCoreContainer().getCore("collection1");
SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params)
{};
factoryToTest.init(args);
SolrException e =
assertThrows(SolrException.class, () -> factoryToTest.getInstance(req,
null, null));
assertEquals("undefined field: \"notExistentOutput\"", e.getMessage());
- collection1.close();
}
/* Following test depends on a real solr schema and depends on
BeforeClass-AfterClass methods */
@@ -120,14 +135,12 @@ public class TextToVectorUpdateProcessorFactoryTest
extends TestLlmBase {
TextToVectorUpdateProcessorFactory factoryToTest = new
TextToVectorUpdateProcessorFactory();
ModifiableSolrParams params = new ModifiableSolrParams();
- SolrCore collection1 =
solrClientTestRule.getCoreContainer().getCore("collection1");
SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params)
{};
factoryToTest.init(args);
SolrException e =
assertThrows(SolrException.class, () -> factoryToTest.getInstance(req,
null, null));
assertEquals(
"only DenseVectorField is compatible with Vector Query Parsers:
_text_", e.getMessage());
- collection1.close();
}
/* Following test depends on a real solr schema and depends on
BeforeClass-AfterClass methods */
@@ -141,12 +154,51 @@ public class TextToVectorUpdateProcessorFactoryTest
extends TestLlmBase {
TextToVectorUpdateProcessorFactory factoryToTest = new
TextToVectorUpdateProcessorFactory();
ModifiableSolrParams params = new ModifiableSolrParams();
- SolrCore collection1 =
solrClientTestRule.getCoreContainer().getCore("collection1");
SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params)
{};
factoryToTest.init(args);
SolrException e =
assertThrows(SolrException.class, () -> factoryToTest.getInstance(req,
null, null));
assertEquals("undefined field: \"notExistentInput\"", e.getMessage());
- collection1.close();
+ }
+
+ /* Following test depends on a real solr schema and depends on
BeforeClass-AfterClass methods */
+ @Test
+ public void init_notExistentDynamicInputField_shouldNotThrowException() {
+ String inputFieldName = "text_s";
+ String outputFieldName = "vector";
+
+ UpdateRequestProcessor instance =
+ createUpdateProcessor(inputFieldName, outputFieldName, collection1,
"model1");
+ assertNotNull(instance);
+ }
+
+ /* Following test depends on a real solr schema and depends on
BeforeClass-AfterClass methods */
+ @Test
+ public void init_notExistingDynamicOutputField_shouldNotThrowException() {
+ String inputFieldName = "_text_";
+ String outputFieldName = "vec_vector1024";
+
+ UpdateRequestProcessor instance =
+ createUpdateProcessor(inputFieldName, outputFieldName, collection1,
"model2");
+ assertNotNull(instance);
+ }
+
+ private UpdateRequestProcessor createUpdateProcessor(
+ String inputFieldName, String outputFieldName, SolrCore collection1,
String modelName) {
+ NamedList<String> args = new NamedList<>();
+
+ ManagedTextToVectorModelStore.getManagedModelStore(collection1)
+ .addModel(new SolrTextToVectorModel(modelName, null, null));
+ args.add("inputField", inputFieldName);
+ args.add("outputField", outputFieldName);
+ args.add("model", modelName);
+
+ TextToVectorUpdateProcessorFactory factoryToTest = new
TextToVectorUpdateProcessorFactory();
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ factoryToTest.init(args);
+
+ SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params)
{};
+
+ return factoryToTest.getInstance(req, null, null);
}
}
diff --git
a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java
b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java
index 8614d637c5f..55cf735f9d4 100644
---
a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java
+++
b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java
@@ -24,6 +24,7 @@ import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.llm.TestLlmBase;
import
org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore;
+import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -40,6 +41,13 @@ public class TextToVectorUpdateProcessorTest extends
TestLlmBase {
afterTest();
}
+ @After
+ public void afterEachTest() throws Exception {
+ restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT +
"/dummy-1");
+ restTestHarness.delete(
+ ManagedTextToVectorModelStore.REST_END_POINT +
"/exception-throwing-model"); // clean
+ }
+
@Test
public void processAdd_inputField_shouldVectoriseInputField() throws
Exception {
loadModel("dummy-model.json"); // preparation
@@ -50,10 +58,7 @@ public class TextToVectorUpdateProcessorTest extends
TestLlmBase {
"textToVector");
assertU(commit());
- final String solrQuery = "*:*";
- final SolrQuery query = new SolrQuery();
- query.setQuery(solrQuery);
- query.add("fl", "id,vector");
+ final SolrQuery query = getSolrQuery();
assertJQ(
"/query" + query.toQueryString(),
@@ -66,6 +71,14 @@ public class TextToVectorUpdateProcessorTest extends
TestLlmBase {
restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT +
"/dummy-1"); // clean up
}
+ private SolrQuery getSolrQuery() {
+ final String solrQuery = "*:*";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.add("fl", "id,vector");
+ return query;
+ }
+
/*
This test looks for the 'dummy-1' model, but such model is not loaded, the
model store is empty, so the update fails
*/
@@ -93,10 +106,7 @@ public class TextToVectorUpdateProcessorTest extends
TestLlmBase {
addWithChain(sdoc("id", "98", "_text_", "Vegeta is the saiyan prince."),
"textToVector");
assertU(commit());
- final String solrQuery = "*:*";
- final SolrQuery query = new SolrQuery();
- query.setQuery(solrQuery);
- query.add("fl", "id,vector");
+ final SolrQuery query = getSolrQuery();
assertJQ(
"/query" + query.toQueryString(),
@@ -116,10 +126,7 @@ public class TextToVectorUpdateProcessorTest extends
TestLlmBase {
assertU(adoc("id", "98"));
assertU(commit());
- final String solrQuery = "*:*";
- final SolrQuery query = new SolrQuery();
- query.setQuery(solrQuery);
- query.add("fl", "id,vector");
+ final SolrQuery query = getSolrQuery();
assertJQ(
"/query" + query.toQueryString(),
@@ -141,10 +148,7 @@ public class TextToVectorUpdateProcessorTest extends
TestLlmBase {
"failingTextToVector");
assertU(commit());
- final String solrQuery = "*:*";
- final SolrQuery query = new SolrQuery();
- query.setQuery(solrQuery);
- query.add("fl", "id,vector");
+ final SolrQuery query = getSolrQuery();
assertJQ(
"/query" + query.toQueryString(),