This is an automated email from the ASF dual-hosted git repository. abenedetti pushed a commit to branch branch_9x in repository https://gitbox.apache.org/repos/asf/solr.git
commit f529e70337d2ad7620e9a310428a710c4ac17640 Author: Renato Haeberli <[email protected]> AuthorDate: Mon Jul 14 16:01:35 2025 +0200 removing check in TextToVectorUpdateProcessorFactory which brakes update for new dynamic fields (#3426) * distinguish between dynamic field and field, dynamic fields do not have to exist --------- Co-authored-by: Alessandro Benedetti <[email protected]> (cherry picked from commit 8a28b917c63dd7dcc0fda1b63e493fbcc34f3a3e) --- solr/CHANGES.txt | 2 + .../model/SolrTextToVectorModel.java | 44 ++++++++------- .../TextToVectorUpdateProcessorFactory.java | 7 ++- .../test-files/solr/collection1/conf/schema.xml | 2 + .../TextToVectorUpdateProcessorFactoryTest.java | 64 ++++++++++++++++++++-- .../processor/TextToVectorUpdateProcessorTest.java | 36 ++++++------ 6 files changed, 110 insertions(+), 45 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9259ce173f3..135115970ea 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -179,6 +179,8 @@ Bug Fixes * SOLR-17792: Fix deadlocks in ParallelHttpShardHandler, re-implement synchronization in HttpShardHandler (Houston Putman) +* GITHUB#3426: Improving check in TextToVectorUpdateProcessorFactory, which breaks update for new dynamic fields. (Renato Haeberli via Alessandro Benedetti) + Dependency Upgrades --------------------- * SOLR-17795: Upgrade Lucene to 9.12.2. (Pierre Salagnac, Christine Poerschke, Houston Putman) diff --git a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java index e8c5bcf014a..9a22087b990 100644 --- a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java +++ b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java @@ -18,6 +18,7 @@ package org.apache.solr.llm.textvectorisation.model; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.model.embedding.EmbeddingModel; +import java.lang.invoke.MethodHandles; import java.lang.reflect.Method; import java.time.Duration; import java.util.ArrayList; @@ -25,9 +26,12 @@ import java.util.Map; import java.util.Objects; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.solr.common.SolrException; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.llm.textvectorisation.store.TextToVectorModelException; import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This object wraps a {@link dev.langchain4j.model.embedding.EmbeddingModel} to encode text to @@ -35,6 +39,7 @@ import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModel * ManagedTextToVectorModelStore} */ public class SolrTextToVectorModel implements Accountable { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(SolrTextToVectorModel.class); private static final String TIMEOUT_PARAM = "timeout"; @@ -80,27 +85,20 @@ public class SolrTextToVectorModel implements Accountable { * support, some of them may require to be handled in here as separate switch cases */ switch (paramName) { - case TIMEOUT_PARAM: + case TIMEOUT_PARAM -> { Duration timeOut = Duration.ofSeconds((Long) params.get(paramName)); builder.getClass().getMethod(paramName, Duration.class).invoke(builder, timeOut); - break; - case MAX_SEGMENTS_PER_BATCH_PARAM: - builder - .getClass() - .getMethod(paramName, Integer.class) - .invoke(builder, ((Long) params.get(paramName)).intValue()); - break; - case MAX_RETRIES_PARAM: - builder - .getClass() - .getMethod(paramName, Integer.class) - .invoke(builder, ((Long) params.get(paramName)).intValue()); - break; + } + case MAX_SEGMENTS_PER_BATCH_PARAM, MAX_RETRIES_PARAM -> builder + .getClass() + .getMethod(paramName, Integer.class) + .invoke(builder, ((Long) params.get(paramName)).intValue()); + /* * For primitive params if there's only one setter available, we call it. * If there's choice we default to the string one */ - default: + default -> { ArrayList<Method> paramNameMatches = new ArrayList<>(); for (var method : builder.getClass().getMethods()) { if (paramName.equals(method.getName()) && method.getParameterCount() == 1) { @@ -108,13 +106,19 @@ public class SolrTextToVectorModel implements Accountable { } } if (paramNameMatches.size() == 1) { - paramNameMatches.get(0).invoke(builder, params.get(paramName)); + paramNameMatches.getFirst().invoke(builder, params.get(paramName)); } else { - builder - .getClass() - .getMethod(paramName, String.class) - .invoke(builder, params.get(paramName).toString()); + try { + builder + .getClass() + .getMethod(paramName, String.class) + .invoke(builder, params.get(paramName).toString()); + } catch (NoSuchMethodException e) { + log.error("Parameter {} not supported by model {}", paramName, className); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e); + } } + } } } } diff --git a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java index 559ea9a4309..07653df62dd 100644 --- a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java +++ b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java @@ -70,15 +70,16 @@ public class TextToVectorUpdateProcessorFactory extends UpdateRequestProcessorFa public UpdateRequestProcessor getInstance( SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { IndexSchema latestSchema = req.getCore().getLatestSchema(); - if (!latestSchema.hasExplicitField(inputField)) { + + if (!latestSchema.isDynamicField(inputField) && !latestSchema.hasExplicitField(inputField)) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\""); } - if (!latestSchema.hasExplicitField(outputField)) { + + if (!latestSchema.isDynamicField(outputField) && !latestSchema.hasExplicitField(outputField)) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + outputField + "\""); } - final SchemaField outputFieldSchema = latestSchema.getField(outputField); assertIsDenseVectorField(outputFieldSchema); diff --git a/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml b/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml index 92aa0e76591..a574b93294a 100644 --- a/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml @@ -25,6 +25,8 @@ <fieldType name="high_dimensional_float_knn_vector" class="solr.DenseVectorField" vectorDimension="2048" similarityFunction="cosine" vectorEncoding="FLOAT32"/> <fieldType name="high_dimensional_byte_knn_vector" class="solr.DenseVectorField" vectorDimension="2048" similarityFunction="cosine" vectorEncoding="BYTE"/> <fieldType name="plong" class="solr.LongPointField" useDocValuesAsStored="false"/> + <dynamicField name="*_s" type="string" indexed="true" stored="true" /> + <dynamicField name="*_vector1024" type="knn_vector" indexed="true" stored="true" /> <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/> <field name="vector" type="knn_vector" indexed="true" stored="true"/> diff --git a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java index f01aa187537..c5b7991b3c5 100644 --- a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java +++ b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java @@ -21,8 +21,13 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.llm.TestLlmBase; +import org.apache.solr.llm.textvectorisation.model.SolrTextToVectorModel; +import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.junit.After; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -38,6 +43,18 @@ public class TextToVectorUpdateProcessorFactoryTest extends TestLlmBase { afterTest(); } + SolrCore collection1; + + @Before + public void setup() { + collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); + } + + @After + public void after() { + collection1.close(); + } + @Test public void init_fullArgs_shouldInitAllParams() { NamedList<String> args = new NamedList<>(); @@ -100,13 +117,11 @@ public class TextToVectorUpdateProcessorFactoryTest extends TestLlmBase { TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); ModifiableSolrParams params = new ModifiableSolrParams(); - SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; factoryToTest.init(args); SolrException e = assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null)); assertEquals("undefined field: \"notExistentOutput\"", e.getMessage()); - collection1.close(); } /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ @@ -120,14 +135,12 @@ public class TextToVectorUpdateProcessorFactoryTest extends TestLlmBase { TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); ModifiableSolrParams params = new ModifiableSolrParams(); - SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; factoryToTest.init(args); SolrException e = assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null)); assertEquals( "only DenseVectorField is compatible with Vector Query Parsers: _text_", e.getMessage()); - collection1.close(); } /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ @@ -141,12 +154,51 @@ public class TextToVectorUpdateProcessorFactoryTest extends TestLlmBase { TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); ModifiableSolrParams params = new ModifiableSolrParams(); - SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; factoryToTest.init(args); SolrException e = assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null)); assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); - collection1.close(); + } + + /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ + @Test + public void init_notExistentDynamicInputField_shouldNotThrowException() { + String inputFieldName = "text_s"; + String outputFieldName = "vector"; + + UpdateRequestProcessor instance = + createUpdateProcessor(inputFieldName, outputFieldName, collection1, "model1"); + assertNotNull(instance); + } + + /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ + @Test + public void init_notExistingDynamicOutputField_shouldNotThrowException() { + String inputFieldName = "_text_"; + String outputFieldName = "vec_vector1024"; + + UpdateRequestProcessor instance = + createUpdateProcessor(inputFieldName, outputFieldName, collection1, "model2"); + assertNotNull(instance); + } + + private UpdateRequestProcessor createUpdateProcessor( + String inputFieldName, String outputFieldName, SolrCore collection1, String modelName) { + NamedList<String> args = new NamedList<>(); + + ManagedTextToVectorModelStore.getManagedModelStore(collection1) + .addModel(new SolrTextToVectorModel(modelName, null, null)); + args.add("inputField", inputFieldName); + args.add("outputField", outputFieldName); + args.add("model", modelName); + + TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + factoryToTest.init(args); + + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + + return factoryToTest.getInstance(req, null, null); } } diff --git a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java index 13508c87ddd..6f57dfe3d52 100644 --- a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java +++ b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java @@ -24,6 +24,7 @@ import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.llm.TestLlmBase; import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -40,6 +41,13 @@ public class TextToVectorUpdateProcessorTest extends TestLlmBase { afterTest(); } + @After + public void afterEachTest() throws Exception { + restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); + restTestHarness.delete( + ManagedTextToVectorModelStore.REST_END_POINT + "/exception-throwing-model"); // clean + } + @Test public void processAdd_inputField_shouldVectoriseInputField() throws Exception { loadModel("dummy-model.json"); // preparation @@ -50,10 +58,7 @@ public class TextToVectorUpdateProcessorTest extends TestLlmBase { "textToVector"); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(), @@ -66,6 +71,14 @@ public class TextToVectorUpdateProcessorTest extends TestLlmBase { restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up } + private SolrQuery getSolrQuery() { + final String solrQuery = "*:*"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.add("fl", "id,vector"); + return query; + } + /* This test looks for the 'dummy-1' model, but such model is not loaded, the model store is empty, so the update fails */ @@ -93,10 +106,7 @@ public class TextToVectorUpdateProcessorTest extends TestLlmBase { addWithChain(sdoc("id", "98", "_text_", "Vegeta is the saiyan prince."), "textToVector"); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(), @@ -116,10 +126,7 @@ public class TextToVectorUpdateProcessorTest extends TestLlmBase { assertU(adoc("id", "98")); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(), @@ -141,10 +148,7 @@ public class TextToVectorUpdateProcessorTest extends TestLlmBase { "failingTextToVector"); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(),
