aruggero commented on code in PR #4259:
URL: https://github.com/apache/solr/pull/4259#discussion_r3033113647


##########
solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml:
##########
@@ -36,11 +40,31 @@
   <field name="2048_byte_vector" type="high_dimensional_byte_knn_vector" 
indexed="true" stored="true" />
   <field name="2048_float_vector" type="high_dimensional_float_knn_vector" 
indexed="true" stored="true" />
   <field name="string_field" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <field name="body_field" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <field name="enriched_field" type="string" indexed="true" stored="true" 
multiValued="false" required="false"/>
+  <field name="enriched_field_multi" type="string" indexed="true" 
stored="true" multiValued="true" required="false"/>
+
+  <!-- single-valued output fields for each supported type -->
+  <field name="output_long"    type="plong"   indexed="true" stored="true" 
multiValued="false"/>
+  <field name="output_int"     type="pint"    indexed="true" stored="true" 
multiValued="false"/>
+  <field name="output_float"   type="pfloat"  indexed="true" stored="true" 
multiValued="false"/>
+  <field name="output_double"  type="pdouble" indexed="true" stored="true" 
multiValued="false"/>
+  <field name="output_boolean" type="boolean" indexed="true" stored="true" 
multiValued="false"/>
+  <field name="output_date"    type="pdate"   indexed="true" stored="true" 
multiValued="false"/>
+
+  <!-- multivalued output fields for each supported type -->
+  <field name="output_long_multi"    type="plong"   indexed="true" 
stored="true" multiValued="true"/>
+  <field name="output_int_multi"     type="pint"    indexed="true" 
stored="true" multiValued="true"/>
+  <field name="output_float_multi"   type="pfloat"  indexed="true" 
stored="true" multiValued="true"/>
+  <field name="output_double_multi"  type="pdouble" indexed="true" 
stored="true" multiValued="true"/>
+  <field name="output_boolean_multi" type="boolean" indexed="true" 
stored="true" multiValued="true"/>
+  <field name="output_date_multi"    type="pdate"   indexed="true" 
stored="true" multiValued="true"/>
 
   <field name="_version_" type="plong" indexed="true" stored="true" 
multiValued="false" />
   <field name="_text_" type="text_general" indexed="true" stored="false" 
multiValued="true"/>
   <copyField source="*" dest="_text_"/>
   <field name="vectorised" type="boolean" indexed="true" stored="false" 
docValues="true" default="false"/>
+  <field name="enriched" type="boolean" indexed="true" stored="false" 
docValues="true" default="false"/>

Review Comment:
   For what is this used?



##########
solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json:
##########
@@ -0,0 +1,8 @@
+{

Review Comment:
   Is there a way to avoid creating all these different dummy model files?
   like setting the response in the tests and initiating the model just one time



##########
solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml:
##########
@@ -0,0 +1,62 @@
+<?xml version="1.0" ?>

Review Comment:
   Couldn't we use the other for everything?



##########
solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java:
##########
@@ -0,0 +1,718 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.update.processor;
+
+import java.io.IOException;
+import java.util.Map;
+import org.apache.solr.client.solrj.RemoteSolrException;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.SolrQuery;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.languagemodels.TestLanguageModelBase;
+import 
org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class DocumentEnrichmentUpdateProcessorTest extends 
TestLanguageModelBase {
+
+  @BeforeClass
+  public static void init() throws Exception {
+    setupTest("solrconfig-document-enrichment.xml", 
"schema-language-models.xml", false, false);
+  }
+
+  @AfterClass
+  public static void cleanup() throws Exception {
+    afterTest();
+  }
+
+  private String loadedModelId;
+
+  @After
+  public void afterEachTest() throws Exception {
+    if (loadedModelId != null) {
+      restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + 
loadedModelId);
+      loadedModelId = null;
+    }
+  }
+
+  private void loadTestChatModel(String fileName, String modelId) throws 
Exception {
+    loadChatModel(fileName);
+    loadedModelId = modelId;
+  }
+
+  @Test
+  public void processAdd_inputField_shouldEnrichInputField() throws Exception {
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+
+    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan 
prince."), "documentEnrichment");
+    addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown 
up on planet Earth."), "documentEnrichment");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+  }
+
+  /*
+   This test looks for the 'dummy-chat-1' model, but such model is not loaded —
+   the model store is empty, so the update fails.
+  */
+  @Test
+  public void processAdd_modelNotFound_shouldThrowException() {
+    RuntimeException thrown =
+        assertThrows(
+            "model not found should throw an exception",
+            RemoteSolrException.class,
+            () ->
+                addWithChain(
+                    sdoc("id", "99", "string_field", "Vegeta is the saiyan 
prince."),
+                    "documentEnrichment"));
+    assertTrue(
+        thrown
+            .getMessage()
+            .contains(
+                "The model configured in the Update Request Processor 
'dummy-chat-1' can't be found in the store: /schema/chat-model-store"));
+  }
+
+  @Test
+  public void 
processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() throws 
Exception {
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+    addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichment");
+    addWithChain(sdoc("id", "98", "string_field", "Vegeta is the saiyan 
prince."), "documentEnrichment");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==", // no enriched field for doc 99
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+  }
+
+  @Test
+  public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() 
throws Exception {
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan 
prince."), "documentEnrichment");
+    assertU(adoc("id", "98")); // no string_field
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field=="); // no enriched field for doc 
98
+  }
+
+  @Test
+  public void 
processAdd_failingEnrichment_shouldLogAndIndexWithNoEnrichedField() throws 
Exception {
+    loadTestChatModel("exception-throwing-chat-model.json", 
"exception-throwing-chat-model");
+    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan 
prince."), "failingDocumentEnrichment");
+    addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown 
up on planet Earth."), "failingDocumentEnrichment");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==", // no enriched field for doc 99
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field=="); // no enriched field for doc 
98
+  }
+
+  @Test
+  public void 
processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent()
+      throws Exception {
+    // Verifies that when using a processor chain configured for partial 
updates
+    // (i.e., DistributedUpdateProcessorFactory before 
DocumentEnrichmentUpdateProcessorFactory),
+    // the system correctly retrieves the stored value of string_field and 
generates the
+    // enriched content for the document.
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+    assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince."));
+    assertU(adoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on 
planet Earth."));
+    assertU(commit());
+
+    SolrInputDocument atomicDoc = new SolrInputDocument();
+    atomicDoc.setField("id", "99");
+    atomicDoc.setField("enriched", Map.of("set", true));
+    addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field==" // no enriched field for 
document 98
+        );
+  }
+
+  @Test
+  public void 
processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() throws 
Exception {
+    // Verifies that when a document already contains an enriched_field and 
string_field is
+    // modified via atomic update, the enriched content is recomputed and 
replaces the previous
+    // value rather than being appended.
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+    addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan 
prince."), "documentEnrichment");
+    addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown 
up on planet Earth."), "documentEnrichment");
+    assertU(commit());
+
+    SolrInputDocument atomicDoc = new SolrInputDocument();
+    atomicDoc.setField("id", "99");
+    atomicDoc.setField("string_field", Map.of("set", "Vegeta is the saiyan 
prince from the Dragon Ball series."));
+    addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+  }
+
+  // --- multi-field tests ---
+
+  @Test
+  public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() 
throws Exception {
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", 
"body_field", "He is very proud."),
+        "documentEnrichmentMultiField");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", 
"body_field", "He grew up on Earth."),
+        "documentEnrichmentMultiField");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field=='enriched content'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field=='enriched content'");
+  }
+
+  @Test
+  public void 
processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() throws 
Exception {
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+
+    addWithChain(
+        sdoc("id", "99", "body_field", "He is very proud."), // string_field 
absent
+        "documentEnrichmentMultiField");
+    addWithChain(
+        sdoc("id", "98", "body_field", "He is very jealous."), // string_field 
absent
+        "documentEnrichmentMultiField");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field==");
+  }
+
+  @Test
+  public void 
processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() throws 
Exception {
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", 
"body_field", ""),
+        "documentEnrichmentMultiField");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Goku is the best saiyan.", 
"body_field", ""),
+        "documentEnrichmentMultiField");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field==");
+  }
+
+  @Test
+  public void 
processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() throws 
Exception {
+    loadTestChatModel("dummy-chat-model.json", "dummy-chat-1");
+
+    addWithChain(sdoc("id", "99"), "documentEnrichmentMultiField");
+    addWithChain(sdoc("id", "98"), "documentEnrichmentMultiField");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field==");
+  }
+
+  @Test
+  public void 
processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() throws 
Exception {
+    loadTestChatModel("exception-throwing-chat-model.json", 
"exception-throwing-chat-model");
+
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", 
"body_field", "He is very proud."),
+        "failingDocumentEnrichmentMultiField");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", 
"body_field", "He grew up on Earth."),
+        "failingDocumentEnrichmentMultiField");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field==",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field==");
+  }
+
+  @Test
+  public void 
processAdd_multivaluedStringOutputField_shouldPopulateAllValues() throws 
Exception {
+    loadTestChatModel("dummy-chat-model-multivalued-string.json", 
"dummy-chat-multivalued-1");
+
+    addWithChain(
+        sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."),
+        "documentEnrichmentMultivaluedString");
+    addWithChain(
+        sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on 
planet Earth."),
+        "documentEnrichmentMultivaluedString");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field_multi");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "/response/docs/[0]/enriched_field_multi/[0]=='tag1'",
+        "/response/docs/[0]/enriched_field_multi/[1]=='tag2'",
+        "/response/docs/[1]/id=='98'",
+        "/response/docs/[1]/enriched_field_multi/[0]=='tag1'",
+        "/response/docs/[1]/enriched_field_multi/[1]=='tag2'");
+  }
+
+  @Test
+  public void 
processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichment()
+      throws Exception {
+    loadTestChatModel("dummy-chat-model-multivalued-string.json", 
"dummy-chat-multivalued-1");
+
+    addWithChain(sdoc("id", "99", "string_field", ""), 
"documentEnrichmentMultivaluedString");
+    addWithChain(sdoc("id", "98", "string_field", ""), 
"documentEnrichmentMultivaluedString");
+    assertU(commit());
+
+    final SolrQuery query = getEnrichmentQuery("enriched_field_multi");
+
+    assertJQ(
+        "/query" + query.toQueryString(),
+        "/response/numFound==2]",
+        "/response/docs/[0]/id=='99'",
+        "!/response/docs/[0]/enriched_field_multi==",
+        "/response/docs/[1]/id=='98'",
+        "!/response/docs/[1]/enriched_field_multi==");
+  }
+
+  // --- typed single-valued output field tests ---
+
+  @Test
+  public void processAdd_singleLongOutputField_shouldPopulateValue() throws 
Exception {

Review Comment:
   I have some doubts on the following tests... the assert itself is not able 
to differentiate the output type.. double and float are the same...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to