This is an automated email from the ASF dual-hosted git repository.

mkataria pushed a commit to branch OAK-11694_1
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 6389c4161b6f60fc1147c7554cbaec627396f99c
Author: Mohit Kataria <[email protected]>
AuthorDate: Wed May 7 20:01:29 2025 +0530

    OAK-11694: Added enricher status in document
---
 .../index/elastic/index/ElasticIndexWriter.java    |  38 +---
 .../inference/ElasticInferenceUsingConfigTest.java | 249 +++++++++++++++++++++
 2 files changed, 250 insertions(+), 37 deletions(-)

diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
index a5313cd291..29011cf6d7 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
@@ -160,43 +160,7 @@ class ElasticIndexWriter implements 
FulltextIndexWriter<ElasticDocument> {
             if (InferenceConfig.getInstance().isInferenceEnabled()
                 && 
InferenceConfig.getInstance().getInferenceIndexConfig(jcrIndexName).isEnabled())
 {
                 doc.addProperty(InferenceConstants.ENRICH_NODE,
-                    Map.of(InferenceConstants.ENRICH_STATUS, 
InferenceConstants.ENRICH_STATUS_PENDING));
-            }
-            /*
-
-                Once inference is enabled, it is not trivial to disable it.  
As inference configuration in Elasticsearch (ES)
-                is persisted only during the creation of a new index
-                or reindexing of an existing one. This means that the enricher 
configuration is updated only under
-                these conditions. If we want to disable inference on instance, 
the existing enricher configuration
-                remains unchanged, and the enricher will continue processing 
new documents.
-
-                To stop the enricher from processing documents, we need to 
explicitly update the enricher status to
-                `COMPLETED` in the ES document by adding the following 
structure:
-                {
-                    :enrich {
-                        "status": "COMPLETED",
-                        "inferenceDisabled": true
-                    }
-                }
-
-                The `inferenceDisabled` flag is added to allow for potential 
evaluations at a later stage.
-
-                This should happen in all cases where we try to disable 
inference i.e.
-
-                1. Inference is disabled in ElasticIndexProviderService but 
InferenceConfig is valid.
-                2. Inference is enabled and InferenceConfig is not equal to 
InferenceConfig.NOOP i.e.
-                    any of the properties is different from below:
-                    enricherConfig = "";
-                    isEnabled = false;
-                    inferenceModelConfigs = Map.of();
-                 Note: This is possible by not setting enricherConfig to empty 
string as other fields are set to default values.
-             */
-            else {
-                Map<String, Object> enrichDocStatus = Map.of(
-                        InferenceConstants.ENRICH_STATUS, 
InferenceConstants.ENRICH_STATUS_COMPLETED,
-                        InferenceConstants.ENRICH_STATUS_INFERENCE_DISABLED, 
true
-                );
-                doc.addProperty(InferenceConstants.ENRICH_NODE, 
enrichDocStatus);
+                    InferenceConfig.getInstance().getEnricherStatus());
             }
             bulkProcessorHandler.update(indexName, 
ElasticIndexUtils.idFromPath(path), doc);
         }
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
index 046ca29ee1..58c0d8ebfe 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
@@ -336,4 +336,253 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
         assertNotNull(carsDocUpdated.get(InferenceConstants.VECTOR_SPACES));
 
     }
+
+    @Test
+    public void testEnricherStatus() throws Exception {
+        String jcrIndexName = UUID.randomUUID().toString();
+        String inferenceServiceUrl = "http://localhost:" + wireMock.port() + 
"/v1/embeddings";
+        String inferenceModelConfigName = "ada-test-model";
+        String inferenceModelName = "text-embedding-ada-002";
+
+        // Create inference config with enricher information
+        createInferenceConfig(jcrIndexName, true, enricherConfig, 
inferenceModelConfigName,
+            inferenceModelName, inferenceServiceUrl,
+            0.8, 1L, true, true);
+
+        // Create and set up the node with enricher status information
+        NodeBuilder rootBuilder = nodeStore.getRoot().builder();
+        NodeBuilder nodeBuilder = rootBuilder;
+        for (String path : PathUtils.elements(INFERENCE_CONFIG_PATH)) {
+            nodeBuilder = nodeBuilder.child(path);
+        }
+        // Add enricher status node
+        NodeBuilder enrichNodeBuilder = 
nodeBuilder.child(InferenceConstants.ENRICH_NODE);
+        enrichNodeBuilder.setProperty("lastUpdated", 
System.currentTimeMillis());
+        enrichNodeBuilder.setProperty("status", "active");
+        enrichNodeBuilder.setProperty("documentsProcessed", 100);
+        nodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        IndexDefinitionBuilder builder = createIndex();
+        builder.includedPaths("/content")
+            .indexRule("nt:base")
+            .property("title").propertyIndex().analyzed().nodeScopeIndex()
+            
.property("description").propertyIndex().analyzed().nodeScopeIndex();
+
+        Tree index = setIndex(jcrIndexName, builder);
+        root.commit();
+
+        // Add content
+        Tree content = root.getTree("/").addChild("content");
+        Tree document = content.addChild("document");
+        document.setProperty("title", "Test Document");
+        document.setProperty("description", "This is a test document to verify 
enricher status is included in document updates.");
+        root.commit();
+
+        // Let the index catch up
+        assertEventually(() -> assertEquals(2, countDocuments(index)));
+
+        // Get the document and check that it has the enricher status
+        ObjectNode docNode = getDocument(index, "/content/document");
+        assertNotNull(docNode);
+
+        // Add another property to trigger an update
+        document.setProperty("updatedAt", Instant.now().toString());
+        root.commit();
+
+        // Let the index catch up with the update
+        assertEventually(() -> {
+            ObjectNode updatedDoc = getDocument(index, "/content/document");
+            assertNotNull(updatedDoc.get(InferenceConstants.ENRICH_NODE));
+            JsonNode enrichNode = 
updatedDoc.get(InferenceConstants.ENRICH_NODE);
+            assertNotNull(enrichNode);
+            assertNotNull(enrichNode.get("lastUpdated"));
+            assertEquals("active", enrichNode.get("status").asText());
+            assertEquals(100, enrichNode.get("documentsProcessed").asInt());
+        });
+    }
+
+    @Test
+    public void testEnricherStatusPreservedWithVectorEmbeddings() throws 
Exception {
+        String jcrIndexName = UUID.randomUUID().toString();
+        String inferenceServiceUrl = "http://localhost:" + wireMock.port() + 
"/v1/embeddings";
+        String inferenceModelConfigName = "ada-test-model";
+        String inferenceModelName = "text-embedding-ada-002";
+
+        // Create inference config with enricher information
+        createInferenceConfig(jcrIndexName, true, enricherConfig, 
inferenceModelConfigName,
+            inferenceModelName, inferenceServiceUrl,
+            0.8, 1L, true, true);
+
+        // Create and set up the node with enricher status information
+        NodeBuilder rootBuilder = nodeStore.getRoot().builder();
+        NodeBuilder nodeBuilder = rootBuilder;
+        for (String path : PathUtils.elements(INFERENCE_CONFIG_PATH)) {
+            nodeBuilder = nodeBuilder.child(path);
+        }
+        // Add enricher status node
+        NodeBuilder enrichNode = 
nodeBuilder.child(InferenceConstants.ENRICH_NODE);
+        enrichNode.setProperty("lastUpdated", System.currentTimeMillis());
+        enrichNode.setProperty("status", "active");
+        enrichNode.setProperty("documentsProcessed", 100);
+        nodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        IndexDefinitionBuilder builder = createIndex();
+        builder.includedPaths("/content")
+            .indexRule("nt:base")
+            .property("title").propertyIndex().analyzed().nodeScopeIndex()
+            
.property("description").propertyIndex().analyzed().nodeScopeIndex()
+            .property("updatedBy").propertyIndex();
+
+        Tree index = setIndex(jcrIndexName, builder);
+        root.commit();
+
+        // Add content
+        Tree content = root.getTree("/").addChild("content");
+        Tree document = content.addChild("document");
+        document.setProperty("title", "Test Document with Embeddings");
+        document.setProperty("description", "This is a test document that will 
have vector embeddings and enricher status.");
+        root.commit();
+
+        // Let the index catch up
+        assertEventually(() -> assertEquals(2, countDocuments(index)));
+
+        // Create an update with vector embeddings
+        ObjectMapper mapper = new JsonMapper();
+        ObjectNode updateDoc = mapper.createObjectNode();
+        List<Float> embeddings = List.of(0.1f, 0.2f, 0.3f, 0.4f, 0.5f);
+        VectorDocument vectorDocument = new 
VectorDocument(UUID.randomUUID().toString(), embeddings,
+            Map.of("updatedAt", Instant.now().toEpochMilli(), "model", 
inferenceModelName));
+        ObjectNode vectorSpacesNode = 
updateDoc.putObject(InferenceConstants.VECTOR_SPACES);
+        ArrayNode inferenceModelConfigNode = 
vectorSpacesNode.putArray(inferenceModelConfigName);
+        inferenceModelConfigNode.addPOJO(vectorDocument);
+
+        // Update the document with vector embeddings
+        updateDocument(index, "/content/document", updateDoc);
+
+        // Verify the document has the embeddings
+        assertEventually(() -> {
+            ObjectNode docWithEmbeddings = getDocument(index, 
"/content/document");
+            
assertNotNull(docWithEmbeddings.get(InferenceConstants.VECTOR_SPACES));
+            JsonNode vectorSpaces = 
docWithEmbeddings.get(InferenceConstants.VECTOR_SPACES);
+            assertNotNull(vectorSpaces.get(inferenceModelConfigName));
+        });
+
+        // Now update a property to trigger another update which should 
preserve the embeddings
+        // and also add the enricher status
+        document.setProperty("updatedBy", "Test User");
+        root.commit();
+
+        // Verify the document still has embeddings and now has enricher status
+        assertEventually(() -> {
+            ObjectNode updatedDoc = getDocument(index, "/content/document");
+
+            // Check that the vector embeddings are preserved
+            assertNotNull(updatedDoc.get(InferenceConstants.VECTOR_SPACES));
+            JsonNode vectorSpaces = 
updatedDoc.get(InferenceConstants.VECTOR_SPACES);
+            assertNotNull(vectorSpaces.get(inferenceModelConfigName));
+
+            // Check that the enricher status is present
+            assertNotNull(updatedDoc.get(InferenceConstants.ENRICH_NODE));
+            JsonNode enrichNodeData = 
updatedDoc.get(InferenceConstants.ENRICH_NODE);
+            assertNotNull(enrichNodeData.get("lastUpdated"));
+            assertEquals("active", enrichNodeData.get("status").asText());
+            assertEquals(100, 
enrichNodeData.get("documentsProcessed").asInt());
+
+            // Check that the updated property is present
+            assertEquals("Test User", updatedDoc.get("updatedBy").asText());
+        });
+    }
+
+    @Test
+    public void testEnricherStatusOnReinitialization() throws Exception {
+        String jcrIndexName = UUID.randomUUID().toString();
+        String inferenceServiceUrl = "http://localhost:" + wireMock.port() + 
"/v1/embeddings";
+        String inferenceModelConfigName = "ada-test-model";
+        String inferenceModelName = "text-embedding-ada-002";
+
+        // Create inference config
+        createInferenceConfig(jcrIndexName, true, enricherConfig, 
inferenceModelConfigName,
+            inferenceModelName, inferenceServiceUrl,
+            0.8, 1L, true, true);
+
+        // Create and set up the node with initial enricher status
+        NodeBuilder rootBuilder = nodeStore.getRoot().builder();
+        NodeBuilder nodeBuilder = rootBuilder;
+        for (String path : PathUtils.elements(INFERENCE_CONFIG_PATH)) {
+            nodeBuilder = nodeBuilder.child(path);
+        }
+        // Add enricher status node with initial values
+        NodeBuilder enrichNode = 
nodeBuilder.child(InferenceConstants.ENRICH_NODE);
+        long initialTime = System.currentTimeMillis();
+        enrichNode.setProperty("lastUpdated", initialTime);
+        enrichNode.setProperty("status", "initializing");
+        enrichNode.setProperty("documentsProcessed", 0);
+        nodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // Force reinitialization of InferenceConfig
+        InferenceConfig.reInitialize();
+
+        // Verify initial enricher status
+        Map<String, Object> initialStatus = 
InferenceConfig.getInstance().getEnricherStatus();
+        assertNotNull(initialStatus);
+        assertEquals(initialTime, initialStatus.get("lastUpdated"));
+        assertEquals("initializing", initialStatus.get("status"));
+        assertEquals(0L, initialStatus.get("documentsProcessed"));
+
+        // Create and set up the node with updated enricher status
+        rootBuilder = nodeStore.getRoot().builder();
+        nodeBuilder = rootBuilder;
+        for (String path : PathUtils.elements(INFERENCE_CONFIG_PATH)) {
+            nodeBuilder = nodeBuilder.child(path);
+        }
+        // Update enricher status node with new values
+        enrichNode = nodeBuilder.child(InferenceConstants.ENRICH_NODE);
+        long updatedTime = System.currentTimeMillis() + 1000; // Ensure it's 
different
+        enrichNode.setProperty("lastUpdated", updatedTime);
+        enrichNode.setProperty("status", "active");
+        enrichNode.setProperty("documentsProcessed", 200);
+        // Add a new property
+        enrichNode.setProperty("errorCount", 5);
+        nodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // Force reinitialization of InferenceConfig
+        InferenceConfig.reInitialize();
+
+        // Verify updated enricher status
+        Map<String, Object> updatedStatus = 
InferenceConfig.getInstance().getEnricherStatus();
+        assertNotNull(updatedStatus);
+        assertEquals(updatedTime, updatedStatus.get("lastUpdated"));
+        assertEquals("active", updatedStatus.get("status"));
+        assertEquals(200L, updatedStatus.get("documentsProcessed"));
+        assertEquals(5L, updatedStatus.get("errorCount"));
+
+        // Create an index and verify the enricher status gets included in 
document updates
+        IndexDefinitionBuilder builder = createIndex();
+        builder.includedPaths("/content")
+            .indexRule("nt:base")
+            .property("title").propertyIndex().analyzed().nodeScopeIndex();
+
+        Tree index = setIndex(jcrIndexName, builder);
+        root.commit();
+
+        // Add content
+        Tree content = root.getTree("/").addChild("content");
+        Tree document = content.addChild("document");
+        document.setProperty("title", "Test Document for Reinitialization");
+        root.commit();
+
+        // Let the index catch up
+        assertEventually(() -> assertEquals(2, countDocuments(index)));
+
+        // Verify the enricher status in the indexed document
+        assertEventually(() -> {
+            ObjectNode docNode = getDocument(index, "/content/document");
+            assertNotNull(docNode.get(InferenceConstants.ENRICH_NODE));
+            JsonNode enrichNodeData = 
docNode.get(InferenceConstants.ENRICH_NODE);
+            assertEquals(updatedTime, 
enrichNodeData.get("lastUpdated").asLong());
+            assertEquals("active", enrichNodeData.get("status").asText());
+            assertEquals(200, 
enrichNodeData.get("documentsProcessed").asInt());
+            assertEquals(5, enrichNodeData.get("errorCount").asInt());
+        });
+    }
 }

Reply via email to