This is an automated email from the ASF dual-hosted git repository.

kotman12 pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_10x by this push:
     new 2557ef60533 SOLR-18194: fix nested docs detection false positive 
(#4279)
2557ef60533 is described below

commit 2557ef6053337c8bbad3152e3bb8e9cd3d63baf4
Author: Luke Kot-Zaniewski <[email protected]>
AuthorDate: Mon Apr 27 18:26:50 2026 -0400

    SOLR-18194: fix nested docs detection false positive (#4279)
    
    Previously if a segment had updates to the same Solr document (delete + add 
within a commit interval) the UPGRADECOREINDEX action would falsely identify it 
as having child documents. These are not supported by the action so it would 
unnecessarily fail. We improve the check to compare cardinality of id with 
_root_ to identify child documents.
---
 .../solr/handler/admin/api/UpgradeCoreIndex.java   |  48 ++++++---
 .../handler/admin/UpgradeCoreIndexActionTest.java  | 107 +++++++++++++++++++--
 2 files changed, 134 insertions(+), 21 deletions(-)

diff --git 
a/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java 
b/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java
index c91f107e952..c92d3e20947 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java
@@ -149,11 +149,11 @@ public class UpgradeCoreIndex extends CoreAdminAPIBase {
 
       RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
       try {
-        // Check for nested documents before processing - we don't support them
-        if (indexContainsNestedDocs(searcherRef.get())) {
+        // Check for child documents before processing - we don't support them
+        if (indexContainsChildDocs(searcherRef.get())) {
           throw new SolrException(
               BAD_REQUEST,
-              "UPGRADECOREINDEX does not support indexes containing nested 
documents. "
+              "UPGRADECOREINDEX does not support indexes containing 
child/nested documents. "
                   + " Consider reindexing your data "
                   + "from the original source.");
         }
@@ -259,26 +259,44 @@ public class UpgradeCoreIndex extends CoreAdminAPIBase {
     return (segmentMinVersion == null || segmentMinVersion.major < 
Version.LATEST.major);
   }
 
-  private boolean indexContainsNestedDocs(SolrIndexSearcher searcher) throws 
IOException {
+  private boolean indexContainsChildDocs(SolrIndexSearcher searcher) throws 
IOException {
     IndexSchema schema = searcher.getSchema();
 
-    // First check if schema supports nested docs
+    // First check if schema supports child docs
     if (!schema.isUsableForChildDocs()) {
       return false;
     }
 
-    // Check if _root_ field has fewer unique values than documents with that 
field.
-    // This indicates multiple docs share the same _root_ (i.e., child docs 
exist)
+    String uniqueKeyFieldName = schema.getUniqueKeyField().getName();
+
+    // Compare unique _root_ values against unique id values per segment.
+    // For non-child docs, every document's _root_ equals its own id, so the 
number of
+    // distinct _root_ values equals the number of distinct id values. For 
child docs,
+    // children share the parent's _root_ value, so there are fewer distinct 
_root_ values
+    // than distinct id values.
+    //
+    // We intentionally compare against unique id values rather than 
Terms.getDocCount()
+    // (the number of documents with the _root_ field) because segment-level 
term statistics
+    // include deleted documents. Updates (delete + re-add of the same id) can 
leave multiple
+    // documents with the same _root_ value within a segment, causing 
getDocCount() to exceed
+    // the unique _root_ count even when no child docs exist.
     IndexReader reader = searcher.getIndexReader();
     for (LeafReaderContext leaf : reader.leaves()) {
-      Terms terms = leaf.reader().terms(IndexSchema.ROOT_FIELD_NAME);
-      if (terms != null) {
-        long uniqueRootValues = terms.size();
-        int docsWithRoot = terms.getDocCount();
-
-        if (uniqueRootValues == -1 || uniqueRootValues < docsWithRoot) {
-          return true; // Codec doesn't store number of terms (so a safe 
fallback), or multiple docs
-          // share same _root_ (aka nested docs exist)
+      Terms rootTerms = leaf.reader().terms(IndexSchema.ROOT_FIELD_NAME);
+      if (rootTerms != null) {
+        long uniqueRootValues = rootTerms.size();
+        if (uniqueRootValues == -1) {
+          return true; // Codec doesn't report term count; assume child docs 
as a safe fallback
+        }
+
+        Terms idTerms = leaf.reader().terms(uniqueKeyFieldName);
+        long uniqueIdValues = (idTerms != null) ? idTerms.size() : -1;
+        if (uniqueIdValues == -1) {
+          return true; // Codec doesn't report term count; assume child docs 
as a safe fallback
+        }
+
+        if (uniqueRootValues < uniqueIdValues) {
+          return true; // Fewer distinct _root_ values than distinct ids means 
child docs exist
         }
       }
     }
diff --git 
a/solr/core/src/test/org/apache/solr/handler/admin/UpgradeCoreIndexActionTest.java
 
b/solr/core/src/test/org/apache/solr/handler/admin/UpgradeCoreIndexActionTest.java
index 14bfe1ad7fe..536652f270d 100644
--- 
a/solr/core/src/test/org/apache/solr/handler/admin/UpgradeCoreIndexActionTest.java
+++ 
b/solr/core/src/test/org/apache/solr/handler/admin/UpgradeCoreIndexActionTest.java
@@ -323,11 +323,11 @@ public class UpgradeCoreIndexActionTest extends 
SolrTestCaseJ4 {
   private record SegmentLayout(String coreName, String seg1, String seg2, 
String seg3) {}
 
   @Test
-  public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception {
+  public void testUpgradeCoreIndexFailsWithChildDocuments() throws Exception {
     final SolrCore core = h.getCore();
     final String coreName = core.getName();
 
-    // Create a parent document with a child document (nested doc)
+    // Create a parent document with a child document
     SolrInputDocument parentDoc = new SolrInputDocument();
     parentDoc.addField("id", "100");
     parentDoc.addField("title", "Parent Document");
@@ -338,7 +338,7 @@ public class UpgradeCoreIndexActionTest extends 
SolrTestCaseJ4 {
 
     parentDoc.addChildDocument(childDoc);
 
-    // Index the nested document
+    // Index the parent+child document
     try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, new 
ModifiableSolrParams())) {
       AddUpdateCommand cmd = new AddUpdateCommand(req);
       cmd.solrDoc = parentDoc;
@@ -349,7 +349,7 @@ public class UpgradeCoreIndexActionTest extends 
SolrTestCaseJ4 {
     // Verify documents were indexed (parent + child = 2 docs)
     assertQ(req("q", "*:*"), "//result[@numFound='2']");
 
-    // Attempt to upgrade the index - should fail because of nested documents
+    // Attempt to upgrade the index - should fail because of child documents
     CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer());
     try {
       final SolrQueryResponse resp = new SolrQueryResponse();
@@ -365,13 +365,108 @@ public class UpgradeCoreIndexActionTest extends 
SolrTestCaseJ4 {
                           coreName),
                       resp));
 
-      // Verify the exception message indicates nested documents are not 
supported
+      // Verify the exception message indicates child documents are not 
supported
       assertThat(
           thrown.getMessage(),
-          containsString("does not support indexes containing nested 
documents"));
+          containsString("does not support indexes containing child/nested 
documents"));
     } finally {
       admin.shutdown();
       admin.close();
     }
   }
+
+  @Test
+  public void testChildDocsDetection_noChildDocs() throws Exception {
+    addDocsWithRandomUpdatesAndDeletes();
+
+    final String coreName = h.getCore().getName();
+    CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer());
+    try {
+      final SolrQueryResponse resp = new SolrQueryResponse();
+      admin.handleRequestBody(
+          req(
+              CoreAdminParams.ACTION,
+              CoreAdminParams.CoreAdminAction.UPGRADECOREINDEX.toString(),
+              CoreAdminParams.CORE,
+              coreName),
+          resp);
+      assertNull("Unexpected exception: " + resp.getException(), 
resp.getException());
+    } finally {
+      admin.shutdown();
+      admin.close();
+    }
+  }
+
+  @Test
+  public void testChildDocsDetection_withChildDocs() throws Exception {
+    addChildDoc("100", "101");
+    addDocsWithRandomUpdatesAndDeletes();
+
+    final String coreName = h.getCore().getName();
+    CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer());
+    try {
+      final SolrQueryResponse resp = new SolrQueryResponse();
+      SolrException thrown =
+          assertThrows(
+              SolrException.class,
+              () ->
+                  admin.handleRequestBody(
+                      req(
+                          CoreAdminParams.ACTION,
+                          
CoreAdminParams.CoreAdminAction.UPGRADECOREINDEX.toString(),
+                          CoreAdminParams.CORE,
+                          coreName),
+                      resp));
+      assertThat(
+          thrown.getMessage(),
+          containsString("does not support indexes containing child/nested 
documents"));
+    } finally {
+      admin.shutdown();
+      admin.close();
+    }
+  }
+
+  /**
+   * Add non-child docs with a random number of within-commit updates and 
deletes. This exercises
+   * the false-positive scenario for child doc detection: updates and deletes 
leave behind deleted
+   * entries in the same segment, causing multiple docs to share the same 
{@code _root_} value.
+   *
+   * <p>With NoMergePolicy and a 100MB RAM buffer (from SolrIndexConfig 
defaults), no flush or merge
+   * occurs mid-batch, guaranteeing co-location in a single segment.
+   */
+  private void addDocsWithRandomUpdatesAndDeletes() {
+    int numDocs = 10;
+    for (int i = 0; i < numDocs; i++) {
+      assertU(adoc("id", String.valueOf(i), "title", "doc" + i));
+    }
+    int numUpdates = random().nextInt(4);
+    for (int i = 0; i < numUpdates; i++) {
+      assertU(adoc("id", String.valueOf(i), "title", "updated_doc" + i));
+    }
+    int numDeletes = random().nextInt(4);
+    for (int i = 0; i < numDeletes; i++) {
+      assertU(delI(String.valueOf(numDocs - 1 - i)));
+    }
+    assertU(commit("openSearcher", "true"));
+  }
+
+  /** Index a parent document with a single child via the update handler. */
+  private void addChildDoc(String parentId, String childId) throws Exception {
+    SolrCore core = h.getCore();
+    SolrInputDocument parentDoc = new SolrInputDocument();
+    parentDoc.addField("id", parentId);
+    parentDoc.addField("title", "Parent " + parentId);
+
+    SolrInputDocument childDoc = new SolrInputDocument();
+    childDoc.addField("id", childId);
+    childDoc.addField("title", "Child " + childId);
+    parentDoc.addChildDocument(childDoc);
+
+    try (SolrQueryRequestBase solrReq =
+        new SolrQueryRequestBase(core, new ModifiableSolrParams())) {
+      AddUpdateCommand cmd = new AddUpdateCommand(solrReq);
+      cmd.solrDoc = parentDoc;
+      core.getUpdateHandler().addDoc(cmd);
+    }
+  }
 }

Reply via email to