This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch OAK-10765
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 78c4c021e6d09e8c5fa69ba9eb2edd3c26cfa147
Author: Rishabh Kumar <d...@adobe.com>
AuthorDate: Tue Apr 16 12:36:51 2024 +0530

    OAK-10765 : fetch document with only min required fields to speed up 
verification of orphan nodes
---
 .../oak/plugins/document/VersionGCSupport.java     | 50 ++++++++++++++++++++++
 .../plugins/document/VersionGarbageCollector.java  | 23 +++++-----
 .../document/mongo/MongoVersionGCSupport.java      | 22 ++++++++++
 .../oak/plugins/document/VersionGCSupportTest.java | 50 ++++++++++++++++++++++
 4 files changed, 132 insertions(+), 13 deletions(-)

diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index 705ebc9a3c..6aba03a6f3 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -21,19 +21,25 @@ package org.apache.jackrabbit.oak.plugins.document;
 
 import static java.util.Comparator.comparing;
 import static java.util.Optional.empty;
+import static java.util.Optional.of;
 import static java.util.Optional.ofNullable;
 import static java.util.stream.Stream.concat;
 import static java.util.stream.StreamSupport.stream;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static java.util.stream.Collectors.toList;
+import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments;
 
+import java.util.List;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
@@ -127,6 +133,10 @@ public class VersionGCSupport {
         return modified != null && modified.compareTo(getModifiedInSecs(time)) 
< 0;
     }
 
+    /**
+     * Null-safe comparison of a document's id against an expected id.
+     *
+     * @param doc the document whose id is inspected
+     * @param id the id to compare with
+     * @return {@code true} if the id of {@code doc} equals {@code id}
+     */
+    private boolean idEquals(final NodeDocument doc, final String id) {
+        final String docId = doc.getId();
+        return Objects.equals(docId, id);
+    }
+
     /**
      * Returns the underlying document store.
      *
@@ -226,6 +236,46 @@ public class VersionGCSupport {
         return empty();
     }
 
+    /**
+     * Retrieve the document with given id with only required fields.
+     * <p>
+     * This default implementation filters the documents returned by
+     * {@code getSelectedDocuments} for the first one whose id equals
+     * {@code id}. When a projection is requested, the match is copied and the
+     * copy is reduced to the requested fields plus {@link Document#ID}, which
+     * is always retained.
+     *
+     * @param id the document id
+     * @param fields {@link List} of required fields, keep empty (or pass
+     *               {@code null}) to fetch all
+     *
+     * @return the document with given id or empty if not found
+     */
+    public Optional<NodeDocument> getDocument(final String id, final List<String> fields) {
+
+        // Keep a handle on the iterable handed out by the store helper: that is
+        // the object which may hold resources, not the single result taken from
+        // it, so it is the one that has to be passed to closeIfCloseable().
+        final Iterable<NodeDocument> candidates = getSelectedDocuments(store, null, 0, MIN_ID_VALUE);
+        try {
+            final Optional<NodeDocument> match = stream(candidates.spliterator(), false)
+                    .filter(input -> idEquals(input, id)).findFirst();
+            if (match.isPresent()) {
+                LOG.debug("Found Document with id {}", id);
+                if (fields == null || fields.isEmpty()) {
+                    // no projection requested, hand out the document as found
+                    return match;
+                }
+
+                final Set<String> projectedSet = newHashSet(fields);
+                projectedSet.add(ID);
+
+                // project on a copy so the document obtained from the store
+                // is left untouched
+                final NodeDocument newDoc = Collection.NODES.newDocument(store);
+                match.get().deepCopy(newDoc);
+                newDoc.keySet().retainAll(projectedSet);
+                return of(newDoc);
+            }
+
+        } finally {
+            Utils.closeIfCloseable(candidates);
+        }
+
+        LOG.info("No Doc has been found with id [{}]", id);
+        return empty();
+    }
+
     public long getDeletedOnceCount() throws UnsupportedOperationException {
         throw new UnsupportedOperationException("getDeletedOnceCount()");
     }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index e3b76f28b5..58ae878b82 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -32,6 +32,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.concurrent.TimeUnit;
@@ -73,6 +74,7 @@ import org.slf4j.LoggerFactory;
 import static java.lang.Math.round;
 import static java.lang.String.join;
 import static java.util.Collections.emptySet;
+import static java.util.List.of;
 import static java.util.Objects.nonNull;
 import static java.util.Objects.requireNonNull;
 import static java.util.Optional.ofNullable;
@@ -91,6 +93,7 @@ import static 
org.apache.jackrabbit.guava.common.util.concurrent.Atomics.newRefe
 import static java.util.concurrent.TimeUnit.MICROSECONDS;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.BRANCH_COMMITS;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.COLLISIONS;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.COMMIT_ROOT;
@@ -100,6 +103,7 @@ import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.REVISIONS;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
+import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString;
 import static org.apache.jackrabbit.oak.stats.StatisticsProvider.NOOP;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.isCommitted;
@@ -1216,34 +1220,27 @@ public class VersionGarbageCollector {
                 phases.stop(GCPhase.DETAILED_GC_COLLECT_ORPHAN_NODES);
                 return false;
             }
-            if (detailedGcMode == DetailedGCMode.GAP_ORPHANS
-                    || detailedGcMode == 
DetailedGCMode.GAP_ORPHANS_EMPTYPROPS) {
+            if (detailedGcMode == DetailedGCMode.GAP_ORPHANS || detailedGcMode 
== DetailedGCMode.GAP_ORPHANS_EMPTYPROPS) {
                 // check the ancestor docs for gaps
                 final Path docPath = doc.getPath();
-                final Path geaPath = greatestExistingAncestorOrSelf;
-                final Path geaChildPath = docPath
-                        .getAncestor(docPath.getDepth() - geaPath.getDepth() - 
1);
+                final Path geaChildPath = 
docPath.getAncestor(docPath.getDepth() - 
greatestExistingAncestorOrSelf.getDepth() - 1);
                 Boolean missingType = missingDocsTypes.get(geaChildPath);
                 if (missingType == null) {
                     // we don't have it cached yet - so do the potentially 
expensive find
-                    final NodeDocument d = 
nodeStore.getDocumentStore().find(NODES,
-                            Utils.getIdFromPath(geaChildPath));
-                    final boolean parentDocExists = d != null;
-                    missingType = !parentDocExists;
+                    missingType = 
versionStore.getDocument(getIdFromPath(geaChildPath), of(ID)).isEmpty();
                     if (missingDocsTypes.size() > 
DETAILED_GC_MISSING_DOCS_TYPE_CACHE_SIZE) {
                         final Iterator<Path> it = 
missingDocsTypes.keySet().iterator();
                         it.next();
                         it.remove();
                         if (missingDocsTypes.size() > 
DETAILED_GC_MISSING_DOCS_TYPE_CACHE_SIZE) {
                             // should never really happen, if it does: clear 
all, break out
-                            log.error("isDeletedOrOrphanedNode : knownNullDocs 
removal failed, size was {}",
-                                    missingDocsTypes.size());
+                            log.error("isDeletedOrOrphanedNode : knownNullDocs 
removal failed, size was {}", missingDocsTypes.size());
                             missingDocsTypes.clear();
                         }
                     }
                     missingDocsTypes.put(geaChildPath, missingType);
                 }
-                if (missingType == false) {
+                if (!missingType) {
                     // then it is not a gap orphan
                     // nothing to do here then
                     // (even though somewhere along descendants
@@ -1671,7 +1668,7 @@ public class VersionGarbageCollector {
                     // if it is still referenced locally, keep it
                     continue;
                 }
-                final boolean isRoot = 
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+                final boolean isRoot = 
doc.getId().equals(getIdFromPath(Path.ROOT));
                 // local bcs only considered for removal
                 final boolean isBC = 
doc.getLocalBranchCommits().contains(revision);
                 final boolean newerThanSweep = 
nodeStore.getSweepRevisions().isRevisionNewer(revision);
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 1f6d4bf5f5..41d271e9d2 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -23,6 +23,7 @@ import static com.mongodb.client.model.Filters.eq;
 import static com.mongodb.client.model.Filters.exists;
 import static com.mongodb.client.model.Filters.gt;
 import static com.mongodb.client.model.Filters.or;
+import static com.mongodb.client.model.Projections.include;
 import static java.util.Optional.empty;
 import static java.util.Optional.ofNullable;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.concat;
@@ -165,6 +166,27 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
         return wrap(transform(cursor, input -> 
store.convertFromDBObject(NODES, input)));
     }
 
+    /**
+     * Retrieve the document with the given id directly from the Mongo node
+     * collection, optionally restricted to a projection of fields.
+     *
+     * @param id the document id
+     * @param fields {@link List} of fields to include in the result,
+     *               {@code null} or empty to fetch the complete document
+     *
+     * @return the document with the given id, or empty if no such document
+     *         was found or the lookup failed with an exception
+     */
+    @Override
+    public Optional<NodeDocument> getDocument(final String id, final List<String> fields) {
+
+        final Bson query = eq(ID, id);
+
+        final FindIterable<BasicDBObject> result = getNodeCollection().find(query);
+
+        if (fields != null && !fields.isEmpty()) {
+            result.projection(include(fields));
+        }
+
+        try (MongoCursor<BasicDBObject> cur = result.iterator()) {
+            if (cur.hasNext()) {
+                return ofNullable(store.convertFromDBObject(NODES, cur.next()));
+            }
+        } catch (Exception ex) {
+            // NOTE(review): a failed lookup is reported as "empty", which callers
+            // cannot tell apart from a missing document - confirm this best-effort
+            // behaviour is what the garbage collector expects.
+            LOG.error("getDocument() <- error while fetching data from Mongo", ex);
+            return empty();
+        }
+
+        // only reached when the query succeeded but matched nothing
+        LOG.info("No Doc has been found with id [{}], returning empty", id);
+        return empty();
+    }
+
     @Override
     public long getDeletedOnceCount() {
         Bson query = Filters.eq(DELETED_ONCE, Boolean.TRUE);
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index 565600a143..a27269d4d0 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -41,15 +41,18 @@ import static java.util.concurrent.TimeUnit.SECONDS;
 import static java.util.stream.StreamSupport.stream;
 import static org.apache.jackrabbit.guava.common.collect.Comparators.isInOrder;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static 
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MEMORY;
 import static 
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MONGO;
 import static 
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDB_H2;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static org.apache.jackrabbit.oak.stats.Clock.SIMPLE;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
 @RunWith(Parameterized.class)
@@ -185,6 +188,53 @@ public class VersionGCSupportTest {
         assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), 
Math.abs(secs - reportedSecs) < 5);
     }
 
+    /**
+     * A lookup projected on the id field only must return the created
+     * document reduced to exactly that single key.
+     */
+    @Test
+    public void findDocument() {
+        final String docId = getIdFromPath("/doc");
+        ids.add(docId);
+
+        final Revision rev = new Revision(SECONDS.toMillis(1234567L), 0, 1);
+        final UpdateOp createOp = new UpdateOp(docId, true);
+        setModified(createOp, rev);
+        store.create(NODES, of(createOp));
+
+        final NodeDocument found = gcSupport.getDocument(docId, of(ID)).orElse(NULL);
+        assertEquals(docId, found.getId());
+        assertEquals(1, found.keySet().size());
+    }
+
+    /**
+     * Looking up an id that was never created must yield an empty result,
+     * even though another document exists in the store.
+     */
+    @Test
+    public void findDocumentWhenNotExist() {
+        long secs = 1234567;
+        long offset = SECONDS.toMillis(secs);
+        Revision r = new Revision(offset, 0, 1);
+        String id = getIdFromPath("/doc/3");
+        ids.add(id);
+        UpdateOp op = new UpdateOp(id, true);
+        setModified(op, r);
+        store.create(NODES, of(op));
+
+        // Assert on the Optional itself: comparing ids on a NULL fallback would
+        // also pass if some unrelated document was returned by mistake.
+        assertTrue("no document expected for an unknown id",
+                gcSupport.getDocument(getIdFromPath("/doc/4"), of()).isEmpty());
+    }
+
+    /**
+     * A lookup projected on the id and the modified timestamp must return
+     * the created document reduced to exactly those two keys.
+     */
+    @Test
+    public void findDocumentWithProjection() {
+        final String docId = getIdFromPath("/doc/2");
+        ids.add(docId);
+
+        final Revision rev = new Revision(SECONDS.toMillis(1234567L), 0, 1);
+        final UpdateOp createOp = new UpdateOp(docId, true);
+        setModified(createOp, rev);
+        store.create(NODES, of(createOp));
+
+        final NodeDocument found = gcSupport.getDocument(docId, of(ID, MODIFIED_IN_SECS)).orElse(NULL);
+        assertEquals(docId, found.getId());
+        assertEquals(2, found.keySet().size());
+    }
+
     @Test
     public void findOldestModified() {
         long secs = 1234567;

Reply via email to