This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch OAK-10765 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 78c4c021e6d09e8c5fa69ba9eb2edd3c26cfa147 Author: Rishabh Kumar <d...@adobe.com> AuthorDate: Tue Apr 16 12:36:51 2024 +0530 OAK-10765 : fetch document with only min required fields to speed up verification of orphan nodes --- .../oak/plugins/document/VersionGCSupport.java | 50 ++++++++++++++++++++++ .../plugins/document/VersionGarbageCollector.java | 23 +++++----- .../document/mongo/MongoVersionGCSupport.java | 22 ++++++++++ .../oak/plugins/document/VersionGCSupportTest.java | 50 ++++++++++++++++++++++ 4 files changed, 132 insertions(+), 13 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java index 705ebc9a3c..6aba03a6f3 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java @@ -21,19 +21,25 @@ package org.apache.jackrabbit.oak.plugins.document; import static java.util.Comparator.comparing; import static java.util.Optional.empty; +import static java.util.Optional.of; import static java.util.Optional.ofNullable; import static java.util.stream.Stream.concat; import static java.util.stream.StreamSupport.stream; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static java.util.stream.Collectors.toList; +import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet; +import static org.apache.jackrabbit.oak.plugins.document.Document.ID; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments; import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments; +import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType; @@ -127,6 +133,10 @@ public class VersionGCSupport { return modified != null && modified.compareTo(getModifiedInSecs(time)) < 0; } + private boolean idEquals(final NodeDocument doc, final String id) { + return Objects.equals(doc.getId(), id); + } + /** * Returns the underlying document store. * @@ -226,6 +236,46 @@ public class VersionGCSupport { return empty(); } + /** + * Retrieve the document with given id with only required fields. + * + * @param id the document id + * @param fields {@link List} of required fields, keep empty to fetch all + * + * @return the document with given id or empty if not found + */ + public Optional<NodeDocument> getDocument(final String id, final List<String> fields) { + + Iterable<NodeDocument> docs = null; + try { + docs = stream(getSelectedDocuments(store, null, 0, MIN_ID_VALUE).spliterator(), false) + .filter(input -> idEquals(input, id)).limit(1).collect(toList()); + if (docs.iterator().hasNext()) { + final NodeDocument doc = docs.iterator().next(); + if (LOG.isDebugEnabled()) { + LOG.debug("Found Document with id {}", id); + } + if (fields == null || fields.isEmpty()) { + return ofNullable(doc); + } + + final Set<String> projectedSet = newHashSet(fields); + projectedSet.add(ID); + + final NodeDocument newDoc = Collection.NODES.newDocument(store); + doc.deepCopy(newDoc); + newDoc.keySet().retainAll(projectedSet); + return of(newDoc); + } + + } finally { + Utils.closeIfCloseable(docs); + } + + LOG.info("No Doc has been found with id [{}]", id); + return empty(); + } + public long getDeletedOnceCount() throws UnsupportedOperationException { throw new UnsupportedOperationException("getDeletedOnceCount()"); } diff 
--git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index e3b76f28b5..58ae878b82 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.SortedMap; import java.util.concurrent.TimeUnit; @@ -73,6 +74,7 @@ import org.slf4j.LoggerFactory; import static java.lang.Math.round; import static java.lang.String.join; import static java.util.Collections.emptySet; +import static java.util.List.of; import static java.util.Objects.nonNull; import static java.util.Objects.requireNonNull; import static java.util.Optional.ofNullable; @@ -91,6 +93,7 @@ import static org.apache.jackrabbit.guava.common.util.concurrent.Atomics.newRefe import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS; +import static org.apache.jackrabbit.oak.plugins.document.Document.ID; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.BRANCH_COMMITS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.COLLISIONS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.COMMIT_ROOT; @@ -100,6 +103,7 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.REVISIONS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF; import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH; +import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString; import static org.apache.jackrabbit.oak.stats.StatisticsProvider.NOOP; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isCommitted; @@ -1216,34 +1220,27 @@ public class VersionGarbageCollector { phases.stop(GCPhase.DETAILED_GC_COLLECT_ORPHAN_NODES); return false; } - if (detailedGcMode == DetailedGCMode.GAP_ORPHANS - || detailedGcMode == DetailedGCMode.GAP_ORPHANS_EMPTYPROPS) { + if (detailedGcMode == DetailedGCMode.GAP_ORPHANS || detailedGcMode == DetailedGCMode.GAP_ORPHANS_EMPTYPROPS) { // check the ancestor docs for gaps final Path docPath = doc.getPath(); - final Path geaPath = greatestExistingAncestorOrSelf; - final Path geaChildPath = docPath - .getAncestor(docPath.getDepth() - geaPath.getDepth() - 1); + final Path geaChildPath = docPath.getAncestor(docPath.getDepth() - greatestExistingAncestorOrSelf.getDepth() - 1); Boolean missingType = missingDocsTypes.get(geaChildPath); if (missingType == null) { // we don't have it cached yet - so do the potentially expensive find - final NodeDocument d = nodeStore.getDocumentStore().find(NODES, - Utils.getIdFromPath(geaChildPath)); - final boolean parentDocExists = d != null; - missingType = !parentDocExists; + missingType = versionStore.getDocument(getIdFromPath(geaChildPath), of(ID)).isEmpty(); if (missingDocsTypes.size() > DETAILED_GC_MISSING_DOCS_TYPE_CACHE_SIZE) { final Iterator<Path> it = missingDocsTypes.keySet().iterator(); it.next(); it.remove(); if (missingDocsTypes.size() > DETAILED_GC_MISSING_DOCS_TYPE_CACHE_SIZE) { // should never really happen, if it does: clear all, break out - log.error("isDeletedOrOrphanedNode : knownNullDocs removal failed, size was {}", - missingDocsTypes.size()); + log.error("isDeletedOrOrphanedNode : 
knownNullDocs removal failed, size was {}", missingDocsTypes.size()); missingDocsTypes.clear(); } } missingDocsTypes.put(geaChildPath, missingType); } - if (missingType == false) { + if (!missingType) { // then it is not a gap orphan // nothing to do here then // (even though somewhere along descendants @@ -1671,7 +1668,7 @@ public class VersionGarbageCollector { // if it is still referenced locally, keep it continue; } - final boolean isRoot = doc.getId().equals(Utils.getIdFromPath(Path.ROOT)); + final boolean isRoot = doc.getId().equals(getIdFromPath(Path.ROOT)); // local bcs only considered for removal final boolean isBC = doc.getLocalBranchCommits().contains(revision); final boolean newerThanSweep = nodeStore.getSweepRevisions().isRevisionNewer(revision); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index 1f6d4bf5f5..41d271e9d2 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -23,6 +23,7 @@ import static com.mongodb.client.model.Filters.eq; import static com.mongodb.client.model.Filters.exists; import static com.mongodb.client.model.Filters.gt; import static com.mongodb.client.model.Filters.or; +import static com.mongodb.client.model.Projections.include; import static java.util.Optional.empty; import static java.util.Optional.ofNullable; import static org.apache.jackrabbit.guava.common.collect.Iterables.concat; @@ -165,6 +166,27 @@ public class MongoVersionGCSupport extends VersionGCSupport { return wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input))); } + @Override + public Optional<NodeDocument> getDocument(final String id, final List<String> fields) { + + final Bson 
query = eq(ID, id); + + final FindIterable<BasicDBObject> result = getNodeCollection().find(query); + + if (fields != null && !fields.isEmpty()) { + result.projection(include(fields)); + } + + try(MongoCursor<BasicDBObject> cur = result.iterator()) { + return cur.hasNext() ? ofNullable(store.convertFromDBObject(NODES, cur.next())) : empty(); + } catch (Exception ex) { + LOG.error("getDocument() <- error while fetching data from Mongo", ex); + } + LOG.info("No Doc has been found with id [{}], retuning empty", id); + return empty(); + + } + @Override public long getDeletedOnceCount() { Bson query = Filters.eq(DELETED_ONCE, Boolean.TRUE); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java index 565600a143..a27269d4d0 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java @@ -41,15 +41,18 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static java.util.stream.StreamSupport.stream; import static org.apache.jackrabbit.guava.common.collect.Comparators.isInOrder; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; +import static org.apache.jackrabbit.oak.plugins.document.Document.ID; import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MEMORY; import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MONGO; import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDB_H2; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath; import static org.apache.jackrabbit.oak.stats.Clock.SIMPLE; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) @@ -185,6 +188,53 @@ public class VersionGCSupportTest { assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), Math.abs(secs - reportedSecs) < 5); } + @Test + public void findDocument() { + long secs = 1234567; + long offset = SECONDS.toMillis(secs); + Revision r = new Revision(offset, 0, 1); + String id = getIdFromPath("/doc"); + ids.add(id); + UpdateOp op = new UpdateOp(id, true); + setModified(op, r); + store.create(NODES, of(op)); + + NodeDocument doc = gcSupport.getDocument(id, of(ID)).orElse(NULL); + assertEquals(id, doc.getId()); + assertEquals(1, doc.keySet().size()); + } + + @Test + public void findDocumentWhenNotExist() { + long secs = 1234567; + long offset = SECONDS.toMillis(secs); + Revision r = new Revision(offset, 0, 1); + String id = getIdFromPath("/doc/3"); + ids.add(id); + UpdateOp op = new UpdateOp(id, true); + setModified(op, r); + store.create(NODES, of(op)); + + NodeDocument doc = gcSupport.getDocument(getIdFromPath("/doc/4"), of()).orElse(NULL); + assertNotEquals(id, doc.getId()); + } + + @Test + public void findDocumentWithProjection() { + long secs = 1234567; + long offset = SECONDS.toMillis(secs); + Revision r = new Revision(offset, 0, 1); + String id = getIdFromPath("/doc/2"); + ids.add(id); + UpdateOp op = new UpdateOp(id, true); + setModified(op, r); + store.create(NODES, of(op)); + + NodeDocument doc = gcSupport.getDocument(id, of(ID, MODIFIED_IN_SECS)).orElse(NULL); + assertEquals(id, doc.getId()); + assertEquals(2, doc.keySet().size()); + } + @Test public void findOldestModified() { long secs = 1234567;