rishabhdaim commented on code in PR #1090: URL: https://github.com/apache/jackrabbit-oak/pull/1090#discussion_r1314892637
########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -902,6 +905,206 @@ private void collectDeletedProperties(final NodeDocument doc, final GCPhases pha } } + private void collectUnmergedBranchCommitDocument(final NodeDocument doc, + long toModifiedMillis, final GCPhases phases, final UpdateOp updateOp) { + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)) { + // GC was cancelled, stop + return; + } + + // from + // https://jackrabbit.apache.org/oak/docs/nodestore/documentmk.html#previous-documents + // "branch commits are not moved to previous documents until the branch is + // merged." + // i.e. if we're looking for unmerged branch commits, they cannot be in + // previous documents, they have to be in the main one - hence we have to use + // getLocalBranchCommits here + final Set<Revision> localBranchCommits = doc.getLocalBranchCommits(); + if (localBranchCommits.isEmpty()) { + // nothing to do then + phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC); + return; + } + + // !Note, the _bc sub-document was introduced with Oak 1.8 and is not present + // in older versions. The branch commit revision is added to _bc whenever a + // change is done on a document with a branch commit. This helps the + // DocumentNodeStore to more easily identify branch commit changes." + // The current implementation of "collectUnmergedBranchCommitDocument" only + // supports branch commits that are created after Oak 1.8 + for (Revision bcRevision : localBranchCommits) { + if (!isRevisionOlderThan(bcRevision, toModifiedMillis)) { + // only even consider revisions that are older than the provided + // timestamp - otherwise skip this + continue; + } + final String commitValue = nodeStore.getCommitValue(bcRevision, doc); + if (isCommitted(commitValue)) { + // obviously don't do anything with merged (committed) branch commits + continue; + } + removeUnmergedBCRevision(bcRevision, doc, updateOp); + } + // now for any of the handled system properties (the normal properties would + // already be cleaned up by cleanupDeletedProperties), the resulting + // subdocument could in theory become empty after removing all unmerged branch + // commit revisions is done later. + // so we need to go through all of them and check if we'd have removed + // the entirety - and then, instead of individually remove revisions, just + // delete the entire property. + if (updateOp.hasChanges()) { Review Comment: This can be further optimized by considering the case where `updateOp` has changed for the `unmergedBranchCommit` phase. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org