rishabhdaim commented on code in PR #1372:
URL: https://github.com/apache/jackrabbit-oak/pull/1372#discussion_r1535173583


##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java:
##########
@@ -929,6 +929,53 @@ private boolean isValidRevision(@NotNull RevisionContext 
context,
         return false;
     }
 
+    /**
+     * Resolve the commit revision that holds the current value of a property 
based
+     * on provided readRevision if the current value is in the local
+     * map - null if the current value might be in a split doc or the node or 
property
+     * does not exist at all.
+     *
+     * @param nodeStore    the node store.
+     * @param readRevision the read revision.
+     * @param key          the key of the property to resolve
+     * @return a Revision if the value of the property resolves to a value 
based
+     *         on what's in the local document, null if the node or property 
does
+     *         not exist at all or the value is in a split document.
+     */
+    Revision localCommitRevisionOfProperty(@NotNull DocumentNodeStore 
nodeStore,
+                                           @NotNull RevisionVector 
readRevision,
+                                           @NotNull String key) {
+        Map<Revision, String> validRevisions = Maps.newHashMap();

Review Comment:
   ```suggestion
           Map<Revision, String> validRevisions = new HashMap<>();
   ```



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }

Review Comment:
   ```suggestion
               int deletedRevsCount = 
StreamSupport.stream(traversedMainNode.getProperties().spliterator(), 
false).map(p -> escapePropertyName(p.getName()))
                       .mapToInt(p -> removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, p,
                               r ->  updateOp.removeMapEntry(p, r), // using 
Consumer here in place of Function
                               allKeepRevs)).sum();
   ```



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }

Review Comment:
   ```suggestion
               final Map<Path, DocumentNodeState> bundledNodeStates = 
stream(traversedMainNode.getAllBundledNodesStates().spliterator(), 
false).collect(toMap(DocumentNodeState::getPath, identity()));
   ```



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }
+            // remember that getAllBundledProperties returns unescaped keys
+            for (String propName : 
traversedMainNode.getAllBundledProperties().keySet()) {
+                final int lastSlash = propName.lastIndexOf("/");
+                if (lastSlash == -1) {
+                    // then it is an unbundled property which was already 
handled in phase 1
+                    continue;
+                }
+                final String escapedPropName = 
Utils.escapePropertyName(propName);
+                // bundled values are of format sub/tree/propertyKey
+                // to extract this we need the last index of "/"
+                final String unbundledSubtreeName = propName.substring(0, 
lastSlash);
+                final String unbundledPropName = propName.substring(lastSlash 
+ 1);
+                final String unbundledPath = 
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+                final DocumentNodeState traversedNode = 
bundledNodeStates.get(Path.fromString(unbundledPath));
+                if (traversedNode == null) {
+                    log.error("collectUnusedPropertyRevisions : could not find 
traversed node for bundled key {} unbundledPath {} in doc {}",
+                            propName, unbundledPath, doc.getId());
+                    continue;
+                }
+                final PropertyState traversedProperty = 
traversedNode.getProperty(unbundledPropName);
+                if (traversedProperty == null) {
+                    log.error("collectUnusedPropertyRevisions : could not get 
property {} from traversed node {}",
+                            unbundledPropName, traversedNode.getPath());
+                    continue;
+                }
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 3 : "_deleted"
+            int numDeleted = removeUnusedPropertyEntries(doc, 
traversedMainNode,
+                    updateOp, NodeDocument.DELETED,
+                    (r) -> { NodeDocument.removeDeleted(updateOp, r); return 
null;} ,
+                    allKeepRevs);
+            deletedRevsCount += numDeleted;
+            return deletedRevsCount;
+        }
+
+        private int collectUnusedInternalPropertyRevisions(final NodeDocument 
doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> toKeepUserPropRevs,
+                int deletedRevsCount) {
+            boolean hasUnmergedBranchCommits = false;
+            for (Revision localBc : doc.getLocalBranchCommits()) {
+                if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+                    hasUnmergedBranchCommits = true;
+                }
+            }
+            if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+                return deletedRevsCount;
+            }
+            // if we did some rev deletion OR there are unmerged BCs, then 
let's deep-dive
+            Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+            for (Revision revision : 
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COLLISIONS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCollision(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            // "_revisions"
+            for (Entry<Revision, String> e : 
doc.getLocalRevisions().entrySet()) {
+                Revision revision = e.getKey();
+                if (allRequiredRevs.contains(revision)) {
+                    // if it is still referenced locally, keep it
+                    continue;
+                }
+                final boolean isRoot = 
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+                // local bcs only considered for removal
+                final boolean isBC = 
!doc.getLocalBranchCommits().contains(revision);
+                final boolean newerThanSweep = 
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+                if (newerThanSweep) {
+                    //TODO wondering if we should at all do any DGC on 
documents newer than sweep
+                    if (isBC) {
+                        // analysed further down, we can remove them if 
unmerged
+                    } else {
+                        // for normal commits, we need to keep them -> also 
add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    }
+                }
+                // committed revisions are hard to delete, as determining
+                // whether or not they are orphaned is difficult.
+                // child nodes could be having _commitRoots pointing to this 
doc
+                // and this revision - in which case it wouldn't be orphaned.
+                // without scanning the children (or having knowledge about
+                // which revisions are referenced by children) it is unsafe
+                // to delete committed revisions.
+                // if they are uncommitted, then they are garbage, as then they
+                // are either normal commits (thus a not finished commit)
+                // or an unmerged branch commit. In both cases they are 
garbage.
+                boolean isCommitted = 
isCommitted(nodeStore.getCommitValue(revision, doc));
+                if (isCommitted) {
+                    if (isRoot) {
+                        // root : cannot remove since it could be referenced 
and required by a child
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else if (!isBC) {
+                        // non root and normal : cannot remove, same as above,
+                        // it could be referenced and required by a child 
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else {
+                        // non root and bc : can remove, as non root bc cannot 
be referenced by child
+                    }
+                }
+                Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.REVISIONS, revision));
+                if (has != null) {
+                    // then skip
+                    continue;
+                }
+                NodeDocument.removeRevision(updateOp, revision);
+                deletedRevsCount++;
+            }
+            // "_commitRoot"
+            for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COMMIT_ROOT, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCommitRoot(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }

Review Comment:
   ```suggestion
               deletedRevsCount += 
getDeletedRevsCount(doc.getLocalCommitRoot().keySet(), updateOp, 
allRequiredRevs, COMMIT_ROOT, NodeDocument::removeCommitRoot);
   ```



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -139,6 +142,14 @@ public class VersionGarbageCollector {
      */
     static final String 
SETTINGS_COLLECTION_DETAILED_GC_DRY_RUN_DOCUMENT_ID_PROP = "detailedGCDryRunId";
 
+    static enum RDGCType {
+        KeepOneFullMode,
+        KeepOneCleanupUserPropertiesOnlyMode,
+        OlderThan24AndBetweenCheckpointsMode

Review Comment:
   Please use capitol for enum constants.



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }
+            // remember that getAllBundledProperties returns unescaped keys
+            for (String propName : 
traversedMainNode.getAllBundledProperties().keySet()) {
+                final int lastSlash = propName.lastIndexOf("/");
+                if (lastSlash == -1) {
+                    // then it is an unbundled property which was already 
handled in phase 1
+                    continue;
+                }
+                final String escapedPropName = 
Utils.escapePropertyName(propName);
+                // bundled values are of format sub/tree/propertyKey
+                // to extract this we need the last index of "/"
+                final String unbundledSubtreeName = propName.substring(0, 
lastSlash);
+                final String unbundledPropName = propName.substring(lastSlash 
+ 1);
+                final String unbundledPath = 
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+                final DocumentNodeState traversedNode = 
bundledNodeStates.get(Path.fromString(unbundledPath));
+                if (traversedNode == null) {
+                    log.error("collectUnusedPropertyRevisions : could not find 
traversed node for bundled key {} unbundledPath {} in doc {}",
+                            propName, unbundledPath, doc.getId());
+                    continue;
+                }
+                final PropertyState traversedProperty = 
traversedNode.getProperty(unbundledPropName);
+                if (traversedProperty == null) {
+                    log.error("collectUnusedPropertyRevisions : could not get 
property {} from traversed node {}",
+                            unbundledPropName, traversedNode.getPath());
+                    continue;
+                }
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 3 : "_deleted"
+            int numDeleted = removeUnusedPropertyEntries(doc, 
traversedMainNode,
+                    updateOp, NodeDocument.DELETED,
+                    (r) -> { NodeDocument.removeDeleted(updateOp, r); return 
null;} ,
+                    allKeepRevs);
+            deletedRevsCount += numDeleted;
+            return deletedRevsCount;
+        }
+
+        private int collectUnusedInternalPropertyRevisions(final NodeDocument 
doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> toKeepUserPropRevs,
+                int deletedRevsCount) {
+            boolean hasUnmergedBranchCommits = false;
+            for (Revision localBc : doc.getLocalBranchCommits()) {
+                if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+                    hasUnmergedBranchCommits = true;
+                }
+            }
+            if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+                return deletedRevsCount;
+            }
+            // if we did some rev deletion OR there are unmerged BCs, then 
let's deep-dive
+            Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+            for (Revision revision : 
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COLLISIONS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCollision(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            // "_revisions"
+            for (Entry<Revision, String> e : 
doc.getLocalRevisions().entrySet()) {
+                Revision revision = e.getKey();
+                if (allRequiredRevs.contains(revision)) {
+                    // if it is still referenced locally, keep it
+                    continue;
+                }
+                final boolean isRoot = 
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+                // local bcs only considered for removal
+                final boolean isBC = 
!doc.getLocalBranchCommits().contains(revision);
+                final boolean newerThanSweep = 
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+                if (newerThanSweep) {
+                    //TODO wondering if we should at all do any DGC on 
documents newer than sweep
+                    if (isBC) {
+                        // analysed further down, we can remove them if 
unmerged
+                    } else {
+                        // for normal commits, we need to keep them -> also 
add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    }
+                }
+                // committed revisions are hard to delete, as determining
+                // whether or not they are orphaned is difficult.
+                // child nodes could be having _commitRoots pointing to this 
doc
+                // and this revision - in which case it wouldn't be orphaned.
+                // without scanning the children (or having knowledge about
+                // which revisions are referenced by children) it is unsafe
+                // to delete committed revisions.
+                // if they are uncommitted, then they are garbage, as then they
+                // are either normal commits (thus a not finished commit)
+                // or an unmerged branch commit. In both cases they are 
garbage.
+                boolean isCommitted = 
isCommitted(nodeStore.getCommitValue(revision, doc));
+                if (isCommitted) {
+                    if (isRoot) {
+                        // root : cannot remove since it could be referenced 
and required by a child
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else if (!isBC) {
+                        // non root and normal : cannot remove, same as above,
+                        // it could be referenced and required by a child 
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else {
+                        // non root and bc : can remove, as non root bc cannot 
be referenced by child
+                    }
+                }
+                Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.REVISIONS, revision));
+                if (has != null) {
+                    // then skip
+                    continue;
+                }
+                NodeDocument.removeRevision(updateOp, revision);
+                deletedRevsCount++;
+            }
+            // "_commitRoot"
+            for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COMMIT_ROOT, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCommitRoot(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            // "_bc"
+            for (Revision revision : doc.getLocalBranchCommits()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.BRANCH_COMMITS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+
+                    NodeDocument.removeBranchCommit(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            return deletedRevsCount;
+        }
+
+        private int removeUnusedPropertyEntries(NodeDocument doc,
+                DocumentNodeState traversedMainNode, UpdateOp updateOp,
+                String propertyKey, Function<Revision, Void> removeRevision,

Review Comment:
   Everywhere we call `removeUnusedPropertyEntries`, we return `null` from 
`removeRevision` `Function`. We could replace this `Function` with `Consumer` 
type `FunctionalInterface`. wydt?



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }
+            // remember that getAllBundledProperties returns unescaped keys
+            for (String propName : 
traversedMainNode.getAllBundledProperties().keySet()) {
+                final int lastSlash = propName.lastIndexOf("/");
+                if (lastSlash == -1) {
+                    // then it is an unbundled property which was already 
handled in phase 1
+                    continue;
+                }
+                final String escapedPropName = 
Utils.escapePropertyName(propName);
+                // bundled values are of format sub/tree/propertyKey
+                // to extract this we need the last index of "/"
+                final String unbundledSubtreeName = propName.substring(0, 
lastSlash);
+                final String unbundledPropName = propName.substring(lastSlash 
+ 1);
+                final String unbundledPath = 
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+                final DocumentNodeState traversedNode = 
bundledNodeStates.get(Path.fromString(unbundledPath));
+                if (traversedNode == null) {
+                    log.error("collectUnusedPropertyRevisions : could not find 
traversed node for bundled key {} unbundledPath {} in doc {}",
+                            propName, unbundledPath, doc.getId());
+                    continue;
+                }
+                final PropertyState traversedProperty = 
traversedNode.getProperty(unbundledPropName);
+                if (traversedProperty == null) {
+                    log.error("collectUnusedPropertyRevisions : could not get 
property {} from traversed node {}",
+                            unbundledPropName, traversedNode.getPath());
+                    continue;
+                }
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 3 : "_deleted"
+            int numDeleted = removeUnusedPropertyEntries(doc, 
traversedMainNode,
+                    updateOp, NodeDocument.DELETED,
+                    (r) -> { NodeDocument.removeDeleted(updateOp, r); return 
null;} ,
+                    allKeepRevs);
+            deletedRevsCount += numDeleted;
+            return deletedRevsCount;
+        }
+
+        private int collectUnusedInternalPropertyRevisions(final NodeDocument 
doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> toKeepUserPropRevs,
+                int deletedRevsCount) {
+            boolean hasUnmergedBranchCommits = false;
+            for (Revision localBc : doc.getLocalBranchCommits()) {
+                if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+                    hasUnmergedBranchCommits = true;
+                }
+            }

Review Comment:
   I believe here, we need to have `break` in case `hasUnmergedBranchCommits` 
is set to true otherwise it might change to false.
   
   ```suggestion
               boolean hasUnmergedBranchCommits = 
doc.getLocalBranchCommits().stream().anyMatch(r -> 
!isCommitted(nodeStore.getCommitValue(r, doc)));
   ```



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }
+            // remember that getAllBundledProperties returns unescaped keys
+            for (String propName : 
traversedMainNode.getAllBundledProperties().keySet()) {
+                final int lastSlash = propName.lastIndexOf("/");
+                if (lastSlash == -1) {
+                    // then it is an unbundled property which was already 
handled in phase 1
+                    continue;
+                }
+                final String escapedPropName = 
Utils.escapePropertyName(propName);
+                // bundled values are of format sub/tree/propertyKey
+                // to extract this we need the last index of "/"
+                final String unbundledSubtreeName = propName.substring(0, 
lastSlash);
+                final String unbundledPropName = propName.substring(lastSlash 
+ 1);
+                final String unbundledPath = 
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+                final DocumentNodeState traversedNode = 
bundledNodeStates.get(Path.fromString(unbundledPath));
+                if (traversedNode == null) {
+                    log.error("collectUnusedPropertyRevisions : could not find 
traversed node for bundled key {} unbundledPath {} in doc {}",
+                            propName, unbundledPath, doc.getId());
+                    continue;
+                }
+                final PropertyState traversedProperty = 
traversedNode.getProperty(unbundledPropName);
+                if (traversedProperty == null) {
+                    log.error("collectUnusedPropertyRevisions : could not get 
property {} from traversed node {}",
+                            unbundledPropName, traversedNode.getPath());
+                    continue;
+                }
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 3 : "_deleted"
+            int numDeleted = removeUnusedPropertyEntries(doc, 
traversedMainNode,
+                    updateOp, NodeDocument.DELETED,
+                    (r) -> { NodeDocument.removeDeleted(updateOp, r); return 
null;} ,
+                    allKeepRevs);
+            deletedRevsCount += numDeleted;
+            return deletedRevsCount;
+        }
+
+        private int collectUnusedInternalPropertyRevisions(final NodeDocument 
doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> toKeepUserPropRevs,
+                int deletedRevsCount) {
+            boolean hasUnmergedBranchCommits = false;
+            for (Revision localBc : doc.getLocalBranchCommits()) {
+                if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+                    hasUnmergedBranchCommits = true;
+                }
+            }
+            if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+                return deletedRevsCount;
+            }
+            // if we did some rev deletion OR there are unmerged BCs, then 
let's deep-dive
+            Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+            for (Revision revision : 
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COLLISIONS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCollision(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            // "_revisions"
+            for (Entry<Revision, String> e : 
doc.getLocalRevisions().entrySet()) {
+                Revision revision = e.getKey();
+                if (allRequiredRevs.contains(revision)) {
+                    // if it is still referenced locally, keep it
+                    continue;
+                }
+                final boolean isRoot = 
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+                // local bcs only considered for removal
+                final boolean isBC = 
!doc.getLocalBranchCommits().contains(revision);
+                final boolean newerThanSweep = 
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+                if (newerThanSweep) {
+                    //TODO wondering if we should at all do any DGC on 
documents newer than sweep
+                    if (isBC) {
+                        // analysed further down, we can remove them if 
unmerged
+                    } else {
+                        // for normal commits, we need to keep them -> also 
add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    }
+                }
+                // committed revisions are hard to delete, as determining
+                // whether or not they are orphaned is difficult.
+                // child nodes could be having _commitRoots pointing to this 
doc
+                // and this revision - in which case it wouldn't be orphaned.
+                // without scanning the children (or having knowledge about
+                // which revisions are referenced by children) it is unsafe
+                // to delete committed revisions.
+                // if they are uncommitted, then they are garbage, as then they
+                // are either normal commits (thus a not finished commit)
+                // or an unmerged branch commit. In both cases they are 
garbage.
+                boolean isCommitted = 
isCommitted(nodeStore.getCommitValue(revision, doc));
+                if (isCommitted) {
+                    if (isRoot) {
+                        // root : cannot remove since it could be referenced 
and required by a child
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else if (!isBC) {
+                        // non root and normal : cannot remove, same as above,
+                        // it could be referenced and required by a child 
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else {
+                        // non root and bc : can remove, as non root bc cannot 
be referenced by child
+                    }
+                }
+                Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.REVISIONS, revision));
+                if (has != null) {
+                    // then skip
+                    continue;
+                }
+                NodeDocument.removeRevision(updateOp, revision);
+                deletedRevsCount++;
+            }
+            // "_commitRoot"
+            for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COMMIT_ROOT, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCommitRoot(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            // "_bc"
+            for (Revision revision : doc.getLocalBranchCommits()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.BRANCH_COMMITS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+
+                    NodeDocument.removeBranchCommit(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }

Review Comment:
   ```suggestion
   
   deletedRevsCount += getDeletedRevsCount(doc.getLocalBranchCommits(), 
updateOp, allRequiredRevs, BRANCH_COMMITS, NodeDocument::removeBranchCommit);
   return deletedRevsCount;
   }
   
               private int getDeletedRevsCount(Set<Revision> revisionSet, 
UpdateOp updateOp, Set<Revision> allRequiredRevs, String updateOpKey, 
BiConsumer<UpdateOp, Revision> op) {
   
               return revisionSet.stream().filter(r -> 
!allRequiredRevs.contains(r))
                       .mapToInt(r -> {
                           Operation has = updateOp.getChanges().get(new 
Key(updateOpKey, r));
                           if (has != null) {
                               // then skip
                               return 0;
                           }
   
                           op.accept(updateOp, r);
                           return 1;
                       }).sum();
           }
   ```



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }
+            // remember that getAllBundledProperties returns unescaped keys
+            for (String propName : 
traversedMainNode.getAllBundledProperties().keySet()) {
+                final int lastSlash = propName.lastIndexOf("/");
+                if (lastSlash == -1) {
+                    // then it is an unbundled property which was already 
handled in phase 1
+                    continue;
+                }
+                final String escapedPropName = 
Utils.escapePropertyName(propName);
+                // bundled values are of format sub/tree/propertyKey
+                // to extract this we need the last index of "/"
+                final String unbundledSubtreeName = propName.substring(0, 
lastSlash);
+                final String unbundledPropName = propName.substring(lastSlash 
+ 1);
+                final String unbundledPath = 
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+                final DocumentNodeState traversedNode = 
bundledNodeStates.get(Path.fromString(unbundledPath));
+                if (traversedNode == null) {
+                    log.error("collectUnusedPropertyRevisions : could not find 
traversed node for bundled key {} unbundledPath {} in doc {}",
+                            propName, unbundledPath, doc.getId());
+                    continue;
+                }
+                final PropertyState traversedProperty = 
traversedNode.getProperty(unbundledPropName);
+                if (traversedProperty == null) {
+                    log.error("collectUnusedPropertyRevisions : could not get 
property {} from traversed node {}",
+                            unbundledPropName, traversedNode.getPath());
+                    continue;
+                }
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 3 : "_deleted"
+            int numDeleted = removeUnusedPropertyEntries(doc, 
traversedMainNode,
+                    updateOp, NodeDocument.DELETED,
+                    (r) -> { NodeDocument.removeDeleted(updateOp, r); return 
null;} ,
+                    allKeepRevs);
+            deletedRevsCount += numDeleted;
+            return deletedRevsCount;
+        }
+
+        private int collectUnusedInternalPropertyRevisions(final NodeDocument 
doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> toKeepUserPropRevs,
+                int deletedRevsCount) {
+            boolean hasUnmergedBranchCommits = false;
+            for (Revision localBc : doc.getLocalBranchCommits()) {
+                if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+                    hasUnmergedBranchCommits = true;
+                }
+            }
+            if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+                return deletedRevsCount;
+            }
+            // if we did some rev deletion OR there are unmerged BCs, then 
let's deep-dive
+            Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+            for (Revision revision : 
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COLLISIONS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCollision(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            // "_revisions"
+            for (Entry<Revision, String> e : 
doc.getLocalRevisions().entrySet()) {
+                Revision revision = e.getKey();
+                if (allRequiredRevs.contains(revision)) {
+                    // if it is still referenced locally, keep it
+                    continue;
+                }
+                final boolean isRoot = 
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+                // local bcs only considered for removal
+                final boolean isBC = 
!doc.getLocalBranchCommits().contains(revision);
+                final boolean newerThanSweep = 
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+                if (newerThanSweep) {
+                    //TODO wondering if we should at all do any DGC on 
documents newer than sweep
+                    if (isBC) {
+                        // analysed further down, we can remove them if 
unmerged
+                    } else {
+                        // for normal commits, we need to keep them -> also 
add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    }
+                }
+                // committed revisions are hard to delete, as determining
+                // whether or not they are orphaned is difficult.
+                // child nodes could be having _commitRoots pointing to this 
doc
+                // and this revision - in which case it wouldn't be orphaned.
+                // without scanning the children (or having knowledge about
+                // which revisions are referenced by children) it is unsafe
+                // to delete committed revisions.
+                // if they are uncommitted, then they are garbage, as then they
+                // are either normal commits (thus a not finished commit)
+                // or an unmerged branch commit. In both cases they are 
garbage.
+                boolean isCommitted = 
isCommitted(nodeStore.getCommitValue(revision, doc));
+                if (isCommitted) {
+                    if (isRoot) {
+                        // root : cannot remove since it could be referenced 
and required by a child
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else if (!isBC) {
+                        // non root and normal : cannot remove, same as above,
+                        // it could be referenced and required by a child 
+                        // also add to allRequiredRevs
+                        allRequiredRevs.add(revision);
+                        continue;
+                    } else {
+                        // non root and bc : can remove, as non root bc cannot 
be referenced by child
+                    }
+                }
+                Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.REVISIONS, revision));
+                if (has != null) {
+                    // then skip
+                    continue;
+                }
+                NodeDocument.removeRevision(updateOp, revision);
+                deletedRevsCount++;
+            }
+            // "_commitRoot"
+            for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COMMIT_ROOT, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCommitRoot(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            // "_bc"
+            for (Revision revision : doc.getLocalBranchCommits()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.BRANCH_COMMITS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+
+                    NodeDocument.removeBranchCommit(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }
+            return deletedRevsCount;
+        }
+
+        private int removeUnusedPropertyEntries(NodeDocument doc,
+                DocumentNodeState traversedMainNode, UpdateOp updateOp,
+                String propertyKey, Function<Revision, Void> removeRevision,
+                Set<Revision> allKeepRevs) {
+            // we need to use the traversedNode.getLastRevision() as the 
readRevision,
+            // as that is what was originally used in getNodeAtRevision when 
traversing
+            final Revision keepCommitRev = 
doc.localCommitRevisionOfProperty(nodeStore,
+                    traversedMainNode.getLastRevision(), propertyKey);
+            if (keepCommitRev == null) {
+                // could be due to node not existing or current value being in 
a split
+                // doc - while the former is unexpected, the latter might 
happen.
+                // in both cases let's skip this property
+                log.debug("removeUnusedPropertyEntries : no visible revision 
for property {} in doc {}",

Review Comment:
   ```suggestion
   if (log.isDebugEnabled()) {
                   log.debug("removeUnusedPropertyEntries : no visible revision 
for property {} in doc {}",
                   }
   ```



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }
+            // remember that getAllBundledProperties returns unescaped keys
+            for (String propName : 
traversedMainNode.getAllBundledProperties().keySet()) {
+                final int lastSlash = propName.lastIndexOf("/");
+                if (lastSlash == -1) {
+                    // then it is an unbundled property which was already 
handled in phase 1
+                    continue;
+                }
+                final String escapedPropName = 
Utils.escapePropertyName(propName);
+                // bundled values are of format sub/tree/propertyKey
+                // to extract this we need the last index of "/"
+                final String unbundledSubtreeName = propName.substring(0, 
lastSlash);
+                final String unbundledPropName = propName.substring(lastSlash 
+ 1);
+                final String unbundledPath = 
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+                final DocumentNodeState traversedNode = 
bundledNodeStates.get(Path.fromString(unbundledPath));
+                if (traversedNode == null) {
+                    log.error("collectUnusedPropertyRevisions : could not find 
traversed node for bundled key {} unbundledPath {} in doc {}",
+                            propName, unbundledPath, doc.getId());
+                    continue;
+                }
+                final PropertyState traversedProperty = 
traversedNode.getProperty(unbundledPropName);
+                if (traversedProperty == null) {
+                    log.error("collectUnusedPropertyRevisions : could not get 
property {} from traversed node {}",
+                            unbundledPropName, traversedNode.getPath());
+                    continue;
+                }
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 3 : "_deleted"
+            int numDeleted = removeUnusedPropertyEntries(doc, 
traversedMainNode,
+                    updateOp, NodeDocument.DELETED,
+                    (r) -> { NodeDocument.removeDeleted(updateOp, r); return 
null;} ,
+                    allKeepRevs);
+            deletedRevsCount += numDeleted;
+            return deletedRevsCount;
+        }
+
+        private int collectUnusedInternalPropertyRevisions(final NodeDocument 
doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> toKeepUserPropRevs,
+                int deletedRevsCount) {
+            boolean hasUnmergedBranchCommits = false;
+            for (Revision localBc : doc.getLocalBranchCommits()) {
+                if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+                    hasUnmergedBranchCommits = true;
+                }
+            }
+            if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+                return deletedRevsCount;
+            }
+            // if we did some rev deletion OR there are unmerged BCs, then 
let's deep-dive
+            Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+            for (Revision revision : 
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COLLISIONS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCollision(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }

Review Comment:
   ```suggestion
               // "_collisions"
               deletedRevsCount += 
getDeletedRevsCount(doc.getLocalCommitRoot().keySet(), updateOp, 
allRequiredRevs, COLLISIONS, NodeDocument::removeCollision);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to