stefan-egli commented on code in PR #1372:
URL: https://github.com/apache/jackrabbit-oak/pull/1372#discussion_r1537642939


##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void 
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
             }
         }
 
+        /**
+         * Remove all property revisions in the local document that are no 
longer used.
+         * This includes bundled properties. It also includes related entries 
that
+         * become obsolete as a result - i.e. _commitRoot and _bc (though the 
latter
+         * is never removed on root)
+         */
+        private void collectUnusedPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final boolean ignoreInternalProperties) {
+
+            if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+                // cancelled
+                return;
+            }
+            final Set<Revision> allKeepRevs = new HashSet<>();
+            // phase A : collectUnusedUserPropertyRevisions
+            int deletedTotalRevsCount = 
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, 
allKeepRevs);
+            int deletedUserRevsCount = deletedTotalRevsCount;
+            // phase B : collectUnusedInternalPropertyRevisions
+            if (!ignoreInternalProperties) {
+                deletedTotalRevsCount = 
collectUnusedInternalPropertyRevisions(doc, phases, updateOp, 
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+            }
+
+            // then some accounting...
+            int deletedInternalRevsCount = deletedTotalRevsCount - 
deletedUserRevsCount;
+            if (deletedUserRevsCount != 0) {
+                deletedPropRevsCountMap.merge(doc.getId(), 
deletedUserRevsCount, Integer::sum);
+            }
+            if (deletedInternalRevsCount != 0) {
+                deletedInternalPropRevsCountMap.merge(doc.getId(), 
deletedInternalRevsCount, Integer::sum);
+            }
+            phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+        }
+
+        private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> allKeepRevs) {
+            int deletedRevsCount = 0;
+
+            // phase 1 : non bundled nodes only
+            for (PropertyState prop : traversedMainNode.getProperties()) {
+                final String escapedPropName = 
Utils.escapePropertyName(prop.getName());
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedMainNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 2 : bundled nodes only
+            final Map<Path, DocumentNodeState> bundledNodeStates = new 
HashMap<>();
+            for (DocumentNodeState dns : 
traversedMainNode.getAllBundledNodesStates()) {
+                bundledNodeStates.put(dns.getPath(), dns);
+            }
+            // remember that getAllBundledProperties returns unescaped keys
+            for (String propName : 
traversedMainNode.getAllBundledProperties().keySet()) {
+                final int lastSlash = propName.lastIndexOf("/");
+                if (lastSlash == -1) {
+                    // then it is an unbundled property which was already 
handled in phase 1
+                    continue;
+                }
+                final String escapedPropName = 
Utils.escapePropertyName(propName);
+                // bundled values are of format sub/tree/propertyKey
+                // to extract this we need the last index of "/"
+                final String unbundledSubtreeName = propName.substring(0, 
lastSlash);
+                final String unbundledPropName = propName.substring(lastSlash 
+ 1);
+                final String unbundledPath = 
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+                final DocumentNodeState traversedNode = 
bundledNodeStates.get(Path.fromString(unbundledPath));
+                if (traversedNode == null) {
+                    log.error("collectUnusedPropertyRevisions : could not find 
traversed node for bundled key {} unbundledPath {} in doc {}",
+                            propName, unbundledPath, doc.getId());
+                    continue;
+                }
+                final PropertyState traversedProperty = 
traversedNode.getProperty(unbundledPropName);
+                if (traversedProperty == null) {
+                    log.error("collectUnusedPropertyRevisions : could not get 
property {} from traversed node {}",
+                            unbundledPropName, traversedNode.getPath());
+                    continue;
+                }
+                deletedRevsCount += removeUnusedPropertyEntries(doc, 
traversedNode, updateOp, escapedPropName,
+                        (r) -> { updateOp.removeMapEntry(escapedPropName, r); 
return null;},
+                        allKeepRevs);
+            }
+
+            // phase 3 : "_deleted"
+            int numDeleted = removeUnusedPropertyEntries(doc, 
traversedMainNode,
+                    updateOp, NodeDocument.DELETED,
+                    (r) -> { NodeDocument.removeDeleted(updateOp, r); return 
null;} ,
+                    allKeepRevs);
+            deletedRevsCount += numDeleted;
+            return deletedRevsCount;
+        }
+
+        private int collectUnusedInternalPropertyRevisions(final NodeDocument 
doc,
+                final GCPhases phases, final UpdateOp updateOp,
+                final DocumentNodeState traversedMainNode,
+                final Set<Revision> toKeepUserPropRevs,
+                int deletedRevsCount) {
+            boolean hasUnmergedBranchCommits = false;
+            for (Revision localBc : doc.getLocalBranchCommits()) {
+                if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+                    hasUnmergedBranchCommits = true;
+                }
+            }
+            if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+                return deletedRevsCount;
+            }
+            // if we did some rev deletion OR there are unmerged BCs, then 
let's deep-dive
+            Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+            for (Revision revision : 
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+                if (!allRequiredRevs.contains(revision)) {
+                    Operation has = updateOp.getChanges().get(new 
Key(NodeDocument.COLLISIONS, revision));
+                    if (has != null) {
+                        // then skip
+                        continue;
+                    }
+                    NodeDocument.removeCollision(updateOp, revision);
+                    deletedRevsCount++;
+                }
+            }

Review Comment:
   incorporated - plus fixed `getLocalCommitRoot()` to `getLocalMap(..)` in 
https://github.com/apache/jackrabbit-oak/pull/1372/commits/4bbb8ed82c9ac7ae53994f9de1cbed15ca515718



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to