stefan-egli commented on code in PR #1372:
URL: https://github.com/apache/jackrabbit-oak/pull/1372#discussion_r1537642939
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
+ // remember that getAllBundledProperties returns unescaped keys
+ for (String propName :
traversedMainNode.getAllBundledProperties().keySet()) {
+ final int lastSlash = propName.lastIndexOf("/");
+ if (lastSlash == -1) {
+ // then it is an unbundled property which was already
handled in phase 1
+ continue;
+ }
+ final String escapedPropName =
Utils.escapePropertyName(propName);
+ // bundled values are of format sub/tree/propertyKey
+ // to extract this we need the last index of "/"
+ final String unbundledSubtreeName = propName.substring(0,
lastSlash);
+ final String unbundledPropName = propName.substring(lastSlash
+ 1);
+ final String unbundledPath =
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+ final DocumentNodeState traversedNode =
bundledNodeStates.get(Path.fromString(unbundledPath));
+ if (traversedNode == null) {
+ log.error("collectUnusedPropertyRevisions : could not find
traversed node for bundled key {} unbundledPath {} in doc {}",
+ propName, unbundledPath, doc.getId());
+ continue;
+ }
+ final PropertyState traversedProperty =
traversedNode.getProperty(unbundledPropName);
+ if (traversedProperty == null) {
+ log.error("collectUnusedPropertyRevisions : could not get
property {} from traversed node {}",
+ unbundledPropName, traversedNode.getPath());
+ continue;
+ }
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 3 : "_deleted"
+ int numDeleted = removeUnusedPropertyEntries(doc,
traversedMainNode,
+ updateOp, NodeDocument.DELETED,
+ (r) -> { NodeDocument.removeDeleted(updateOp, r); return
null;} ,
+ allKeepRevs);
+ deletedRevsCount += numDeleted;
+ return deletedRevsCount;
+ }
+
+ private int collectUnusedInternalPropertyRevisions(final NodeDocument
doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> toKeepUserPropRevs,
+ int deletedRevsCount) {
+ boolean hasUnmergedBranchCommits = false;
+ for (Revision localBc : doc.getLocalBranchCommits()) {
+ if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+ hasUnmergedBranchCommits = true;
+ }
+ }
+ if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+ return deletedRevsCount;
+ }
+ // if we did some rev deletion OR there are unmerged BCs, then
let's deep-dive
+ Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+ for (Revision revision :
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COLLISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCollision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
Review Comment:
incorporated - plus fixed `getLocalCommitRoot()` to `getLocalMap(..)` in
https://github.com/apache/jackrabbit-oak/pull/1372/commits/4bbb8ed82c9ac7ae53994f9de1cbed15ca515718
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]