rishabhdaim commented on code in PR #1372:
URL: https://github.com/apache/jackrabbit-oak/pull/1372#discussion_r1535173583
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java:
##########
@@ -929,6 +929,53 @@ private boolean isValidRevision(@NotNull RevisionContext
context,
return false;
}
+ /**
+ * Resolve the commit revision that holds the current value of a property
based
+ * on provided readRevision if the current value is in the local
+ * map - null if the current value might be in a split doc or the node or
property
+ * does not exist at all.
+ *
+ * @param nodeStore the node store.
+ * @param readRevision the read revision.
+ * @param key the key of the property to resolve
+ * @return a Revision if the value of the property resolves to a value
based
+ * on what's in the local document, null if the node or property
does
+ * not exist at all or the value is in a split document.
+ */
+ Revision localCommitRevisionOfProperty(@NotNull DocumentNodeStore
nodeStore,
+ @NotNull RevisionVector
readRevision,
+ @NotNull String key) {
+ Map<Revision, String> validRevisions = Maps.newHashMap();
Review Comment:
```suggestion
Map<Revision, String> validRevisions = new HashMap<>();
```
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
Review Comment:
```suggestion
int deletedRevsCount =
StreamSupport.stream(traversedMainNode.getProperties().spliterator(),
false).map(p -> escapePropertyName(p.getName()))
.mapToInt(p -> removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, p,
r -> updateOp.removeMapEntry(p, r), // using
Consumer here in place of Function
allKeepRevs)).sum();
```
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
Review Comment:
```suggestion
final Map<Path, DocumentNodeState> bundledNodeStates =
stream(traversedMainNode.getAllBundledNodesStates().spliterator(),
false).collect(toMap(DocumentNodeState::getPath, identity()));
```
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
+ // remember that getAllBundledProperties returns unescaped keys
+ for (String propName :
traversedMainNode.getAllBundledProperties().keySet()) {
+ final int lastSlash = propName.lastIndexOf("/");
+ if (lastSlash == -1) {
+ // then it is an unbundled property which was already
handled in phase 1
+ continue;
+ }
+ final String escapedPropName =
Utils.escapePropertyName(propName);
+ // bundled values are of format sub/tree/propertyKey
+ // to extract this we need the last index of "/"
+ final String unbundledSubtreeName = propName.substring(0,
lastSlash);
+ final String unbundledPropName = propName.substring(lastSlash
+ 1);
+ final String unbundledPath =
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+ final DocumentNodeState traversedNode =
bundledNodeStates.get(Path.fromString(unbundledPath));
+ if (traversedNode == null) {
+ log.error("collectUnusedPropertyRevisions : could not find
traversed node for bundled key {} unbundledPath {} in doc {}",
+ propName, unbundledPath, doc.getId());
+ continue;
+ }
+ final PropertyState traversedProperty =
traversedNode.getProperty(unbundledPropName);
+ if (traversedProperty == null) {
+ log.error("collectUnusedPropertyRevisions : could not get
property {} from traversed node {}",
+ unbundledPropName, traversedNode.getPath());
+ continue;
+ }
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 3 : "_deleted"
+ int numDeleted = removeUnusedPropertyEntries(doc,
traversedMainNode,
+ updateOp, NodeDocument.DELETED,
+ (r) -> { NodeDocument.removeDeleted(updateOp, r); return
null;} ,
+ allKeepRevs);
+ deletedRevsCount += numDeleted;
+ return deletedRevsCount;
+ }
+
+ private int collectUnusedInternalPropertyRevisions(final NodeDocument
doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> toKeepUserPropRevs,
+ int deletedRevsCount) {
+ boolean hasUnmergedBranchCommits = false;
+ for (Revision localBc : doc.getLocalBranchCommits()) {
+ if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+ hasUnmergedBranchCommits = true;
+ }
+ }
+ if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+ return deletedRevsCount;
+ }
+ // if we did some rev deletion OR there are unmerged BCs, then
let's deep-dive
+ Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+ for (Revision revision :
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COLLISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCollision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ // "_revisions"
+ for (Entry<Revision, String> e :
doc.getLocalRevisions().entrySet()) {
+ Revision revision = e.getKey();
+ if (allRequiredRevs.contains(revision)) {
+ // if it is still referenced locally, keep it
+ continue;
+ }
+ final boolean isRoot =
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+ // local bcs only considered for removal
+ final boolean isBC =
!doc.getLocalBranchCommits().contains(revision);
+ final boolean newerThanSweep =
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+ if (newerThanSweep) {
+ //TODO wondering if we should at all do any DGC on
documents newer than sweep
+ if (isBC) {
+ // analysed further down, we can remove them if
unmerged
+ } else {
+ // for normal commits, we need to keep them -> also
add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ }
+ }
+ // committed revisions are hard to delete, as determining
+ // whether or not they are orphaned is difficult.
+ // child nodes could be having _commitRoots pointing to this
doc
+ // and this revision - in which case it wouldn't be orphaned.
+ // without scanning the children (or having knowledge about
+ // which revisions are referenced by children) it is unsafe
+ // to delete committed revisions.
+ // if they are uncommitted, then they are garbage, as then they
+ // are either normal commits (thus a not finished commit)
+ // or an unmerged branch commit. In both cases they are
garbage.
+ boolean isCommitted =
isCommitted(nodeStore.getCommitValue(revision, doc));
+ if (isCommitted) {
+ if (isRoot) {
+ // root : cannot remove since it could be referenced
and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else if (!isBC) {
+ // non root and normal : cannot remove, same as above,
+ // it could be referenced and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else {
+ // non root and bc : can remove, as non root bc cannot
be referenced by child
+ }
+ }
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.REVISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeRevision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ // "_commitRoot"
+ for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COMMIT_ROOT, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCommitRoot(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
Review Comment:
```suggestion
deletedRevsCount +=
getDeletedRevsCount(doc.getLocalCommitRoot().keySet(), updateOp,
allRequiredRevs, COMMIT_ROOT, NodeDocument::removeCommitRoot);
```
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -139,6 +142,14 @@ public class VersionGarbageCollector {
*/
static final String
SETTINGS_COLLECTION_DETAILED_GC_DRY_RUN_DOCUMENT_ID_PROP = "detailedGCDryRunId";
+ static enum RDGCType {
+ KeepOneFullMode,
+ KeepOneCleanupUserPropertiesOnlyMode,
+ OlderThan24AndBetweenCheckpointsMode
Review Comment:
Please use capitol for enum constants.
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
+ // remember that getAllBundledProperties returns unescaped keys
+ for (String propName :
traversedMainNode.getAllBundledProperties().keySet()) {
+ final int lastSlash = propName.lastIndexOf("/");
+ if (lastSlash == -1) {
+ // then it is an unbundled property which was already
handled in phase 1
+ continue;
+ }
+ final String escapedPropName =
Utils.escapePropertyName(propName);
+ // bundled values are of format sub/tree/propertyKey
+ // to extract this we need the last index of "/"
+ final String unbundledSubtreeName = propName.substring(0,
lastSlash);
+ final String unbundledPropName = propName.substring(lastSlash
+ 1);
+ final String unbundledPath =
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+ final DocumentNodeState traversedNode =
bundledNodeStates.get(Path.fromString(unbundledPath));
+ if (traversedNode == null) {
+ log.error("collectUnusedPropertyRevisions : could not find
traversed node for bundled key {} unbundledPath {} in doc {}",
+ propName, unbundledPath, doc.getId());
+ continue;
+ }
+ final PropertyState traversedProperty =
traversedNode.getProperty(unbundledPropName);
+ if (traversedProperty == null) {
+ log.error("collectUnusedPropertyRevisions : could not get
property {} from traversed node {}",
+ unbundledPropName, traversedNode.getPath());
+ continue;
+ }
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 3 : "_deleted"
+ int numDeleted = removeUnusedPropertyEntries(doc,
traversedMainNode,
+ updateOp, NodeDocument.DELETED,
+ (r) -> { NodeDocument.removeDeleted(updateOp, r); return
null;} ,
+ allKeepRevs);
+ deletedRevsCount += numDeleted;
+ return deletedRevsCount;
+ }
+
+ private int collectUnusedInternalPropertyRevisions(final NodeDocument
doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> toKeepUserPropRevs,
+ int deletedRevsCount) {
+ boolean hasUnmergedBranchCommits = false;
+ for (Revision localBc : doc.getLocalBranchCommits()) {
+ if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+ hasUnmergedBranchCommits = true;
+ }
+ }
+ if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+ return deletedRevsCount;
+ }
+ // if we did some rev deletion OR there are unmerged BCs, then
let's deep-dive
+ Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+ for (Revision revision :
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COLLISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCollision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ // "_revisions"
+ for (Entry<Revision, String> e :
doc.getLocalRevisions().entrySet()) {
+ Revision revision = e.getKey();
+ if (allRequiredRevs.contains(revision)) {
+ // if it is still referenced locally, keep it
+ continue;
+ }
+ final boolean isRoot =
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+ // local bcs only considered for removal
+ final boolean isBC =
!doc.getLocalBranchCommits().contains(revision);
+ final boolean newerThanSweep =
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+ if (newerThanSweep) {
+ //TODO wondering if we should at all do any DGC on
documents newer than sweep
+ if (isBC) {
+ // analysed further down, we can remove them if
unmerged
+ } else {
+ // for normal commits, we need to keep them -> also
add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ }
+ }
+ // committed revisions are hard to delete, as determining
+ // whether or not they are orphaned is difficult.
+ // child nodes could be having _commitRoots pointing to this
doc
+ // and this revision - in which case it wouldn't be orphaned.
+ // without scanning the children (or having knowledge about
+ // which revisions are referenced by children) it is unsafe
+ // to delete committed revisions.
+ // if they are uncommitted, then they are garbage, as then they
+ // are either normal commits (thus a not finished commit)
+ // or an unmerged branch commit. In both cases they are
garbage.
+ boolean isCommitted =
isCommitted(nodeStore.getCommitValue(revision, doc));
+ if (isCommitted) {
+ if (isRoot) {
+ // root : cannot remove since it could be referenced
and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else if (!isBC) {
+ // non root and normal : cannot remove, same as above,
+ // it could be referenced and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else {
+ // non root and bc : can remove, as non root bc cannot
be referenced by child
+ }
+ }
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.REVISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeRevision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ // "_commitRoot"
+ for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COMMIT_ROOT, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCommitRoot(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ // "_bc"
+ for (Revision revision : doc.getLocalBranchCommits()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.BRANCH_COMMITS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+
+ NodeDocument.removeBranchCommit(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ return deletedRevsCount;
+ }
+
+ private int removeUnusedPropertyEntries(NodeDocument doc,
+ DocumentNodeState traversedMainNode, UpdateOp updateOp,
+ String propertyKey, Function<Revision, Void> removeRevision,
Review Comment:
Everywhere we call `removeUnusedPropertyEntries`, we return `null` from
`removeRevision` `Function`. We could replace this `Function` with `Consumer`
type `FunctionalInterface`. wydt?
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
+ // remember that getAllBundledProperties returns unescaped keys
+ for (String propName :
traversedMainNode.getAllBundledProperties().keySet()) {
+ final int lastSlash = propName.lastIndexOf("/");
+ if (lastSlash == -1) {
+ // then it is an unbundled property which was already
handled in phase 1
+ continue;
+ }
+ final String escapedPropName =
Utils.escapePropertyName(propName);
+ // bundled values are of format sub/tree/propertyKey
+ // to extract this we need the last index of "/"
+ final String unbundledSubtreeName = propName.substring(0,
lastSlash);
+ final String unbundledPropName = propName.substring(lastSlash
+ 1);
+ final String unbundledPath =
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+ final DocumentNodeState traversedNode =
bundledNodeStates.get(Path.fromString(unbundledPath));
+ if (traversedNode == null) {
+ log.error("collectUnusedPropertyRevisions : could not find
traversed node for bundled key {} unbundledPath {} in doc {}",
+ propName, unbundledPath, doc.getId());
+ continue;
+ }
+ final PropertyState traversedProperty =
traversedNode.getProperty(unbundledPropName);
+ if (traversedProperty == null) {
+ log.error("collectUnusedPropertyRevisions : could not get
property {} from traversed node {}",
+ unbundledPropName, traversedNode.getPath());
+ continue;
+ }
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 3 : "_deleted"
+ int numDeleted = removeUnusedPropertyEntries(doc,
traversedMainNode,
+ updateOp, NodeDocument.DELETED,
+ (r) -> { NodeDocument.removeDeleted(updateOp, r); return
null;} ,
+ allKeepRevs);
+ deletedRevsCount += numDeleted;
+ return deletedRevsCount;
+ }
+
+ private int collectUnusedInternalPropertyRevisions(final NodeDocument
doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> toKeepUserPropRevs,
+ int deletedRevsCount) {
+ boolean hasUnmergedBranchCommits = false;
+ for (Revision localBc : doc.getLocalBranchCommits()) {
+ if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+ hasUnmergedBranchCommits = true;
+ }
+ }
Review Comment:
I believe here, we need to have `break` in case `hasUnmergedBranchCommits`
is set to true otherwise it might change to false.
```suggestion
boolean hasUnmergedBranchCommits =
doc.getLocalBranchCommits().stream().anyMatch(r ->
!isCommitted(nodeStore.getCommitValue(r, doc)));
```
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
+ // remember that getAllBundledProperties returns unescaped keys
+ for (String propName :
traversedMainNode.getAllBundledProperties().keySet()) {
+ final int lastSlash = propName.lastIndexOf("/");
+ if (lastSlash == -1) {
+ // then it is an unbundled property which was already
handled in phase 1
+ continue;
+ }
+ final String escapedPropName =
Utils.escapePropertyName(propName);
+ // bundled values are of format sub/tree/propertyKey
+ // to extract this we need the last index of "/"
+ final String unbundledSubtreeName = propName.substring(0,
lastSlash);
+ final String unbundledPropName = propName.substring(lastSlash
+ 1);
+ final String unbundledPath =
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+ final DocumentNodeState traversedNode =
bundledNodeStates.get(Path.fromString(unbundledPath));
+ if (traversedNode == null) {
+ log.error("collectUnusedPropertyRevisions : could not find
traversed node for bundled key {} unbundledPath {} in doc {}",
+ propName, unbundledPath, doc.getId());
+ continue;
+ }
+ final PropertyState traversedProperty =
traversedNode.getProperty(unbundledPropName);
+ if (traversedProperty == null) {
+ log.error("collectUnusedPropertyRevisions : could not get
property {} from traversed node {}",
+ unbundledPropName, traversedNode.getPath());
+ continue;
+ }
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 3 : "_deleted"
+ int numDeleted = removeUnusedPropertyEntries(doc,
traversedMainNode,
+ updateOp, NodeDocument.DELETED,
+ (r) -> { NodeDocument.removeDeleted(updateOp, r); return
null;} ,
+ allKeepRevs);
+ deletedRevsCount += numDeleted;
+ return deletedRevsCount;
+ }
+
+ private int collectUnusedInternalPropertyRevisions(final NodeDocument
doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> toKeepUserPropRevs,
+ int deletedRevsCount) {
+ boolean hasUnmergedBranchCommits = false;
+ for (Revision localBc : doc.getLocalBranchCommits()) {
+ if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+ hasUnmergedBranchCommits = true;
+ }
+ }
+ if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+ return deletedRevsCount;
+ }
+ // if we did some rev deletion OR there are unmerged BCs, then
let's deep-dive
+ Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+ for (Revision revision :
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COLLISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCollision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ // "_revisions"
+ for (Entry<Revision, String> e :
doc.getLocalRevisions().entrySet()) {
+ Revision revision = e.getKey();
+ if (allRequiredRevs.contains(revision)) {
+ // if it is still referenced locally, keep it
+ continue;
+ }
+ final boolean isRoot =
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+ // local bcs only considered for removal
+ final boolean isBC =
!doc.getLocalBranchCommits().contains(revision);
+ final boolean newerThanSweep =
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+ if (newerThanSweep) {
+ //TODO wondering if we should at all do any DGC on
documents newer than sweep
+ if (isBC) {
+ // analysed further down, we can remove them if
unmerged
+ } else {
+ // for normal commits, we need to keep them -> also
add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ }
+ }
+ // committed revisions are hard to delete, as determining
+ // whether or not they are orphaned is difficult.
+ // child nodes could be having _commitRoots pointing to this
doc
+ // and this revision - in which case it wouldn't be orphaned.
+ // without scanning the children (or having knowledge about
+ // which revisions are referenced by children) it is unsafe
+ // to delete committed revisions.
+ // if they are uncommitted, then they are garbage, as then they
+ // are either normal commits (thus a not finished commit)
+ // or an unmerged branch commit. In both cases they are
garbage.
+ boolean isCommitted =
isCommitted(nodeStore.getCommitValue(revision, doc));
+ if (isCommitted) {
+ if (isRoot) {
+ // root : cannot remove since it could be referenced
and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else if (!isBC) {
+ // non root and normal : cannot remove, same as above,
+ // it could be referenced and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else {
+ // non root and bc : can remove, as non root bc cannot
be referenced by child
+ }
+ }
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.REVISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeRevision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ // "_commitRoot"
+ for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COMMIT_ROOT, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCommitRoot(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ // "_bc"
+ for (Revision revision : doc.getLocalBranchCommits()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.BRANCH_COMMITS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+
+ NodeDocument.removeBranchCommit(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
Review Comment:
```suggestion
deletedRevsCount += getDeletedRevsCount(doc.getLocalBranchCommits(),
updateOp, allRequiredRevs, BRANCH_COMMITS, NodeDocument::removeBranchCommit);
return deletedRevsCount;
}
private int getDeletedRevsCount(Set<Revision> revisionSet,
UpdateOp updateOp, Set<Revision> allRequiredRevs, String updateOpKey,
BiConsumer<UpdateOp, Revision> op) {
return revisionSet.stream().filter(r ->
!allRequiredRevs.contains(r))
.mapToInt(r -> {
Operation has = updateOp.getChanges().get(new
Key(updateOpKey, r));
if (has != null) {
// then skip
return 0;
}
op.accept(updateOp, r);
return 1;
}).sum();
}
```
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
+ // remember that getAllBundledProperties returns unescaped keys
+ for (String propName :
traversedMainNode.getAllBundledProperties().keySet()) {
+ final int lastSlash = propName.lastIndexOf("/");
+ if (lastSlash == -1) {
+ // then it is an unbundled property which was already
handled in phase 1
+ continue;
+ }
+ final String escapedPropName =
Utils.escapePropertyName(propName);
+ // bundled values are of format sub/tree/propertyKey
+ // to extract this we need the last index of "/"
+ final String unbundledSubtreeName = propName.substring(0,
lastSlash);
+ final String unbundledPropName = propName.substring(lastSlash
+ 1);
+ final String unbundledPath =
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+ final DocumentNodeState traversedNode =
bundledNodeStates.get(Path.fromString(unbundledPath));
+ if (traversedNode == null) {
+ log.error("collectUnusedPropertyRevisions : could not find
traversed node for bundled key {} unbundledPath {} in doc {}",
+ propName, unbundledPath, doc.getId());
+ continue;
+ }
+ final PropertyState traversedProperty =
traversedNode.getProperty(unbundledPropName);
+ if (traversedProperty == null) {
+ log.error("collectUnusedPropertyRevisions : could not get
property {} from traversed node {}",
+ unbundledPropName, traversedNode.getPath());
+ continue;
+ }
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 3 : "_deleted"
+ int numDeleted = removeUnusedPropertyEntries(doc,
traversedMainNode,
+ updateOp, NodeDocument.DELETED,
+ (r) -> { NodeDocument.removeDeleted(updateOp, r); return
null;} ,
+ allKeepRevs);
+ deletedRevsCount += numDeleted;
+ return deletedRevsCount;
+ }
+
+ private int collectUnusedInternalPropertyRevisions(final NodeDocument
doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> toKeepUserPropRevs,
+ int deletedRevsCount) {
+ boolean hasUnmergedBranchCommits = false;
+ for (Revision localBc : doc.getLocalBranchCommits()) {
+ if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+ hasUnmergedBranchCommits = true;
+ }
+ }
+ if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+ return deletedRevsCount;
+ }
+ // if we did some rev deletion OR there are unmerged BCs, then
let's deep-dive
+ Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+ for (Revision revision :
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COLLISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCollision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ // "_revisions"
+ for (Entry<Revision, String> e :
doc.getLocalRevisions().entrySet()) {
+ Revision revision = e.getKey();
+ if (allRequiredRevs.contains(revision)) {
+ // if it is still referenced locally, keep it
+ continue;
+ }
+ final boolean isRoot =
doc.getId().equals(Utils.getIdFromPath(Path.ROOT));
+ // local bcs only considered for removal
+ final boolean isBC =
!doc.getLocalBranchCommits().contains(revision);
+ final boolean newerThanSweep =
nodeStore.getSweepRevisions().isRevisionNewer(revision);
+ if (newerThanSweep) {
+ //TODO wondering if we should at all do any DGC on
documents newer than sweep
+ if (isBC) {
+ // analysed further down, we can remove them if
unmerged
+ } else {
+ // for normal commits, we need to keep them -> also
add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ }
+ }
+ // committed revisions are hard to delete, as determining
+ // whether or not they are orphaned is difficult.
+ // child nodes could be having _commitRoots pointing to this
doc
+ // and this revision - in which case it wouldn't be orphaned.
+ // without scanning the children (or having knowledge about
+ // which revisions are referenced by children) it is unsafe
+ // to delete committed revisions.
+ // if they are uncommitted, then they are garbage, as then they
+ // are either normal commits (thus a not finished commit)
+ // or an unmerged branch commit. In both cases they are
garbage.
+ boolean isCommitted =
isCommitted(nodeStore.getCommitValue(revision, doc));
+ if (isCommitted) {
+ if (isRoot) {
+ // root : cannot remove since it could be referenced
and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else if (!isBC) {
+ // non root and normal : cannot remove, same as above,
+ // it could be referenced and required by a child
+ // also add to allRequiredRevs
+ allRequiredRevs.add(revision);
+ continue;
+ } else {
+ // non root and bc : can remove, as non root bc cannot
be referenced by child
+ }
+ }
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.REVISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeRevision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ // "_commitRoot"
+ for (Revision revision : doc.getLocalCommitRoot().keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COMMIT_ROOT, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCommitRoot(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ // "_bc"
+ for (Revision revision : doc.getLocalBranchCommits()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.BRANCH_COMMITS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+
+ NodeDocument.removeBranchCommit(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
+ return deletedRevsCount;
+ }
+
+ private int removeUnusedPropertyEntries(NodeDocument doc,
+ DocumentNodeState traversedMainNode, UpdateOp updateOp,
+ String propertyKey, Function<Revision, Void> removeRevision,
+ Set<Revision> allKeepRevs) {
+ // we need to use the traversedNode.getLastRevision() as the
readRevision,
+ // as that is what was originally used in getNodeAtRevision when
traversing
+ final Revision keepCommitRev =
doc.localCommitRevisionOfProperty(nodeStore,
+ traversedMainNode.getLastRevision(), propertyKey);
+ if (keepCommitRev == null) {
+ // could be due to node not existing or current value being in
a split
+ // doc - while the former is unexpected, the latter might
happen.
+ // in both cases let's skip this property
+ log.debug("removeUnusedPropertyEntries : no visible revision
for property {} in doc {}",
Review Comment:
```suggestion
if (log.isDebugEnabled()) {
log.debug("removeUnusedPropertyEntries : no visible revision
for property {} in doc {}",
}
```
##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -1246,6 +1349,256 @@ private void
collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen
}
}
+ /**
+ * Remove all property revisions in the local document that are no
longer used.
+ * This includes bundled properties. It also includes related entries
that
+ * become obsolete as a result - i.e. _commitRoot and _bc (though the
latter
+ * is never removed on root)
+ */
+ private void collectUnusedPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final boolean ignoreInternalProperties) {
+
+ if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
+ // cancelled
+ return;
+ }
+ final Set<Revision> allKeepRevs = new HashSet<>();
+ // phase A : collectUnusedUserPropertyRevisions
+ int deletedTotalRevsCount =
collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode,
allKeepRevs);
+ int deletedUserRevsCount = deletedTotalRevsCount;
+ // phase B : collectUnusedInternalPropertyRevisions
+ if (!ignoreInternalProperties) {
+ deletedTotalRevsCount =
collectUnusedInternalPropertyRevisions(doc, phases, updateOp,
traversedMainNode, allKeepRevs, deletedTotalRevsCount);
+ }
+
+ // then some accounting...
+ int deletedInternalRevsCount = deletedTotalRevsCount -
deletedUserRevsCount;
+ if (deletedUserRevsCount != 0) {
+ deletedPropRevsCountMap.merge(doc.getId(),
deletedUserRevsCount, Integer::sum);
+ }
+ if (deletedInternalRevsCount != 0) {
+ deletedInternalPropRevsCountMap.merge(doc.getId(),
deletedInternalRevsCount, Integer::sum);
+ }
+ phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
+ }
+
+ private int collectUnusedUserPropertyRevisions(final NodeDocument doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> allKeepRevs) {
+ int deletedRevsCount = 0;
+
+ // phase 1 : non bundled nodes only
+ for (PropertyState prop : traversedMainNode.getProperties()) {
+ final String escapedPropName =
Utils.escapePropertyName(prop.getName());
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedMainNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 2 : bundled nodes only
+ final Map<Path, DocumentNodeState> bundledNodeStates = new
HashMap<>();
+ for (DocumentNodeState dns :
traversedMainNode.getAllBundledNodesStates()) {
+ bundledNodeStates.put(dns.getPath(), dns);
+ }
+ // remember that getAllBundledProperties returns unescaped keys
+ for (String propName :
traversedMainNode.getAllBundledProperties().keySet()) {
+ final int lastSlash = propName.lastIndexOf("/");
+ if (lastSlash == -1) {
+ // then it is an unbundled property which was already
handled in phase 1
+ continue;
+ }
+ final String escapedPropName =
Utils.escapePropertyName(propName);
+ // bundled values are of format sub/tree/propertyKey
+ // to extract this we need the last index of "/"
+ final String unbundledSubtreeName = propName.substring(0,
lastSlash);
+ final String unbundledPropName = propName.substring(lastSlash
+ 1);
+ final String unbundledPath =
traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName;
+ final DocumentNodeState traversedNode =
bundledNodeStates.get(Path.fromString(unbundledPath));
+ if (traversedNode == null) {
+ log.error("collectUnusedPropertyRevisions : could not find
traversed node for bundled key {} unbundledPath {} in doc {}",
+ propName, unbundledPath, doc.getId());
+ continue;
+ }
+ final PropertyState traversedProperty =
traversedNode.getProperty(unbundledPropName);
+ if (traversedProperty == null) {
+ log.error("collectUnusedPropertyRevisions : could not get
property {} from traversed node {}",
+ unbundledPropName, traversedNode.getPath());
+ continue;
+ }
+ deletedRevsCount += removeUnusedPropertyEntries(doc,
traversedNode, updateOp, escapedPropName,
+ (r) -> { updateOp.removeMapEntry(escapedPropName, r);
return null;},
+ allKeepRevs);
+ }
+
+ // phase 3 : "_deleted"
+ int numDeleted = removeUnusedPropertyEntries(doc,
traversedMainNode,
+ updateOp, NodeDocument.DELETED,
+ (r) -> { NodeDocument.removeDeleted(updateOp, r); return
null;} ,
+ allKeepRevs);
+ deletedRevsCount += numDeleted;
+ return deletedRevsCount;
+ }
+
+ private int collectUnusedInternalPropertyRevisions(final NodeDocument
doc,
+ final GCPhases phases, final UpdateOp updateOp,
+ final DocumentNodeState traversedMainNode,
+ final Set<Revision> toKeepUserPropRevs,
+ int deletedRevsCount) {
+ boolean hasUnmergedBranchCommits = false;
+ for (Revision localBc : doc.getLocalBranchCommits()) {
+ if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) {
+ hasUnmergedBranchCommits = true;
+ }
+ }
+ if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) {
+ return deletedRevsCount;
+ }
+ // if we did some rev deletion OR there are unmerged BCs, then
let's deep-dive
+ Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs);
+ for (Revision revision :
doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) {
+ if (!allRequiredRevs.contains(revision)) {
+ Operation has = updateOp.getChanges().get(new
Key(NodeDocument.COLLISIONS, revision));
+ if (has != null) {
+ // then skip
+ continue;
+ }
+ NodeDocument.removeCollision(updateOp, revision);
+ deletedRevsCount++;
+ }
+ }
Review Comment:
```suggestion
// "_collisions"
deletedRevsCount +=
getDeletedRevsCount(doc.getLocalCommitRoot().keySet(), updateOp,
allRequiredRevs, COLLISIONS, NodeDocument::removeCollision);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]