rishabhdaim commented on code in PR #1372: URL: https://github.com/apache/jackrabbit-oak/pull/1372#discussion_r1535173583
########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java: ########## @@ -929,6 +929,53 @@ private boolean isValidRevision(@NotNull RevisionContext context, return false; } + /** + * Resolve the commit revision that holds the current value of a property based + * on provided readRevision if the current value is in the local + * map - null if the current value might be in a split doc or the node or property + * does not exist at all. + * + * @param nodeStore the node store. + * @param readRevision the read revision. + * @param key the key of the property to resolve + * @return a Revision if the value of the property resolves to a value based + * on what's in the local document, null if the node or property does + * not exist at all or the value is in a split document. + */ + Revision localCommitRevisionOfProperty(@NotNull DocumentNodeStore nodeStore, + @NotNull RevisionVector readRevision, + @NotNull String key) { + Map<Revision, String> validRevisions = Maps.newHashMap(); Review Comment: ```suggestion Map<Revision, String> validRevisions = new HashMap<>(); ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } Review Comment: ```suggestion int deletedRevsCount = StreamSupport.stream(traversedMainNode.getProperties().spliterator(), false).map(p -> escapePropertyName(p.getName())) .mapToInt(p -> removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, p, r -> updateOp.removeMapEntry(p, r), // using Consumer here in place of Function allKeepRevs)).sum(); ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } Review Comment: ```suggestion final Map<Path, DocumentNodeState> bundledNodeStates = stream(traversedMainNode.getAllBundledNodesStates().spliterator(), false).collect(toMap(DocumentNodeState::getPath, identity())); ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } + // remember that getAllBundledProperties returns unescaped keys + for (String propName : traversedMainNode.getAllBundledProperties().keySet()) { + final int lastSlash = propName.lastIndexOf("/"); + if (lastSlash == -1) { + // then it is an unbundled property which was already handled in phase 1 + continue; + } + final String escapedPropName = Utils.escapePropertyName(propName); + // bundled values are of format sub/tree/propertyKey + // to extract this we need the last index of "/" + final String unbundledSubtreeName = propName.substring(0, lastSlash); + final String unbundledPropName = propName.substring(lastSlash + 1); + final String unbundledPath = traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName; + final DocumentNodeState traversedNode = bundledNodeStates.get(Path.fromString(unbundledPath)); + if (traversedNode == null) { + log.error("collectUnusedPropertyRevisions : could not find traversed node for bundled key {} unbundledPath {} in doc {}", + propName, unbundledPath, doc.getId()); + continue; + } + final PropertyState traversedProperty = traversedNode.getProperty(unbundledPropName); + if (traversedProperty == null) { + log.error("collectUnusedPropertyRevisions : could not get property {} from traversed node {}", + unbundledPropName, traversedNode.getPath()); + continue; + } + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 3 : "_deleted" + int numDeleted = removeUnusedPropertyEntries(doc, traversedMainNode, + updateOp, NodeDocument.DELETED, + (r) -> { NodeDocument.removeDeleted(updateOp, r); return null;} , + allKeepRevs); + deletedRevsCount += numDeleted; + return deletedRevsCount; + } + + private int collectUnusedInternalPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> toKeepUserPropRevs, + int deletedRevsCount) { + boolean hasUnmergedBranchCommits = false; + for (Revision localBc : doc.getLocalBranchCommits()) { + if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) { + hasUnmergedBranchCommits = true; + } + } + if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) { + return deletedRevsCount; + } + // if we did some rev deletion OR there are unmerged BCs, then let's deep-dive + Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs); + for (Revision revision : doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COLLISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCollision(updateOp, revision); + deletedRevsCount++; + } + } + // "_revisions" + for (Entry<Revision, String> e : doc.getLocalRevisions().entrySet()) { + Revision revision = e.getKey(); + if (allRequiredRevs.contains(revision)) { + // if it is still referenced locally, keep it + continue; + } + final boolean isRoot = doc.getId().equals(Utils.getIdFromPath(Path.ROOT)); + // local bcs only considered for removal + final boolean isBC = !doc.getLocalBranchCommits().contains(revision); + final boolean newerThanSweep = nodeStore.getSweepRevisions().isRevisionNewer(revision); + if (newerThanSweep) { + //TODO wondering if we should at all do any DGC on documents newer than sweep + if (isBC) { + // analysed further down, we can remove them if unmerged + } else { + // for normal commits, we need to keep them -> also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } + } + // committed revisions are hard to delete, as determining + // whether or not they are orphaned is difficult. + // child nodes could be having _commitRoots pointing to this doc + // and this revision - in which case it wouldn't be orphaned. + // without scanning the children (or having knowledge about + // which revisions are referenced by children) it is unsafe + // to delete committed revisions. + // if they are uncommitted, then they are garbage, as then they + // are either normal commits (thus a not finished commit) + // or an unmerged branch commit. In both cases they are garbage. + boolean isCommitted = isCommitted(nodeStore.getCommitValue(revision, doc)); + if (isCommitted) { + if (isRoot) { + // root : cannot remove since it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else if (!isBC) { + // non root and normal : cannot remove, same as above, + // it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else { + // non root and bc : can remove, as non root bc cannot be referenced by child + } + } + Operation has = updateOp.getChanges().get(new Key(NodeDocument.REVISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeRevision(updateOp, revision); + deletedRevsCount++; + } + // "_commitRoot" + for (Revision revision : doc.getLocalCommitRoot().keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COMMIT_ROOT, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCommitRoot(updateOp, revision); + deletedRevsCount++; + } + } Review Comment: ```suggestion deletedRevsCount += getDeletedRevsCount(doc.getLocalCommitRoot().keySet(), updateOp, allRequiredRevs, COMMIT_ROOT, NodeDocument::removeCommitRoot); ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -139,6 +142,14 @@ public class VersionGarbageCollector { */ static final String SETTINGS_COLLECTION_DETAILED_GC_DRY_RUN_DOCUMENT_ID_PROP = "detailedGCDryRunId"; + static enum RDGCType { + KeepOneFullMode, + KeepOneCleanupUserPropertiesOnlyMode, + OlderThan24AndBetweenCheckpointsMode Review Comment: Please use capitol for enum constants. ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } + // remember that getAllBundledProperties returns unescaped keys + for (String propName : traversedMainNode.getAllBundledProperties().keySet()) { + final int lastSlash = propName.lastIndexOf("/"); + if (lastSlash == -1) { + // then it is an unbundled property which was already handled in phase 1 + continue; + } + final String escapedPropName = Utils.escapePropertyName(propName); + // bundled values are of format sub/tree/propertyKey + // to extract this we need the last index of "/" + final String unbundledSubtreeName = propName.substring(0, lastSlash); + final String unbundledPropName = propName.substring(lastSlash + 1); + final String unbundledPath = traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName; + final DocumentNodeState traversedNode = bundledNodeStates.get(Path.fromString(unbundledPath)); + if (traversedNode == null) { + log.error("collectUnusedPropertyRevisions : could not find traversed node for bundled key {} unbundledPath {} in doc {}", + propName, unbundledPath, doc.getId()); + continue; + } + final PropertyState traversedProperty = traversedNode.getProperty(unbundledPropName); + if (traversedProperty == null) { + log.error("collectUnusedPropertyRevisions : could not get property {} from traversed node {}", + unbundledPropName, traversedNode.getPath()); + continue; + } + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 3 : "_deleted" + int numDeleted = removeUnusedPropertyEntries(doc, traversedMainNode, + updateOp, NodeDocument.DELETED, + (r) -> { NodeDocument.removeDeleted(updateOp, r); return null;} , + allKeepRevs); + deletedRevsCount += numDeleted; + return deletedRevsCount; + } + + private int collectUnusedInternalPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> toKeepUserPropRevs, + int deletedRevsCount) { + boolean hasUnmergedBranchCommits = false; + for (Revision localBc : doc.getLocalBranchCommits()) { + if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) { + hasUnmergedBranchCommits = true; + } + } + if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) { + return deletedRevsCount; + } + // if we did some rev deletion OR there are unmerged BCs, then let's deep-dive + Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs); + for (Revision revision : doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COLLISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCollision(updateOp, revision); + deletedRevsCount++; + } + } + // "_revisions" + for (Entry<Revision, String> e : doc.getLocalRevisions().entrySet()) { + Revision revision = e.getKey(); + if (allRequiredRevs.contains(revision)) { + // if it is still referenced locally, keep it + continue; + } + final boolean isRoot = doc.getId().equals(Utils.getIdFromPath(Path.ROOT)); + // local bcs only considered for removal + final boolean isBC = !doc.getLocalBranchCommits().contains(revision); + final boolean newerThanSweep = nodeStore.getSweepRevisions().isRevisionNewer(revision); + if (newerThanSweep) { + //TODO wondering if we should at all do any DGC on documents newer than sweep + if (isBC) { + // analysed further down, we can remove them if unmerged + } else { + // for normal commits, we need to keep them -> also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } + } + // committed revisions are hard to delete, as determining + // whether or not they are orphaned is difficult. + // child nodes could be having _commitRoots pointing to this doc + // and this revision - in which case it wouldn't be orphaned. + // without scanning the children (or having knowledge about + // which revisions are referenced by children) it is unsafe + // to delete committed revisions. + // if they are uncommitted, then they are garbage, as then they + // are either normal commits (thus a not finished commit) + // or an unmerged branch commit. In both cases they are garbage. + boolean isCommitted = isCommitted(nodeStore.getCommitValue(revision, doc)); + if (isCommitted) { + if (isRoot) { + // root : cannot remove since it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else if (!isBC) { + // non root and normal : cannot remove, same as above, + // it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else { + // non root and bc : can remove, as non root bc cannot be referenced by child + } + } + Operation has = updateOp.getChanges().get(new Key(NodeDocument.REVISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeRevision(updateOp, revision); + deletedRevsCount++; + } + // "_commitRoot" + for (Revision revision : doc.getLocalCommitRoot().keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COMMIT_ROOT, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCommitRoot(updateOp, revision); + deletedRevsCount++; + } + } + // "_bc" + for (Revision revision : doc.getLocalBranchCommits()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.BRANCH_COMMITS, revision)); + if (has != null) { + // then skip + continue; + } + + NodeDocument.removeBranchCommit(updateOp, revision); + deletedRevsCount++; + } + } + return deletedRevsCount; + } + + private int removeUnusedPropertyEntries(NodeDocument doc, + DocumentNodeState traversedMainNode, UpdateOp updateOp, + String propertyKey, Function<Revision, Void> removeRevision, Review Comment: Everywhere we call `removeUnusedPropertyEntries`, we return `null` from `removeRevision` `Function`. We could replace this `Function` with `Consumer` type `FunctionalInterface`. wydt? ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } + // remember that getAllBundledProperties returns unescaped keys + for (String propName : traversedMainNode.getAllBundledProperties().keySet()) { + final int lastSlash = propName.lastIndexOf("/"); + if (lastSlash == -1) { + // then it is an unbundled property which was already handled in phase 1 + continue; + } + final String escapedPropName = Utils.escapePropertyName(propName); + // bundled values are of format sub/tree/propertyKey + // to extract this we need the last index of "/" + final String unbundledSubtreeName = propName.substring(0, lastSlash); + final String unbundledPropName = propName.substring(lastSlash + 1); + final String unbundledPath = traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName; + final DocumentNodeState traversedNode = bundledNodeStates.get(Path.fromString(unbundledPath)); + if (traversedNode == null) { + log.error("collectUnusedPropertyRevisions : could not find traversed node for bundled key {} unbundledPath {} in doc {}", + propName, unbundledPath, doc.getId()); + continue; + } + final PropertyState traversedProperty = traversedNode.getProperty(unbundledPropName); + if (traversedProperty == null) { + log.error("collectUnusedPropertyRevisions : could not get property {} from traversed node {}", + unbundledPropName, traversedNode.getPath()); + continue; + } + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 3 : "_deleted" + int numDeleted = removeUnusedPropertyEntries(doc, traversedMainNode, + updateOp, NodeDocument.DELETED, + (r) -> { NodeDocument.removeDeleted(updateOp, r); return null;} , + allKeepRevs); + deletedRevsCount += numDeleted; + return deletedRevsCount; + } + + private int collectUnusedInternalPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> toKeepUserPropRevs, + int deletedRevsCount) { + boolean hasUnmergedBranchCommits = false; + for (Revision localBc : doc.getLocalBranchCommits()) { + if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) { + hasUnmergedBranchCommits = true; + } + } Review Comment: I believe here, we need to have `break` in case `hasUnmergedBranchCommits` is set to true otherwise it might change to false. ```suggestion boolean hasUnmergedBranchCommits = doc.getLocalBranchCommits().stream().anyMatch(r -> !isCommitted(nodeStore.getCommitValue(r, doc))); ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } + // remember that getAllBundledProperties returns unescaped keys + for (String propName : traversedMainNode.getAllBundledProperties().keySet()) { + final int lastSlash = propName.lastIndexOf("/"); + if (lastSlash == -1) { + // then it is an unbundled property which was already handled in phase 1 + continue; + } + final String escapedPropName = Utils.escapePropertyName(propName); + // bundled values are of format sub/tree/propertyKey + // to extract this we need the last index of "/" + final String unbundledSubtreeName = propName.substring(0, lastSlash); + final String unbundledPropName = propName.substring(lastSlash + 1); + final String unbundledPath = traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName; + final DocumentNodeState traversedNode = bundledNodeStates.get(Path.fromString(unbundledPath)); + if (traversedNode == null) { + log.error("collectUnusedPropertyRevisions : could not find traversed node for bundled key {} unbundledPath {} in doc {}", + propName, unbundledPath, doc.getId()); + continue; + } + final PropertyState traversedProperty = traversedNode.getProperty(unbundledPropName); + if (traversedProperty == null) { + log.error("collectUnusedPropertyRevisions : could not get property {} from traversed node {}", + unbundledPropName, traversedNode.getPath()); + continue; + } + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 3 : "_deleted" + int numDeleted = removeUnusedPropertyEntries(doc, traversedMainNode, + updateOp, NodeDocument.DELETED, + (r) -> { NodeDocument.removeDeleted(updateOp, r); return null;} , + allKeepRevs); + deletedRevsCount += numDeleted; + return deletedRevsCount; + } + + private int collectUnusedInternalPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> toKeepUserPropRevs, + int deletedRevsCount) { + boolean hasUnmergedBranchCommits = false; + for (Revision localBc : doc.getLocalBranchCommits()) { + if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) { + hasUnmergedBranchCommits = true; + } + } + if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) { + return deletedRevsCount; + } + // if we did some rev deletion OR there are unmerged BCs, then let's deep-dive + Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs); + for (Revision revision : doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COLLISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCollision(updateOp, revision); + deletedRevsCount++; + } + } + // "_revisions" + for (Entry<Revision, String> e : doc.getLocalRevisions().entrySet()) { + Revision revision = e.getKey(); + if (allRequiredRevs.contains(revision)) { + // if it is still referenced locally, keep it + continue; + } + final boolean isRoot = doc.getId().equals(Utils.getIdFromPath(Path.ROOT)); + // local bcs only considered for removal + final boolean isBC = !doc.getLocalBranchCommits().contains(revision); + final boolean newerThanSweep = nodeStore.getSweepRevisions().isRevisionNewer(revision); + if (newerThanSweep) { + //TODO wondering if we should at all do any DGC on documents newer than sweep + if (isBC) { + // analysed further down, we can remove them if unmerged + } else { + // for normal commits, we need to keep them -> also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } + } + // committed revisions are hard to delete, as determining + // whether or not they are orphaned is difficult. + // child nodes could be having _commitRoots pointing to this doc + // and this revision - in which case it wouldn't be orphaned. + // without scanning the children (or having knowledge about + // which revisions are referenced by children) it is unsafe + // to delete committed revisions. + // if they are uncommitted, then they are garbage, as then they + // are either normal commits (thus a not finished commit) + // or an unmerged branch commit. In both cases they are garbage. + boolean isCommitted = isCommitted(nodeStore.getCommitValue(revision, doc)); + if (isCommitted) { + if (isRoot) { + // root : cannot remove since it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else if (!isBC) { + // non root and normal : cannot remove, same as above, + // it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else { + // non root and bc : can remove, as non root bc cannot be referenced by child + } + } + Operation has = updateOp.getChanges().get(new Key(NodeDocument.REVISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeRevision(updateOp, revision); + deletedRevsCount++; + } + // "_commitRoot" + for (Revision revision : doc.getLocalCommitRoot().keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COMMIT_ROOT, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCommitRoot(updateOp, revision); + deletedRevsCount++; + } + } + // "_bc" + for (Revision revision : doc.getLocalBranchCommits()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.BRANCH_COMMITS, revision)); + if (has != null) { + // then skip + continue; + } + + NodeDocument.removeBranchCommit(updateOp, revision); + deletedRevsCount++; + } + } Review Comment: ```suggestion deletedRevsCount += getDeletedRevsCount(doc.getLocalBranchCommits(), updateOp, allRequiredRevs, BRANCH_COMMITS, NodeDocument::removeBranchCommit); return deletedRevsCount; } private int getDeletedRevsCount(Set<Revision> revisionSet, UpdateOp updateOp, Set<Revision> allRequiredRevs, String updateOpKey, BiConsumer<UpdateOp, Revision> op) { return revisionSet.stream().filter(r -> !allRequiredRevs.contains(r)) .mapToInt(r -> { Operation has = updateOp.getChanges().get(new Key(updateOpKey, r)); if (has != null) { // then skip return 0; } op.accept(updateOp, r); return 1; }).sum(); } ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } + // remember that getAllBundledProperties returns unescaped keys + for (String propName : traversedMainNode.getAllBundledProperties().keySet()) { + final int lastSlash = propName.lastIndexOf("/"); + if (lastSlash == -1) { + // then it is an unbundled property which was already handled in phase 1 + continue; + } + final String escapedPropName = Utils.escapePropertyName(propName); + // bundled values are of format sub/tree/propertyKey + // to extract this we need the last index of "/" + final String unbundledSubtreeName = propName.substring(0, lastSlash); + final String unbundledPropName = propName.substring(lastSlash + 1); + final String unbundledPath = traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName; + final DocumentNodeState traversedNode = bundledNodeStates.get(Path.fromString(unbundledPath)); + if (traversedNode == null) { + log.error("collectUnusedPropertyRevisions : could not find traversed node for bundled key {} unbundledPath {} in doc {}", + propName, unbundledPath, doc.getId()); + continue; + } + final PropertyState traversedProperty = traversedNode.getProperty(unbundledPropName); + if (traversedProperty == null) { + log.error("collectUnusedPropertyRevisions : could not get property {} from traversed node {}", + unbundledPropName, traversedNode.getPath()); + continue; + } + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 3 : "_deleted" + int numDeleted = removeUnusedPropertyEntries(doc, traversedMainNode, + updateOp, NodeDocument.DELETED, + (r) -> { NodeDocument.removeDeleted(updateOp, r); return null;} , + allKeepRevs); + deletedRevsCount += numDeleted; + return deletedRevsCount; + } + + private int collectUnusedInternalPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> toKeepUserPropRevs, + int deletedRevsCount) { + boolean hasUnmergedBranchCommits = false; + for (Revision localBc : doc.getLocalBranchCommits()) { + if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) { + hasUnmergedBranchCommits = true; + } + } + if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) { + return deletedRevsCount; + } + // if we did some rev deletion OR there are unmerged BCs, then let's deep-dive + Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs); + for (Revision revision : doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COLLISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCollision(updateOp, revision); + deletedRevsCount++; + } + } + // "_revisions" + for (Entry<Revision, String> e : doc.getLocalRevisions().entrySet()) { + Revision revision = e.getKey(); + if (allRequiredRevs.contains(revision)) { + // if it is still referenced locally, keep it + continue; + } + final boolean isRoot = doc.getId().equals(Utils.getIdFromPath(Path.ROOT)); + // local bcs only considered for removal + final boolean isBC = !doc.getLocalBranchCommits().contains(revision); + final boolean newerThanSweep = nodeStore.getSweepRevisions().isRevisionNewer(revision); + if (newerThanSweep) { + //TODO wondering if we should at all do any DGC on documents newer than sweep + if (isBC) { + // analysed further down, we can remove them if unmerged + } else { + // for normal commits, we need to keep them -> also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } + } + // committed revisions are hard to delete, as determining + // whether or not they are orphaned is difficult. + // child nodes could be having _commitRoots pointing to this doc + // and this revision - in which case it wouldn't be orphaned. + // without scanning the children (or having knowledge about + // which revisions are referenced by children) it is unsafe + // to delete committed revisions. + // if they are uncommitted, then they are garbage, as then they + // are either normal commits (thus a not finished commit) + // or an unmerged branch commit. In both cases they are garbage. + boolean isCommitted = isCommitted(nodeStore.getCommitValue(revision, doc)); + if (isCommitted) { + if (isRoot) { + // root : cannot remove since it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else if (!isBC) { + // non root and normal : cannot remove, same as above, + // it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else { + // non root and bc : can remove, as non root bc cannot be referenced by child + } + } + Operation has = updateOp.getChanges().get(new Key(NodeDocument.REVISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeRevision(updateOp, revision); + deletedRevsCount++; + } + // "_commitRoot" + for (Revision revision : doc.getLocalCommitRoot().keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COMMIT_ROOT, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCommitRoot(updateOp, revision); + deletedRevsCount++; + } + } + // "_bc" + for (Revision revision : doc.getLocalBranchCommits()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.BRANCH_COMMITS, revision)); + if (has != null) { + // then skip + continue; + } + + NodeDocument.removeBranchCommit(updateOp, revision); + deletedRevsCount++; + } + } + return deletedRevsCount; + } + + private int removeUnusedPropertyEntries(NodeDocument doc, + DocumentNodeState traversedMainNode, UpdateOp updateOp, + String propertyKey, Function<Revision, Void> removeRevision, + Set<Revision> allKeepRevs) { + // we need to use the traversedNode.getLastRevision() as the readRevision, + // as that is what was originally used in getNodeAtRevision when traversing + final Revision keepCommitRev = doc.localCommitRevisionOfProperty(nodeStore, + traversedMainNode.getLastRevision(), propertyKey); + if (keepCommitRev == null) { + // could be due to node not existing or current value being in a split + // doc - while the former is unexpected, the latter might happen. + // in both cases let's skip this property + log.debug("removeUnusedPropertyEntries : no visible revision for property {} in doc {}", Review Comment: ```suggestion if (log.isDebugEnabled()) { log.debug("removeUnusedPropertyEntries : no visible revision for property {} in doc {}", } ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -1246,6 +1349,256 @@ private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocumen } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... + int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } + // remember that getAllBundledProperties returns unescaped keys + for (String propName : traversedMainNode.getAllBundledProperties().keySet()) { + final int lastSlash = propName.lastIndexOf("/"); + if (lastSlash == -1) { + // then it is an unbundled property which was already handled in phase 1 + continue; + } + final String escapedPropName = Utils.escapePropertyName(propName); + // bundled values are of format sub/tree/propertyKey + // to extract this we need the last index of "/" + final String unbundledSubtreeName = propName.substring(0, lastSlash); + final String unbundledPropName = propName.substring(lastSlash + 1); + final String unbundledPath = traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName; + final DocumentNodeState traversedNode = bundledNodeStates.get(Path.fromString(unbundledPath)); + if (traversedNode == null) { + log.error("collectUnusedPropertyRevisions : could not find traversed node for bundled key {} unbundledPath {} in doc {}", + propName, unbundledPath, doc.getId()); + continue; + } + final PropertyState traversedProperty = traversedNode.getProperty(unbundledPropName); + if (traversedProperty == null) { + log.error("collectUnusedPropertyRevisions : could not get property {} from traversed node {}", + unbundledPropName, traversedNode.getPath()); + continue; + } + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 3 : "_deleted" + int numDeleted = removeUnusedPropertyEntries(doc, traversedMainNode, + updateOp, NodeDocument.DELETED, + (r) -> { NodeDocument.removeDeleted(updateOp, r); return null;} , + allKeepRevs); + deletedRevsCount += numDeleted; + return deletedRevsCount; + } + + private int collectUnusedInternalPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> toKeepUserPropRevs, + int deletedRevsCount) { + boolean hasUnmergedBranchCommits = false; + for (Revision localBc : doc.getLocalBranchCommits()) { + if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) { + hasUnmergedBranchCommits = true; + } + } + if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) { + return deletedRevsCount; + } + // if we did some rev deletion OR there are unmerged BCs, then let's deep-dive + Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs); + for (Revision revision : doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COLLISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCollision(updateOp, revision); + deletedRevsCount++; + } + } Review Comment: ```suggestion // "_collisions" deletedRevsCount += getDeletedRevsCount(doc.getLocalCommitRoot().keySet(), updateOp, allRequiredRevs, COLLISIONS, NodeDocument::removeCollision); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org