This is an automated email from the ASF dual-hosted git repository. stefanegli pushed a commit to branch OAK-10688-rebase in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 40225c85a6c0784ea120ffe1b4aaa486c50fecc8 Author: Stefan Egli <[email protected]> AuthorDate: Wed Mar 20 16:22:37 2024 +0100 OAK-10688 : keep only traversed property revisions - take special care with internal properties, esp _bc --- .../oak/plugins/document/NodeDocument.java | 70 ++- .../plugins/document/VersionGarbageCollector.java | 369 +++++++++++++++- .../oak/plugins/document/BranchCommitGCTest.java | 112 +++-- .../oak/plugins/document/DetailGCHelper.java | 31 +- .../document/VersionGarbageCollectorIT.java | 481 +++++++++++++++++---- 5 files changed, 924 insertions(+), 139 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java index 8f0cfa6d80..8ac9c060ee 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java @@ -169,7 +169,7 @@ public final class NodeDocument extends Document { /** * Whether this node is deleted. Key: revision, value: true/false. */ - private static final String DELETED = "_deleted"; + static final String DELETED = "_deleted"; /** * Flag indicating that whether this node was ever deleted. Its just used as @@ -228,7 +228,7 @@ public final class NodeDocument extends Document { /** * Contains revision entries for changes done by branch commits. */ - private static final String BRANCH_COMMITS = "_bc"; + static final String BRANCH_COMMITS = "_bc"; /** * The revision set by the background document sweeper. 
The revision @@ -929,6 +929,53 @@ public final class NodeDocument extends Document { return false; } + /** + * Resolve the commit revision that holds the current value of a property based + * on provided readRevision if the current value is in the local + * map - null if the current value might be in a split doc or the node or property + * does not exist at all. + * + * @param nodeStore the node store. + * @param readRevision the read revision. + * @param key the key of the property to resolve + * @return a Revision if the value of the property resolves to a value based + * on what's in the local document, null if the node or property does + * not exist at all or the value is in a split document. + */ + Revision localCommitRevisionOfProperty(@NotNull DocumentNodeStore nodeStore, + @NotNull RevisionVector readRevision, + @NotNull String key) { + Map<Revision, String> validRevisions = Maps.newHashMap(); + Branch branch = nodeStore.getBranches().getBranch(readRevision); + LastRevs lastRevs = createLastRevs(readRevision, + nodeStore, branch, null); + + Revision min = getLiveRevision(nodeStore, readRevision, validRevisions, lastRevs); + if (min == null) { + // node is deleted + return null; + } + + // ignore when local map is empty (OAK-2442) + SortedMap<Revision, String> local = getLocalMap(key); + if (local.isEmpty()) { + return null; + } + + // first check local map, which contains most recent values + Value value = getLatestValue(nodeStore, local.entrySet(), + readRevision, validRevisions, lastRevs); + if (value == null) { + return null; + } + // check if there may be more recent values in a previous document + if (requiresCompleteMapCheck(value, local, nodeStore)) { + return null; + } else { + return value.valueEntry.getKey(); + } + } + /** * Returns a {@link DocumentNodeState} as seen at the given * <code>readRevision</code>. 
@@ -967,6 +1014,7 @@ public final class NodeDocument extends Document { if (local.isEmpty()) { continue; } + // first check local map, which contains most recent values Value value = getLatestValue(nodeStore, local.entrySet(), readRevision, validRevisions, lastRevs); @@ -980,7 +1028,7 @@ public final class NodeDocument extends Document { readRevision, validRevisions, lastRevs); } String propertyName = Utils.unescapePropertyName(key); - String v = value != null ? value.value : null; + String v = value != null ? value.valueEntry.getValue() : null; if (v != null){ props.add(nodeStore.createPropertyState(propertyName, v)); } @@ -1065,7 +1113,7 @@ public final class NodeDocument extends Document { value = getLatestValue(context, getDeleted().entrySet(), readRevision, validRevisions, lastRevs); } - return value != null && "false".equals(value.value) ? value.revision : null; + return value != null && "false".equals(value.valueEntry.getValue()) ? value.revision : null; } /** @@ -2253,7 +2301,7 @@ public final class NodeDocument extends Document { } if (isValidRevision(context, propRev, commitValue, readRevision, validRevisions)) { - return new Value(commitRev, entry.getValue()); + return new Value(commitRev, entry); } } return null; @@ -2363,14 +2411,16 @@ public final class NodeDocument extends Document { final Revision revision; /** - * The value of a property at the given revision. A {@code null} value + * valueEntry contains both the underlying (commit) revision and + * the (String) value of a property. valueEntry is never null. + * valueEntry.getValue() being {@code null} * indicates the property was removed. 
*/ - final String value; + final Map.Entry<Revision, String> valueEntry; - Value(@NotNull Revision revision, @Nullable String value) { - this.revision = checkNotNull(revision); - this.value = value; + Value(@NotNull Revision mergeRevision, @NotNull Map.Entry<Revision, String> valueEntry) { + this.revision = checkNotNull(mergeRevision); + this.valueEntry = valueEntry; } } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index 71e64690b2..b3a4b8e828 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -29,8 +29,10 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Objects; import java.util.Set; +import java.util.SortedMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -55,6 +57,7 @@ import org.apache.jackrabbit.oak.spi.gc.DelegatingGCMonitor; import org.apache.jackrabbit.oak.spi.gc.GCMonitor; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.stats.Clock; +import org.apache.jackrabbit.oak.api.PropertyState; import org.apache.jackrabbit.oak.commons.TimeDurationFormatter; import org.apache.jackrabbit.oak.stats.StatisticsProvider; import org.jetbrains.annotations.NotNull; @@ -139,6 +142,14 @@ public class VersionGarbageCollector { */ static final String SETTINGS_COLLECTION_DETAILED_GC_DRY_RUN_DOCUMENT_ID_PROP = "detailedGCDryRunId"; + static enum RDGCType { + KeepOneFullMode, + KeepOneCleanupUserPropertiesOnlyMode, + OlderThan24AndBetweenCheckpointsMode + } + + final static RDGCType 
revisionDetailedGcType = RDGCType.KeepOneFullMode; + private final DocumentNodeStore nodeStore; private final DocumentStore ds; private final boolean detailedGCEnabled; @@ -337,6 +348,9 @@ public class VersionGarbageCollector { int updatedDetailedGCDocsCount; int skippedDetailedGCDocsCount; int deletedPropsCount; + int deletedInternalPropsCount; + int deletedPropRevsCount; + int deletedInternalPropRevsCount; int deletedUnmergedBCCount; final TimeDurationFormatter df = TimeDurationFormatter.forLogging(); final Stopwatch active = Stopwatch.createUnstarted(); @@ -417,6 +431,9 @@ public class VersionGarbageCollector { ", updatedDetailedGCDocsCount=" + updatedDetailedGCDocsCount + ", skippedDetailedGCDocsCount=" + skippedDetailedGCDocsCount + ", deletedPropsCount=" + deletedPropsCount + + ", deletedInternalPropsCount=" + deletedInternalPropsCount + + ", deletedPropRevsCount=" + deletedPropRevsCount + + ", deletedInternalPropRevsCount=" + deletedInternalPropRevsCount + ", deletedUnmergedBCCount=" + deletedUnmergedBCCount + ", iterationCount=" + iterationCount + ", timeDetailedGCActive=" + df.format(detailedGCActiveElapsed, MICROSECONDS) + @@ -443,6 +460,9 @@ public class VersionGarbageCollector { this.updatedDetailedGCDocsCount += run.updatedDetailedGCDocsCount; this.skippedDetailedGCDocsCount += run.skippedDetailedGCDocsCount; this.deletedPropsCount += run.deletedPropsCount; + this.deletedInternalPropsCount += run.deletedInternalPropsCount; + this.deletedPropRevsCount += run.deletedPropRevsCount; + this.deletedInternalPropRevsCount += run.deletedInternalPropRevsCount; this.deletedUnmergedBCCount += run.deletedUnmergedBCCount; if (run.iterationCount > 0) { // run is cumulative with times in elapsed fields @@ -913,6 +933,20 @@ public class VersionGarbageCollector { * In order to calculate the correct no. 
of updated documents & deleted properties, we save them in a map */ private final Map<String, Integer> deletedPropsCountMap; + private final Map<String, Integer> deletedInternalPropsCountMap; + + /** + * Map of documentId => total no. of deleted property revisions. + * <p> + * + * The document can be updated between collecting and deletion phases. + * This would lead to document not getting deleted (since now modified date & mod count would have changed) + * SO the Bulk API wouldn't update this doc. + * <p> + * In order to calculate the correct no. of updated documents & deleted property revisions, we save them in a map + */ + private final Map<String, Integer> deletedPropRevsCountMap; + private final Map<String, Integer> deletedInternalPropRevsCountMap; /** * {@link Set} of unmergedBranchCommit Revisions to calculate the no. of unmergedBranchCommits that would be @@ -933,6 +967,9 @@ public class VersionGarbageCollector { this.updateOpList = new ArrayList<>(); this.orphanOrDeletedRemovalMap = new HashMap<>(); this.deletedPropsCountMap = new HashMap<>(); + this.deletedInternalPropsCountMap = new HashMap<>(); + this.deletedPropRevsCountMap = new HashMap<>(); + this.deletedInternalPropRevsCountMap = new HashMap<>(); this.deletedUnmergedBCSet = new HashSet<>(); this.timer = createUnstarted(); // clusterId is not used @@ -964,8 +1001,22 @@ public class VersionGarbageCollector { } else { // here the node is not orphaned which means that we can reach the node from root collectDeletedProperties(doc, phases, op, traversedState); - collectUnmergedBranchCommits(doc, phases, op, toModifiedMs); - collectRevisionsOlderThan24hAndBetweenCheckpoints(doc, phases, op); + switch(revisionDetailedGcType) { + case KeepOneFullMode : { + collectUnusedPropertyRevisions(doc, phases, op, (DocumentNodeState) traversedState, false); + combineInternalPropRemovals(doc, op); + break; + } + case KeepOneCleanupUserPropertiesOnlyMode : { + collectUnusedPropertyRevisions(doc, phases, op, 
(DocumentNodeState) traversedState, true); + combineInternalPropRemovals(doc, op); + break; + } + case OlderThan24AndBetweenCheckpointsMode : { + collectRevisionsOlderThan24hAndBetweenCheckpoints(doc, phases, op); + break; + } + } // only add if there are changes for this doc if (op.hasChanges()) { garbageDocsCount++; @@ -979,6 +1030,40 @@ public class VersionGarbageCollector { } } + private void combineInternalPropRemovals(final NodeDocument doc, + final UpdateOp op) { + // now for any of the handled system properties (the normal properties would + // already be cleaned up by cleanupDeletedProperties), the resulting + // sub document could in theory become empty after removing all unmerged branch + // commit revisions is done later. + // so we need to go through all of them and check if we'd have removed + // the entirety - and then, instead of individually remove revisions, just + // delete the entire property. + if (op.hasChanges()) { + final int deletedSystemPropsCount = getSystemRemoveMapEntryCounts(op) + .entrySet().stream() + .filter(e -> filterEmptyProps(doc, e.getKey(), e.getValue())) + .mapToInt(e -> { + final String prop = e.getKey(); + int countBefore = op.getChanges().entrySet().size(); + boolean removed = op.getChanges().entrySet().removeIf(opEntry -> Objects.equals(prop, opEntry.getKey().getName())); + int countAfter = op.getChanges().entrySet().size(); + if (removed) { + if (prop.startsWith("_")) { + deletedInternalPropRevsCountMap.merge(doc.getId(), (countAfter - countBefore), Integer::sum); + } else { + deletedPropRevsCountMap.merge(doc.getId(), (countAfter - countBefore), Integer::sum); + } + } + op.remove(prop); + return 1;}) + .sum(); + + // update the deleted properties count Map to calculate the total no. of deleted properties + deletedInternalPropsCountMap.merge(doc.getId(), deletedSystemPropsCount, Integer::sum); + } + } + /** * Check if the node represented by the given doc and traversedState is * <i>orphaned</i>. 
A node is considered orphaned if it does not have a visible @@ -1096,7 +1181,7 @@ public class VersionGarbageCollector { .sum(); // update the deleted properties count Map to calculate the total no. of deleted properties - deletedPropsCountMap.merge(doc.getId(), deletedSystemPropsCount, Integer::sum); + deletedInternalPropsCountMap.merge(doc.getId(), deletedSystemPropsCount, Integer::sum); } phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC); } @@ -1180,13 +1265,19 @@ public class VersionGarbageCollector { */ private void removeUnmergedBCRevision(final Revision unmergedBCRevision, final NodeDocument doc, final UpdateOp updateOp) { + int internalRevEntriesCount = 0; + int revEntriesCount = 0; // caller ensures the provided revision is an unmerged branch commit + if (doc.getLocalBranchCommits().contains(unmergedBCRevision)) { + internalRevEntriesCount++; + } NodeDocument.removeBranchCommit(updateOp, unmergedBCRevision); // phase 1 : remove unmerged bc revisions from _deleted - unmerged branch // commits can only be in the local set final String unmergedDeleted = doc.getLocalDeleted().get(unmergedBCRevision); if (unmergedDeleted != null) { + internalRevEntriesCount++; NodeDocument.removeDeleted(updateOp, unmergedBCRevision); // phase 2: the document could now effectively be "deleted" the actual @@ -1213,12 +1304,15 @@ public class VersionGarbageCollector { } // phase 3 : go through other system properties if (doc.getLocalCommitRoot().containsKey(unmergedBCRevision)) { + internalRevEntriesCount++; NodeDocument.removeCommitRoot(updateOp, unmergedBCRevision); } if (doc.getLocalRevisions().containsKey(unmergedBCRevision)) { + internalRevEntriesCount++; NodeDocument.removeRevision(updateOp, unmergedBCRevision); } if (doc.getLocalMap(NodeDocument.COLLISIONS).containsKey(unmergedBCRevision)) { + internalRevEntriesCount++; NodeDocument.removeCollision(updateOp, unmergedBCRevision); } // phase 4 : go through normal properties @@ -1233,8 +1327,16 @@ public class 
VersionGarbageCollector { } if (doc.getLocalMap(propName).containsKey(unmergedBCRevision)) { updateOp.removeMapEntry(propName, unmergedBCRevision); + revEntriesCount++; } }; + if (internalRevEntriesCount > 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), internalRevEntriesCount, Integer::sum); + } + if (revEntriesCount > 0) { + deletedPropRevsCountMap.merge(doc.getId(), revEntriesCount, Integer::sum); + } + } private void collectRevisionsOlderThan24hAndBetweenCheckpoints(final NodeDocument doc, @@ -1246,6 +1348,256 @@ public class VersionGarbageCollector { } } + /** + * Remove all property revisions in the local document that are no longer used. + * This includes bundled properties. It also includes related entries that + * become obsolete as a result - i.e. _commitRoot and _bc (though the latter + * is never removed on root) + */ + private void collectUnusedPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final boolean ignoreInternalProperties) { + + if (!phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){ + // cancelled + return; + } + final Set<Revision> allKeepRevs = new HashSet<>(); + // phase A : collectUnusedUserPropertyRevisions + int deletedTotalRevsCount = collectUnusedUserPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs); + int deletedUserRevsCount = deletedTotalRevsCount; + // phase B : collectUnusedInternalPropertyRevisions + if (!ignoreInternalProperties) { + deletedTotalRevsCount = collectUnusedInternalPropertyRevisions(doc, phases, updateOp, traversedMainNode, allKeepRevs, deletedTotalRevsCount); + } + + // then some accounting... 
+ int deletedInternalRevsCount = deletedTotalRevsCount - deletedUserRevsCount; + if (deletedUserRevsCount != 0) { + deletedPropRevsCountMap.merge(doc.getId(), deletedUserRevsCount, Integer::sum); + } + if (deletedInternalRevsCount != 0) { + deletedInternalPropRevsCountMap.merge(doc.getId(), deletedInternalRevsCount, Integer::sum); + } + phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS); + } + + private int collectUnusedUserPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> allKeepRevs) { + int deletedRevsCount = 0; + + // phase 1 : non bundled nodes only + for (PropertyState prop : traversedMainNode.getProperties()) { + final String escapedPropName = Utils.escapePropertyName(prop.getName()); + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedMainNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 2 : bundled nodes only + final Map<Path, DocumentNodeState> bundledNodeStates = new HashMap<>(); + for (DocumentNodeState dns : traversedMainNode.getAllBundledNodesStates()) { + bundledNodeStates.put(dns.getPath(), dns); + } + // remember that getAllBundledProperties returns unescaped keys + for (String propName : traversedMainNode.getAllBundledProperties().keySet()) { + final int lastSlash = propName.lastIndexOf("/"); + if (lastSlash == -1) { + // then it is an unbundled property which was already handled in phase 1 + continue; + } + final String escapedPropName = Utils.escapePropertyName(propName); + // bundled values are of format sub/tree/propertyKey + // to extract this we need the last index of "/" + final String unbundledSubtreeName = propName.substring(0, lastSlash); + final String unbundledPropName = propName.substring(lastSlash + 1); + final String unbundledPath = traversedMainNode.getPath().toString() + "/" + unbundledSubtreeName; + final DocumentNodeState 
traversedNode = bundledNodeStates.get(Path.fromString(unbundledPath)); + if (traversedNode == null) { + log.error("collectUnusedPropertyRevisions : could not find traversed node for bundled key {} unbundledPath {} in doc {}", + propName, unbundledPath, doc.getId()); + continue; + } + final PropertyState traversedProperty = traversedNode.getProperty(unbundledPropName); + if (traversedProperty == null) { + log.error("collectUnusedPropertyRevisions : could not get property {} from traversed node {}", + unbundledPropName, traversedNode.getPath()); + continue; + } + deletedRevsCount += removeUnusedPropertyEntries(doc, traversedNode, updateOp, escapedPropName, + (r) -> { updateOp.removeMapEntry(escapedPropName, r); return null;}, + allKeepRevs); + } + + // phase 3 : "_deleted" + int numDeleted = removeUnusedPropertyEntries(doc, traversedMainNode, + updateOp, NodeDocument.DELETED, + (r) -> { NodeDocument.removeDeleted(updateOp, r); return null;} , + allKeepRevs); + deletedRevsCount += numDeleted; + return deletedRevsCount; + } + + private int collectUnusedInternalPropertyRevisions(final NodeDocument doc, + final GCPhases phases, final UpdateOp updateOp, + final DocumentNodeState traversedMainNode, + final Set<Revision> toKeepUserPropRevs, + int deletedRevsCount) { + boolean hasUnmergedBranchCommits = false; + for (Revision localBc : doc.getLocalBranchCommits()) { + if (!isCommitted(nodeStore.getCommitValue(localBc, doc))) { + hasUnmergedBranchCommits = true; + } + } + if (deletedRevsCount == 0 && !hasUnmergedBranchCommits) { + return deletedRevsCount; + } + // if we did some rev deletion OR there are unmerged BCs, then let's deep-dive + Set<Revision> allRequiredRevs = new HashSet<>(toKeepUserPropRevs); + for (Revision revision : doc.getLocalMap(NodeDocument.COLLISIONS).keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COLLISIONS, revision)); + if (has != null) { + // then skip + continue; + } + 
NodeDocument.removeCollision(updateOp, revision); + deletedRevsCount++; + } + } + // "_revisions" + for (Entry<Revision, String> e : doc.getLocalRevisions().entrySet()) { + Revision revision = e.getKey(); + if (allRequiredRevs.contains(revision)) { + // if it is still referenced locally, keep it + continue; + } + final boolean isRoot = doc.getId().equals(Utils.getIdFromPath(Path.ROOT)); + // local bcs only considered for removal + final boolean isBC = !doc.getLocalBranchCommits().contains(revision); + final boolean newerThanSweep = nodeStore.getSweepRevisions().isRevisionNewer(revision); + if (newerThanSweep) { + //TODO wondering if we should at all do any DGC on documents newer than sweep + if (isBC) { + // analysed further down, we can remove them if unmerged + } else { + // for normal commits, we need to keep them -> also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } + } + // committed revisions are hard to delete, as determining + // whether or not they are orphaned is difficult. + // child nodes could be having _commitRoots pointing to this doc + // and this revision - in which case it wouldn't be orphaned. + // without scanning the children (or having knowledge about + // which revisions are referenced by children) it is unsafe + // to delete committed revisions. + // if they are uncommitted, then they are garbage, as then they + // are either normal commits (thus a not finished commit) + // or an unmerged branch commit. In both cases they are garbage. 
+ boolean isCommitted = isCommitted(nodeStore.getCommitValue(revision, doc)); + if (isCommitted) { + if (isRoot) { + // root : cannot remove since it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else if (!isBC) { + // non root and normal : cannot remove, same as above, + // it could be referenced and required by a child + // also add to allRequiredRevs + allRequiredRevs.add(revision); + continue; + } else { + // non root and bc : can remove, as non root bc cannot be referenced by child + } + } + Operation has = updateOp.getChanges().get(new Key(NodeDocument.REVISIONS, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeRevision(updateOp, revision); + deletedRevsCount++; + } + // "_commitRoot" + for (Revision revision : doc.getLocalCommitRoot().keySet()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.COMMIT_ROOT, revision)); + if (has != null) { + // then skip + continue; + } + NodeDocument.removeCommitRoot(updateOp, revision); + deletedRevsCount++; + } + } + // "_bc" + for (Revision revision : doc.getLocalBranchCommits()) { + if (!allRequiredRevs.contains(revision)) { + Operation has = updateOp.getChanges().get(new Key(NodeDocument.BRANCH_COMMITS, revision)); + if (has != null) { + // then skip + continue; + } + + NodeDocument.removeBranchCommit(updateOp, revision); + deletedRevsCount++; + } + } + return deletedRevsCount; + } + + private int removeUnusedPropertyEntries(NodeDocument doc, + DocumentNodeState traversedMainNode, UpdateOp updateOp, + String propertyKey, Function<Revision, Void> removeRevision, + Set<Revision> allKeepRevs) { + // we need to use the traversedNode.getLastRevision() as the readRevision, + // as that is what was originally used in getNodeAtRevision when traversing + final Revision keepCommitRev = doc.localCommitRevisionOfProperty(nodeStore, + 
traversedMainNode.getLastRevision(), propertyKey); + if (keepCommitRev == null) { + // could be due to node not existing or current value being in a split + // doc - while the former is unexpected, the latter might happen. + // in both cases let's skip this property + log.debug("removeUnusedPropertyEntries : no visible revision for property {} in doc {}", + propertyKey, doc.getId()); + return 0; + } + // if we get a revision it is from the local map. + // paranoia check that + final SortedMap<Revision, String> localMap = doc.getLocalMap(propertyKey); + if (!localMap.containsKey(keepCommitRev)) { + // this is unexpected - log and skip this property + log.error("removeUnusedPropertyEntries : revision {} for property {} not found in doc {}", + keepCommitRev, propertyKey, doc.getId()); + return 0; + } + int count = 0; + // in this case we are good to delete all but the keepRevision + for (Revision localRev : localMap.keySet()) { + if (!keepCommitRev.equals(localRev)) { + // if the localRev is a branch commit, it might be unmerged, + // in which case it might already have been marked for removal + // via collectUnmergedBranchCommits. Checking for that next. 
+ Operation c = updateOp.getChanges().get(new Key(propertyKey, localRev)); + if (c == null) { + log.trace("removeUnusedPropertyEntries : removing property key {} with revision {} from doc {}", + propertyKey, localRev, doc.getId()); + removeRevision.apply(localRev); + count++; + } + } + } + allKeepRevs.add(keepCommitRev); + return count; + } + + int getGarbageCount() { return totalGarbageDocsCount; } @@ -1325,10 +1677,18 @@ public class VersionGarbageCollector { if (!updateOpList.isEmpty()) { List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, updateOpList); + + int deletedProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum(); + int deletedInternalProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedInternalPropsCountMap.getOrDefault(d.getId(), 0)).sum(); + int deletedRevEntriesCount = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropRevsCountMap.getOrDefault(d.getId(), 0)).sum(); + int deletedInternalRevEntriesCount = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedInternalPropRevsCountMap.getOrDefault(d.getId(), 0)).sum(); int updatedDocs = (int) oldDocs.stream().filter(Objects::nonNull).count(); stats.updatedDetailedGCDocsCount += updatedDocs; stats.deletedPropsCount += deletedProps; + stats.deletedInternalPropsCount += deletedInternalProps; + stats.deletedPropRevsCount += deletedRevEntriesCount; + stats.deletedInternalPropRevsCount += deletedInternalRevEntriesCount; stats.deletedUnmergedBCCount += deletedUnmergedBCSet.size(); if (log.isDebugEnabled()) { @@ -1349,6 +1709,9 @@ public class VersionGarbageCollector { updateOpList.clear(); orphanOrDeletedRemovalMap.clear(); deletedPropsCountMap.clear(); + deletedInternalPropsCountMap.clear(); + deletedPropRevsCountMap.clear(); + deletedInternalPropRevsCountMap.clear(); deletedUnmergedBCSet.clear(); garbageDocsCount = 0; delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel); diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java index a81bd15cff..e6198a57c9 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BranchCommitGCTest.java @@ -22,8 +22,6 @@ import org.apache.jackrabbit.oak.spi.commit.CommitInfo; import org.apache.jackrabbit.oak.spi.commit.EmptyHook; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.stats.Clock; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -41,11 +39,8 @@ import static java.util.concurrent.TimeUnit.HOURS; import static org.apache.commons.lang3.reflect.FieldUtils.writeField; import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.assertBranchRevisionRemovedFromAllDocuments; import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.build; -import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.mergedBranchCommit; -import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.unmergedBranchCommit; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @@ -97,9 +92,13 @@ public class BranchCommitGCTest { // clean everything older than one hour VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); - assertEquals(3, stats.updatedDetailedGCDocsCount); + assertEquals(2, stats.updatedDetailedGCDocsCount); assertEquals(2, stats.deletedDocGCCount); - assertEquals(1, stats.deletedUnmergedBCCount); + 
assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(0, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedPropsCount); + assertEquals(0, stats.deletedInternalPropsCount); // now do another gc - should not have anything left to clean up though clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); @@ -107,6 +106,10 @@ public class BranchCommitGCTest { assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedDocGCCount); assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(0, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(0, stats.deletedInternalPropsCount); assertNotExists("1:/a"); assertNotExists("1:/b"); assertBranchRevisionRemovedFromAllDocuments(store, br); @@ -130,6 +133,8 @@ public class BranchCommitGCTest { b.child("b"); }); + assertEquals(1, countCollisionsOnRoot()); + store.runBackgroundOperations(); mergedBranchCommit(b -> { @@ -139,15 +144,27 @@ public class BranchCommitGCTest { store.runBackgroundOperations(); + int collisionsBeforeGC = countCollisionsOnRoot(); // wait two hours clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hour VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); - assertTrue("stats.updatedDetailedGCDocsCount expected 1 or less, was: " + stats.updatedDetailedGCDocsCount, stats.updatedDetailedGCDocsCount <= 1); assertEquals(2, stats.deletedDocGCCount); - assertEquals(1, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedUnmergedBCCount); + if (collisionsBeforeGC == 1) { + // expects a collision to have happened - which was cleaned up - hence a _bc (but not the _revision I guess) + assertEquals(0, stats.deletedPropRevsCount); + // the collisions cleaned up - with the 1 collision and a _bc + assertEquals(0, stats.deletedPropsCount); + assertEquals(1, 
stats.deletedInternalPropsCount); + } else { + // in this case classic collision cleanup already took care of everything, nothing left + assertEquals(0, stats.deletedPropRevsCount); + // the collisions cleaned up - even though empty + assertEquals(1, stats.deletedPropsCount); + } assertNotExists("1:/a"); assertNotExists("1:/b"); @@ -160,6 +177,13 @@ public class BranchCommitGCTest { assertBranchRevisionRemovedFromAllDocuments(store, br); } + private int countCollisionsOnRoot() { + NodeDocument r = store.getDocumentStore().find(Collection.NODES, "0:/"); + SortedMap<Revision, String> colls = r.getLocalMap(NodeDocument.COLLISIONS); + int countCollisions = colls.size(); + return countCollisions; + } + @Test public void testDeletedPropsAndUnmergedBC() throws Exception { // create a node with property. @@ -192,10 +216,13 @@ public class BranchCommitGCTest { // clean everything older than one hour VersionGarbageCollector.VersionGCStats stats = gc.gc(1, HOURS); + // 6 deleted props: 0:/[_collisions], 1:/foo[p, a], 1:/bar[_bc,prop,_revisions] + assertEquals(3, stats.deletedPropsCount); + assertEquals(3, stats.deletedInternalPropsCount); + assertEquals(1, stats.deletedPropRevsCount); + assertEquals(17, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedUnmergedBCCount); assertEquals(3, stats.updatedDetailedGCDocsCount); - // deleted properties are : 1:/foo -> prop, a, _collisions & p && 1:/bar -> _bc - assertEquals(5, stats.deletedPropsCount); - assertEquals(4, stats.deletedUnmergedBCCount); assertBranchRevisionRemovedFromAllDocuments(store, br1); assertBranchRevisionRemovedFromAllDocuments(store, br2); assertBranchRevisionRemovedFromAllDocuments(store, br3); @@ -226,7 +253,12 @@ public class BranchCommitGCTest { assertEquals(3, stats.updatedDetailedGCDocsCount); assertEquals(2, stats.deletedDocGCCount); - assertEquals(2, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedPropsCount); + 
assertEquals(1, stats.deletedInternalPropsCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(0, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedPropRevsCount); assertNotExists("1:/a"); assertNotExists("1:/b"); @@ -237,6 +269,10 @@ public class BranchCommitGCTest { assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedDocGCCount); assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(0, stats.deletedInternalPropsCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(0, stats.deletedInternalPropRevsCount); assertBranchRevisionRemovedFromAllDocuments(store, br1); assertBranchRevisionRemovedFromAllDocuments(store, br2); } @@ -273,7 +309,9 @@ public class BranchCommitGCTest { assertTrue("should have been 2 or more, was: " + stats.updatedDetailedGCDocsCount, stats.updatedDetailedGCDocsCount >= 2); assertEquals(0, stats.deletedDocGCCount); - assertEquals(2, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(4, stats.deletedPropRevsCount); + assertEquals(12, stats.deletedInternalPropRevsCount); assertExists("1:/a"); assertExists("1:/b"); @@ -284,6 +322,7 @@ public class BranchCommitGCTest { assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedDocGCCount); assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedPropRevsCount); assertBranchRevisionRemovedFromAllDocuments(store, br1); assertBranchRevisionRemovedFromAllDocuments(store, br2); } @@ -330,7 +369,9 @@ public class BranchCommitGCTest { assertEquals(3, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedDocGCCount); - assertEquals(4, stats.deletedUnmergedBCCount); + assertEquals(8, stats.deletedPropRevsCount); + assertEquals(20, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedUnmergedBCCount); assertExists("1:/a"); assertExists("1:/b"); @@ -340,6 +381,7 @@ 
public class BranchCommitGCTest { stats = gc.gc(1, HOURS); assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedDocGCCount); + assertEquals(0, stats.deletedPropRevsCount); assertEquals(0, stats.deletedUnmergedBCCount); assertBranchRevisionRemovedFromAllDocuments(store, br1); assertBranchRevisionRemovedFromAllDocuments(store, br2); @@ -366,8 +408,10 @@ public class BranchCommitGCTest { // first gc round now deletes it, via orphaned node deletion assertEquals(1, stats.deletedDocGCCount); - assertEquals(4, stats.updatedDetailedGCDocsCount); - assertEquals(1, stats.deletedUnmergedBCCount); + assertEquals(3, stats.updatedDetailedGCDocsCount); + assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(1, stats.deletedPropRevsCount); + assertEquals(4, stats.deletedInternalPropRevsCount); // wait two hours clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); @@ -376,6 +420,7 @@ public class BranchCommitGCTest { assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedDocGCCount); assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedPropRevsCount); assertBranchRevisionRemovedFromAllDocuments(store, br); } @@ -397,9 +442,12 @@ public class BranchCommitGCTest { // clean everything older than one hour stats = gc.gc(1, HOURS); - assertEquals(2, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedPropsCount); - assertEquals(1, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedInternalPropsCount); + assertEquals(1, stats.deletedPropRevsCount); + assertEquals(4, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(2, stats.updatedDetailedGCDocsCount); assertBranchRevisionRemovedFromAllDocuments(store, br); } @@ -414,9 +462,13 @@ public class BranchCommitGCTest { // clean everything older than one hour stats = gc.gc(1, HOURS); - assertEquals(2, stats.updatedDetailedGCDocsCount); + assertEquals(1, stats.updatedDetailedGCDocsCount); 
+ // 1 deleted prop: 1:/foo[a] assertEquals(1, stats.deletedPropsCount); - assertEquals(1, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedInternalPropsCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(2, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedUnmergedBCCount); assertBranchRevisionRemovedFromAllDocuments(store, br); } @@ -447,7 +499,9 @@ public class BranchCommitGCTest { stats = gc.gc(1, HOURS); assertEquals(2, stats.updatedDetailedGCDocsCount); - assertEquals(10, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(10, stats.deletedPropRevsCount); + assertEquals(20, stats.deletedInternalPropRevsCount); doc = store.getDocumentStore().find(Collection.NODES, "1:/foo"); Long finalModified = doc.getModified(); @@ -487,7 +541,9 @@ public class BranchCommitGCTest { stats = gc.gc(1, HOURS); assertEquals(2, stats.updatedDetailedGCDocsCount); - assertEquals(10, stats.deletedUnmergedBCCount); + assertEquals(10, stats.deletedPropRevsCount); + assertEquals(20, stats.deletedInternalPropRevsCount); + assertEquals(0, stats.deletedUnmergedBCCount); for (RevisionVector br : brs) { assertBranchRevisionRemovedFromAllDocuments(store, br); } @@ -504,6 +560,7 @@ public class BranchCommitGCTest { assertEquals(0, stats.updatedDetailedGCDocsCount); assertEquals(0, stats.deletedUnmergedBCCount); assertEquals(0, stats.deletedPropsCount); + assertEquals(0, stats.deletedPropRevsCount); RevisionVector br = unmergedBranchCommit(b -> { b.setProperty("rootProp", "v"); @@ -533,7 +590,9 @@ public class BranchCommitGCTest { stats = gc.gc(1, HOURS); assertEquals(2, stats.updatedDetailedGCDocsCount); - assertEquals(1, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedUnmergedBCCount); + assertEquals(0, stats.deletedPropRevsCount); + assertEquals(6, stats.deletedInternalPropRevsCount); // deleted properties are 0:/ -> rootProp, _collisions & 1:/foo -> a { // some flakyness diagnostics @@ 
-545,7 +604,10 @@ public class BranchCommitGCTest { assertNotNull(d); assertEquals(0, d.getLocalMap("a").size()); } - assertEquals(3, stats.deletedPropsCount); + // deleted props: 0:/[rootProp], 1:/foo[a] + assertEquals(2, stats.deletedPropsCount); + // deleted prop : 0:/ _collision + assertEquals(1, stats.deletedInternalPropsCount); assertBranchRevisionRemovedFromAllDocuments(store, br); } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java index a64900fe57..1f6caff165 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java @@ -18,6 +18,7 @@ */ package org.apache.jackrabbit.oak.plugins.document; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.RDGCType; import org.apache.jackrabbit.oak.plugins.document.util.Utils; import org.apache.jackrabbit.oak.spi.commit.CommitInfo; import org.apache.jackrabbit.oak.spi.commit.EmptyHook; @@ -25,7 +26,9 @@ import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStore; +import java.util.Map.Entry; import java.util.Objects; +import java.util.Set; import java.util.function.Consumer; import static org.apache.commons.lang3.reflect.FieldUtils.writeField; @@ -67,17 +70,35 @@ public class DetailGCHelper { return result; } - public static void assertBranchRevisionRemovedFromAllDocuments(final DocumentNodeStore store, final RevisionVector br) { + public static void assertBranchRevisionRemovedFromAllDocuments( + final DocumentNodeStore store, final RevisionVector br, + final String... 
exceptIds) { assertTrue(br.isBranch()); Revision br1 = br.getRevision(1); assert br1 != null; Revision r1 = br1.asTrunkRevision(); + Set<String> except = Set.of(exceptIds); for (NodeDocument nd : Utils.getAllDocuments(store.getDocumentStore())) { - if (Objects.equals(nd.getId(), "0:/")) { - NodeDocument target = new NodeDocument(store.getDocumentStore()); - nd.deepCopy(target); + if (nd.getId().equals(Utils.getIdFromPath(Path.ROOT)) + || except.contains(nd.getId())) { + // skip root and the provided ids + continue; + } + NodeDocument target = new NodeDocument(store.getDocumentStore()); + nd.deepCopy(target); + // ignore the _revisions entry, as we cannot always cleanup everything in there + target.remove(NodeDocument.REVISIONS); + for (Entry<String, Object> e : target.data.entrySet()) { + String k = e.getKey(); + final boolean internal = k.startsWith("_"); + final boolean dgcSupportsInternalPropCleanup = (VersionGarbageCollector.revisionDetailedGcType != RDGCType.KeepOneCleanupUserPropertiesOnlyMode); + if (internal && !dgcSupportsInternalPropCleanup) { + // skip + continue; + } + assertFalse("document not cleaned up for prop " + k + " : " + e, + e.toString().contains(r1.toString())); } - assertFalse("document not fully cleaned up: " + nd.asString(), nd.asString().contains(r1.toString())); } } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index 449e6aa594..e731facc7c 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Random; import java.util.Set; 
import java.util.SortedMap; @@ -108,6 +109,7 @@ import org.apache.jackrabbit.oak.api.PropertyState; import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDBFixture; import org.apache.jackrabbit.oak.plugins.document.FailingDocumentStore.FailedUpdateOpListener; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.RDGCType; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; import org.apache.jackrabbit.oak.plugins.document.bundlor.BundlingConfigInitializer; import org.apache.jackrabbit.oak.plugins.document.mongo.MongoTestUtils; @@ -428,15 +430,14 @@ public class VersionGarbageCollectorIT { // 6. Now run gc after checkpoint and see removed properties gets collected clock.waitUntil(clock.getTime() + delta*2); VersionGCStats stats = gc(gc, delta, MILLISECONDS); - assertEquals(1, stats.deletedPropsCount); - assertEquals(1, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 1, 0, 0, 0, 0, 1); assertTrue(stats.ignoredGCDueToCheckPoint); assertTrue(stats.ignoredDetailedGCDueToCheckPoint); assertTrue(stats.canceled); } @Test - public void testGCDeletedProps() throws Exception{ + public void testGCDeletedProps() throws Exception { //1. Create nodes with properties NodeBuilder b1 = store1.getRoot().builder(); @@ -462,8 +463,7 @@ public class VersionGarbageCollectorIT { //1. 
Go past GC age and check no GC done as nothing deleted clock.waitUntil(getCurrentTimestamp() + maxAge); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //Remove property NodeBuilder b2 = store1.getRoot().builder(); @@ -476,15 +476,13 @@ public class VersionGarbageCollectorIT { //Clock cannot move back (it moved forward in #1) so double the maxAge clock.waitUntil(clock.getTime() + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //3. Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(1, stats.deletedPropsCount); - assertEquals(1, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 1, 0, 0, 0, 0, 1); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); //4. 
Check that a revived property (deleted and created again) does not get gc @@ -497,9 +495,9 @@ public class VersionGarbageCollectorIT { store1.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY); clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 2, 1, 0, 1); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); } @@ -536,8 +534,7 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); VersionGCStats stats = gc(gc, maxAge*2, HOURS); - assertEquals(50_000, stats.deletedPropsCount); - assertEquals(5_000, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 50_000, 0, 0, 0, 0, 5_000); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); } @@ -546,16 +543,12 @@ public class VersionGarbageCollectorIT { //1. Create nodes with properties NodeBuilder b1 = store1.getRoot().builder(); for (int k = 0; k < 50; k ++) { - b1 = store1.getRoot().builder(); // Add property to node & save for (int i = 0; i < 100; i++) { for (int j = 0; j < 10; j++) { b1.child(k + "z" + i).setProperty("prop" + j, "foo", STRING); } } - store1.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); - // increase the clock to create new revision for next batch - clock.waitUntil(Revision.getCurrentTimestamp() + (k * 5)); } store1.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); @@ -577,15 +570,13 @@ public class VersionGarbageCollectorIT { // increase the clock to create new revision for next batch clock.waitUntil(getCurrentTimestamp() + (k * 5)); } - store1.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY); store1.runBackgroundOperations(); //3. 
Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(50_000, stats.deletedPropsCount); - assertEquals(5_000, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 50_000, 0, 0, 0, 0, 5_000); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); } @@ -647,7 +638,7 @@ public class VersionGarbageCollectorIT { //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(getCurrentTimestamp() + maxAge); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(0, stats.deletedPropsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //Remove property NodeBuilder b2 = store1.getRoot().builder(); @@ -661,8 +652,7 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(10, stats.deletedPropsCount); - assertEquals(10, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 10, 0, 0, 0, 0, 10); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); //3. 
now reCreate those properties again @@ -686,8 +676,7 @@ public class VersionGarbageCollectorIT { // increment the clock again by more than 2 hours + delta clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(10, stats.deletedPropsCount); - assertEquals(10, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 10, 0, 0, 0, 0, 10); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); } @@ -757,8 +746,7 @@ public class VersionGarbageCollectorIT { // increment the clock again by more than 2 hours + delta clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); VersionGCStats stats = gc(gc, maxAge*2, HOURS); - assertEquals(10, stats.deletedPropsCount); - assertEquals(10, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 10, 0, 0, 0, 0, 10); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); } @@ -780,8 +768,7 @@ public class VersionGarbageCollectorIT { //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(getCurrentTimestamp() + maxAge); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //Remove property NodeBuilder b2 = store1.getRoot().builder(); @@ -796,14 +783,13 @@ public class VersionGarbageCollectorIT { //Clock cannot move back (it moved forward in #1) so double the maxAge clock.waitUntil(clock.getTime() + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //3. Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(10, stats.deletedPropsCount); + assertStatsCountsEqual(stats, 0, 10, 0, 0, 0, 0, 1); //4. 
Check that a revived property (deleted and created again) does not get gc NodeBuilder b4 = store1.getRoot().builder(); @@ -814,8 +800,7 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); } @Test @@ -838,8 +823,7 @@ public class VersionGarbageCollectorIT { //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(getCurrentTimestamp() + maxAge); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //Remove property NodeBuilder b2 = store1.getRoot().builder(); @@ -854,14 +838,13 @@ public class VersionGarbageCollectorIT { //Clock cannot move back (it moved forward in #1) so double the maxAge clock.waitUntil(clock.getTime() + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //3. Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(10, stats.deletedPropsCount); + assertStatsCountsEqual(stats, 0, 10, 0, 0, 0, 0, 1); //4. 
Check that a revived property (deleted and created again) does not get gc NodeBuilder b4 = store1.getRoot().builder(); @@ -872,8 +855,7 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); } @Test @@ -905,8 +887,7 @@ public class VersionGarbageCollectorIT { //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(getCurrentTimestamp() + maxAge); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //Remove property NodeBuilder b2 = store1.getRoot().builder(); @@ -921,14 +902,13 @@ public class VersionGarbageCollectorIT { //Clock cannot move back (it moved forward in #1) so double the maxAge clock.waitUntil(clock.getTime() + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //3. 
Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(10, stats.deletedPropsCount); + assertStatsCountsEqual(stats, 0, 10, 0, 0, 0, 0, 1); } @Test @@ -946,13 +926,23 @@ public class VersionGarbageCollectorIT { b1.child("x").setProperty("jcr:primaryType", "nt:file", NAME); // Add property to node & save - for (int i = 0; i < 10; i++) { + // adding 11 to have the 10th being a special case as we're not removing it below + for (int i = 0; i < 11; i++) { b1.child("x").child("jcr:content").setProperty("prop"+i, "t", STRING); b1.child("x").setProperty(META_PROP_PATTERN, of("jcr:content"), STRINGS); b1.child("x").setProperty("prop"+i, "bar", STRING); } store1.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + NodeState x = store1.getRoot().getChildNode("x"); + assertTrue(x.exists()); + assertTrue(x.hasProperty("prop0")); + assertTrue(x.hasProperty("prop10")); + NodeState jcrContent = x.getChildNode("jcr:content"); + assertTrue(jcrContent.exists()); + assertTrue(jcrContent.hasProperty("prop10")); + assertTrue(jcrContent.hasProperty("prop0")); + // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours @@ -960,8 +950,16 @@ public class VersionGarbageCollectorIT { //1. 
Go past GC age and check no GC done as nothing deleted clock.waitUntil(getCurrentTimestamp() + maxAge); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); + + x = store1.getRoot().getChildNode("x"); + assertTrue(x.exists()); + assertTrue(x.hasProperty("prop0")); + assertTrue(x.hasProperty("prop10")); + jcrContent = x.getChildNode("jcr:content"); + assertTrue(jcrContent.exists()); + assertTrue(jcrContent.hasProperty("prop10")); + assertTrue(jcrContent.hasProperty("prop0")); //Remove property NodeBuilder b2 = store1.getRoot().builder(); @@ -976,14 +974,58 @@ public class VersionGarbageCollectorIT { //Clock cannot move back (it moved forward in #1) so double the maxAge clock.waitUntil(clock.getTime() + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); + + x = store1.getRoot().getChildNode("x"); + assertTrue(x.exists()); + assertTrue(x.hasProperty("prop0")); + assertTrue(x.hasProperty("prop10")); + jcrContent = x.getChildNode("jcr:content"); + assertTrue(jcrContent.exists()); + assertTrue(jcrContent.hasProperty("prop10")); + assertFalse(jcrContent.hasProperty("prop0")); //3. 
Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc(gc, maxAge*2, HOURS); - assertEquals(10, stats.deletedPropsCount); + assertStatsCountsEqual(stats, 0, 10, 0, 0, 0, 0, 1); + + x = store1.getRoot().getChildNode("x"); + assertTrue(x.exists()); + assertTrue(x.hasProperty("prop0")); + assertTrue(x.hasProperty("prop10")); + jcrContent = x.getChildNode("jcr:content"); + assertTrue(jcrContent.exists()); + assertTrue(jcrContent.hasProperty("prop10")); + assertFalse(jcrContent.hasProperty("prop0")); + } + + private void assertStatsCountsEqual(VersionGCStats stats, + int deletedDocGCCount, + int deletedPropsCount, + int deletedInternalPropsCount, + int deletedPropRevsCount, + int deletedInternalPropRevsCount, + int deletedUnmergedBCCount, + int updatedDetailedGCDocsCount) { + assertNotNull(stats); + assertEquals(deletedDocGCCount, stats.deletedDocGCCount); + assertEquals(deletedPropsCount, stats.deletedPropsCount); + if (VersionGarbageCollector.revisionDetailedGcType == RDGCType.KeepOneFullMode) { + assertEquals(deletedInternalPropsCount, stats.deletedInternalPropsCount); + } + assertEquals(deletedPropRevsCount, stats.deletedPropRevsCount); + if (VersionGarbageCollector.revisionDetailedGcType == RDGCType.KeepOneFullMode) { + assertEquals(deletedInternalPropRevsCount, stats.deletedInternalPropRevsCount); + } + assertEquals(deletedUnmergedBCCount, stats.deletedUnmergedBCCount); + if (VersionGarbageCollector.revisionDetailedGcType != RDGCType.KeepOneCleanupUserPropertiesOnlyMode) { + // TODO: unfortunately, in cleanup-user-props-only mode the expected count below + // is inaccurate. So either we extend this assert methods to get values per mode, + // or we ignore this for now. not nice but should only be temporary + assertEquals(updatedDetailedGCDocsCount, stats.updatedDetailedGCDocsCount); + } } @Test @@ -1073,8 +1115,7 @@ public class VersionGarbageCollectorIT { //1. 
Go past GC age and check no GC done as nothing deleted clock.waitUntil(getCurrentTimestamp() + maxAge); VersionGCStats stats = gc(gc, maxAge, HOURS); - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); //Remove property NodeBuilder b2 = store1.getRoot().builder(); @@ -1124,6 +1165,154 @@ public class VersionGarbageCollectorIT { // OAK-10199 END + @Test + public void parentGCChildIndependent() throws Exception { + assumeTrue(fixture.hasSinglePersistence()); + NodeBuilder nb = store1.getRoot().builder(); + NodeBuilder parent = nb.child("parent"); + parent.setProperty("pk", "pv"); + parent.child("child").setProperty("ck", "cv"); + store1.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + nb = store1.getRoot().builder(); + nb.child("parent").setProperty("pk", "pv2"); + store1.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store1.runBackgroundOperations(); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + + // now make a child change so that only parent (and root) gets GCed + nb = store1.getRoot().builder(); + nb.child("parent").child("child").setProperty("ck", "cv2"); + store1.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + // now unlike usually, DONT do a store1.runBackgroundOperations() here + // this will leave the parent GC-able + + // now the GC + VersionGCStats stats = gc(gc, 1, HOURS); + assertStatsCountsEqual(stats, 0, 0, 0, 1, 0, 0, 1); + } + + @Test + public void testPartialMergeRootCleanup() throws Exception { + assumeTrue(fixture.hasSinglePersistence()); + + createSecondaryStore(LeaseCheckMode.LENIENT, true); + + NodeBuilder nb = store2.getRoot().builder(); + nb.child("node1").setProperty("a", "1"); + store2.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store2.runBackgroundOperations(); + + // create the orphaned paths + final 
FailingDocumentStore fds = (FailingDocumentStore) ds2; + fds.fail().after(1).eternally(); + + // create partial merge + nb = store2.getRoot().builder(); + nb.child("node1").setProperty("a", "2"); + nb.setProperty("rootProp1", "rootValue1"); + try { + store2.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + fail("expected 'OakOak0001: write operation failed'"); + } catch(CommitFailedException cfe) { + assertEquals("OakOak0001: write operation failed", cfe.getMessage()); + } + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + // clean everything older than one hour + + // before the gc, 1h+ later, do a last-minute modification (only) on /node1 (without root update) + nb = store1.getRoot().builder(); + nb.child("node1").setProperty("b", "4"); + try { + store1.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + fail("should fail"); + } catch(Exception e) { + // expected to fail + } + VersionGCStats stats = gc(gc, 1, HOURS); + store1.runBackgroundOperations(); + store1.runBackgroundOperations(); + createSecondaryStore(LeaseCheckMode.LENIENT); + NodeState node1 = store2.getRoot().getChildNode("node1"); + assertEquals("1", node1.getProperty("a").getValue(Type.STRING)); + assertFalse(node1.hasProperty("b")); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); + } + + @Test + public void testUnmergedBCRootCleanup() throws Exception { + assumeTrue(fixture.hasSinglePersistence()); + NodeBuilder nb = store1.getRoot().builder(); + nb.child("node1").setProperty("a", "1"); + store1.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store1.runBackgroundOperations(); + + // create branch commits + RevisionVector br1 = unmergedBranchCommit(store1, b -> b.child("node1").setProperty("a", "2")); + store1.runBackgroundOperations(); + store1.invalidateNodeChildrenCache(); + store1.getNodeCache().invalidateAll(); + assertEquals("1", 
store1.getRoot().getChildNode("node1").getProperty("a").getValue(Type.STRING)); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + + // wait two hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + + store1.invalidateNodeChildrenCache(); + store1.getNodeCache().invalidateAll(); + assertEquals("1", store1.getRoot().getChildNode("node1").getProperty("a").getValue(Type.STRING)); + + // clean everything older than one hour + + // before the gc, 1h+ later, do a last-minute modification (only) on /node1 (without root update) + nb = store1.getRoot().builder(); + nb.child("node1").setProperty("b", "4"); + store1.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + VersionGCStats stats = gc(gc, 1, HOURS); + store1.invalidateNodeChildrenCache(); + store1.getNodeCache().invalidateAll(); + assertEquals("1", store1.getRoot().getChildNode("node1").getProperty("a").getValue(Type.STRING)); + store1.runBackgroundOperations(); + store1.invalidateNodeChildrenCache(); + store1.getNodeCache().invalidateAll(); + assertEquals("1", store1.getRoot().getChildNode("node1").getProperty("a").getValue(Type.STRING)); + store1.runBackgroundOperations(); + store1.invalidateNodeChildrenCache(); + store1.getNodeCache().invalidateAll(); + assertEquals("1", store1.getRoot().getChildNode("node1").getProperty("a").getValue(Type.STRING)); + createSecondaryStore(LeaseCheckMode.LENIENT); + + // while "2" was written to node1/a via an unmerged branch commit, + // it should not have been made visible through DGC/sweep combo + store2.invalidateNodeChildrenCache(); + store2.getNodeCache().invalidateAll(); + assertEquals("1", store2.getRoot().getChildNode("node1").getProperty("a").getValue(Type.STRING)); + assertEquals("4", store2.getRoot().getChildNode("node1").getProperty("b").getValue(Type.STRING)); + store2.invalidateNodeChildrenCache(); + store2.getNodeCache().invalidateAll(); + assertEquals("1", 
store2.getRoot().getChildNode("node1").getProperty("a").getValue(Type.STRING)); + assertEquals("4", store2.getRoot().getChildNode("node1").getProperty("b").getValue(Type.STRING)); + + // deletedPropsCount=0 : _bc on /node1 and / CANNOT be removed + // deletedPropRevsCount=1 : (nothing on /node1[a, _commitRoot), /[_revisions] + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); + // checking for br1 revisino to have disappeared doesn't really make much sense, + // since 1:/node1 isn't GCed as it is young, and 0:/ being root cannot guarantee full removal + // (if br1 is deleted form 0:/ _bc, then the commit value resolution flips it to committed) + assertBranchRevisionRemovedFromAllDocuments(store1, br1, "1:/node1"); + } + // OAK-8646 @Test public void testDeletedPropsAndUnmergedBCWithoutCollision() throws Exception { @@ -1155,10 +1344,7 @@ public class VersionGarbageCollectorIT { // clean everything older than one hour VersionGCStats stats = gc(gc, 1, HOURS); - assertEquals(3, stats.updatedDetailedGCDocsCount); - // deleted properties are : 1:/foo -> prop, a & p && 1:/bar -> _bc - assertEquals(4, stats.deletedPropsCount); - assertEquals(2, stats.deletedUnmergedBCCount); + assertStatsCountsEqual(stats, 0, 3, 2, 1, 9, 0, 3); assertBranchRevisionRemovedFromAllDocuments(store1, br1); assertBranchRevisionRemovedFromAllDocuments(store1, br4); } @@ -1193,12 +1379,9 @@ public class VersionGarbageCollectorIT { // wait two hours clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hour - VersionGCStats stats = gc(gc, 1, HOURS); - assertEquals(3, stats.updatedDetailedGCDocsCount); - // deleted properties are : 1:/foo -> prop, a, _collisions & p && 1:/bar -> _bc - assertEquals(5, stats.deletedPropsCount); - assertEquals(4, stats.deletedUnmergedBCCount); + VersionGCStats stats = gc(gc, 1, HOURS); + assertStatsCountsEqual(stats, 0, 3, 3, 1, 17, 0, 3); assertBranchRevisionRemovedFromAllDocuments(store1, br1); 
assertBranchRevisionRemovedFromAllDocuments(store1, br2); assertBranchRevisionRemovedFromAllDocuments(store1, br3); @@ -1261,7 +1444,6 @@ public class VersionGarbageCollectorIT { assertNotNull(store1.getDocumentStore().find(NODES, "2:/a/b")); assertNotNull(store1.getDocumentStore().find(NODES, "4:/a/b/c/d")); assertTrue(getChildeNodeState(store1, "/a/b/c/d", true).exists()); - //TODO: below assert fails currently as uncommitted revisions aren't yet removed // should be 3 as it should clean up the _deleted from /a/b, /a/b/c and /a/b/c/d assertEquals(3, stats.updatedDetailedGCDocsCount); } @@ -1332,6 +1514,123 @@ public class VersionGarbageCollectorIT { createNodes("/a/b/c/d/e"); } + @Ignore(value="this is a reminder to add bundling-detailedGC tests in general, plus some of those cases combined with OAK-10542") + @Test + public void testBundlingAndLatestSplit() throws Exception { + fail("yet to be implemented"); + } + + @Test + public void testBundledPropUnmergedBCGC() throws Exception { + //0. Initialize bundling configs + final NodeBuilder builder = store1.getRoot().builder(); + new InitialContent().initialize(builder); + BundlingConfigInitializer.INSTANCE.initialize(builder); + merge(store1, builder); + store1.runBackgroundOperations(); + + //1. 
Create nodes with properties + NodeBuilder b1 = store1.getRoot().builder(); + b1.child("x").setProperty("jcr:primaryType", "nt:file", NAME); + String nasty_key1 = "nas.ty_key$%^"; + String nasty_key2 = "nas.ty_key$%^2"; + b1.child("x").setProperty(nasty_key1, "v", STRING); + b1.child("x").child("jcr:content").setProperty(nasty_key2, "v2", STRING); + store1.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + NodeBuilder nb = store1.getRoot().builder(); + nb.child("x").setProperty(nasty_key1, "v3", STRING); + nb.child("x").child("jcr:content").setProperty("prop", "value"); + store1.merge(nb, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store1.runBackgroundOperations(); + + // create branch commits + mergedBranchCommit(store1, b -> b.child("x").child("jcr:content").setProperty("p", "prop")); + unmergedBranchCommit(store1, b -> b.child("x").child("jcr:content").setProperty("a", "b")); + unmergedBranchCommit(store1, b -> b.child("x").child("jcr:content").setProperty(nasty_key2, "c")); + unmergedBranchCommit(store1, b -> b.child("x").child("jcr:content").setProperty(nasty_key2, "d")); + mergedBranchCommit(store1, b -> b.child("x").child("jcr:content").removeProperty("p")); + store1.runBackgroundOperations(); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + long maxAgeHours = 1; + long maxAgeMillis = TimeUnit.HOURS.toMillis(maxAgeHours); + //1. Go past GC age and check no GC done as nothing deleted + clock.waitUntil(getCurrentTimestamp() + maxAgeMillis + 1); + + VersionGCStats stats = gc(gc, maxAgeHours, HOURS); + assertStatsCountsEqual(stats, 0, 2, 2, 3, 11, 0, 2); + } + + @Test + public void testBundledPropRevGC() throws Exception { + //0. Initialize bundling configs + final NodeBuilder builder = store1.getRoot().builder(); + new InitialContent().initialize(builder); + BundlingConfigInitializer.INSTANCE.initialize(builder); + merge(store1, builder); + store1.runBackgroundOperations(); + + //1. 
Create nodes with properties + NodeBuilder b1 = store1.getRoot().builder(); + b1.child("x").setProperty("jcr:primaryType", "nt:file", NAME); + + // Add property to node & save + for (int i = 0; i < 10; i++) { + b1.child("x").child("jcr:content").setProperty("bprop"+i, "t", STRING); + b1.child("x").setProperty(META_PROP_PATTERN, of("jcr:content"), STRINGS); + b1.child("x").setProperty("prop"+i, "bar", STRING); + } + String nasty_key1 = "nas.ty_key$%^"; + String nasty_key2 = "nas.ty_key$%^2"; + b1.child("x").setProperty(nasty_key1, "v", STRING); + b1.child("x").child("jcr:content").setProperty(nasty_key2, "v2", STRING); + store1.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // make some overwrites for DetailedGC to cleanup + b1 = store1.getRoot().builder(); + for (int i = 0; i < 6; i++) { + b1.child("x").child("jcr:content").setProperty("bprop"+i, "t2", STRING); + } + for (int i = 0; i < 3; i++) { + b1.child("x").setProperty("prop"+i, "bar2", STRING); + } + b1.child("x").setProperty(nasty_key1, "bv", STRING); + b1.child("x").child("jcr:content").setProperty(nasty_key2, "bv2", STRING); + store1.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store1.runBackgroundOperations(); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + long maxAgeHours = 1; + long maxAgeMillis = TimeUnit.HOURS.toMillis(maxAgeHours); + //1. 
Go past GC age and check no GC done as nothing deleted + clock.waitUntil(getCurrentTimestamp() + maxAgeMillis + 1); + VersionGCStats stats = gc(gc, maxAgeHours, HOURS); + assertStatsCountsEqual(stats, 0, 0, 0, 11, 0, 0, 1); + + NodeState x = store1.getRoot().getChildNode("x"); + assertTrue(x.exists()); + assertEquals(x.getProperty("prop0").getValue(Type.STRING), "bar2"); + assertEquals(x.getProperty("prop9").getValue(Type.STRING), "bar"); + NodeState jcrContent = x.getChildNode("jcr:content"); + assertTrue(jcrContent.exists()); + assertEquals(jcrContent.getProperty("bprop0").getValue(Type.STRING), "t2"); + assertEquals(jcrContent.getProperty("bprop9").getValue(Type.STRING), "t"); + + NodeDocument doc = store1.getDocumentStore().find(NODES, "1:/x", -1); + assertNotNull(doc); + for (Entry<String, Object> e : doc.entrySet()) { + Object v = e.getValue(); + if (v instanceof Map) { + @SuppressWarnings("rawtypes") + Map m = (Map)v; + assertEquals("more than 1 entry for " + e.getKey(), 1, m.size()); + } + } + } + // OAK-10370 @Test public void testGCDeletedPropsWithDryRunMode() throws Exception { @@ -1357,8 +1656,7 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); VersionGCStats stats = gc(gc, maxAge*2, HOURS); - assertEquals(1, stats.deletedPropsCount); - assertEquals(1, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 1, 0, 0, 0, 0, 1); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); // 4. 
Save values of detailedGC settings collection fields @@ -1384,8 +1682,7 @@ public class VersionGarbageCollectorIT { final String oldestModifiedDryRunDocId = stats.oldestModifiedDocId; final long oldestModifiedDocDryRunTimeStamp = stats.oldestModifiedDocTimeStamp; - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.updatedDetailedGCDocsCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); assertTrue(stats.detailedGCDryRunMode); @@ -1430,11 +1727,7 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hour VersionGCStats stats = gc(gc, 1, HOURS); - - assertEquals(0, stats.updatedDetailedGCDocsCount); - // deleted properties are : 1:/foo -> prop, a, _collisions & p && 1:/bar -> _bc - assertEquals(0, stats.deletedPropsCount); - assertEquals(0, stats.deletedUnmergedBCCount); + assertStatsCountsEqual(stats, 0, 0, 0, 0, 0, 0, 0); assertTrue(stats.detailedGCDryRunMode); assertBranchRevisionNotRemovedFromAllDocuments(store1, br1); @@ -1474,10 +1767,9 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hour VersionGCStats stats = gc(store1.getVersionGarbageCollector(), 1, HOURS); + assertFalse(store1.getRoot().getChildNode("bar").hasProperty("prop")); assertNotNull(stats); - assertEquals(0, stats.deletedDocGCCount); - assertEquals(1, stats.deletedPropsCount); - + assertStatsCountsEqual(stats, 0, 1, 0, 0, 0, 0, 1); assertDocumentsExist(of("/bar")); } @@ -1509,11 +1801,12 @@ public class VersionGarbageCollectorIT { clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hour VersionGCStats stats = gc(store1.getVersionGarbageCollector(), 1, HOURS); + assertFalse(store1.getRoot().getChildNode("bar").hasProperty("prop")); assertNotNull(stats); - assertEquals(0, stats.deletedDocGCCount); - // since we have updated 
a totally unrelated path i.e. "/a", we should still be seeing the garbage from late write and + // since we have updated a totally unrelated path, + // we should still be seeing the garbage from late write and // thus it will be collected. - assertEquals(1, stats.deletedPropsCount); + assertStatsCountsEqual(stats, 0, 1, 0, 0, 0, 0, 1); assertDocumentsExist(of("/foo/bar/baz")); } @@ -1546,12 +1839,13 @@ clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); // clean everything older than one hour VersionGCStats stats = gc(store1.getVersionGarbageCollector(), 1, HOURS); - assertNotNull(stats); - assertEquals(0, stats.deletedDocGCCount); - // we shouldn't be able to remove the property since we have updated an related path that has lead to an update - // of common ancestor and this would make late write visible - assertEquals(0, stats.deletedPropsCount); - + assertEquals("value2", store1.getRoot().getChildNode("bar").getProperty("prop").getValue(Type.STRING)); + // deletedPropsCount : we shouldn't be able to remove the property since we have + // updated a related path that has led to an update of common ancestor and + // this would make late write visible + // deletedPropRevsCount : 2 prop-revs GCed : the original prop=value, plus the + // removeProperty(prop) plus 1 _commitRoot entry + assertStatsCountsEqual(stats, 0, 0, 0, 2, 1, 0, 1); assertDocumentsExist(of("/bar")); } @@ -1578,9 +1872,8 @@ // clean everything older than one hour VersionGCStats stats = gc(store1.getVersionGarbageCollector(), 1, HOURS); assertNotNull(stats); - assertEquals(0, stats.deletedDocGCCount); - assertEquals(0, stats.deletedPropsCount); - + // 1 prop-rev removal : the late-write null + assertStatsCountsEqual(stats, 0, 0, 1, 1, 0, 0, 1); assertDocumentsExist(of("/bar")); } @@ -1607,11 +1900,10 @@ // clean everything older than one hour VersionGCStats stats = 
gc(store1.getVersionGarbageCollector(), 1, HOURS); assertNotNull(stats); - assertEquals(0, stats.deletedDocGCCount); // since we have updated an totally unrelated path i.e. "/a", we should still be seeing the garbage from late write and // thus it will be collected. - assertEquals(0, stats.deletedPropsCount); - + // removes 1 prop-rev : the late-write null + assertStatsCountsEqual(stats, 0, 0, 1, 1, 0, 0, 1); assertDocumentsExist(of("/foo/bar/baz")); } @@ -1638,10 +1930,9 @@ public class VersionGarbageCollectorIT { // clean everything older than one hour VersionGCStats stats = gc(store1.getVersionGarbageCollector(), 1, HOURS); assertNotNull(stats); - assertEquals(0, stats.deletedDocGCCount); // we should be able to remove the property since we have updated an related path that has lead to an update // of common ancestor and this would make late write visible - assertEquals(1, stats.deletedPropsCount); + assertStatsCountsEqual(stats, 0, 1, 0, 0, 0, 0, 1); assertDocumentsExist(of("/bar")); } @@ -2756,9 +3047,7 @@ public class VersionGarbageCollectorIT { fail("merge must fail"); } catch (CommitFailedException e) { // expected - String msg = e.getMessage(); - e.printStackTrace(); - assertEquals("OakOak0001: write operation failed", msg); + assertEquals("OakOak0001: write operation failed", e.getMessage()); } } disposeQuietly(store2);
