This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch OAK-10199 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 2e75322f0d7f246e03c1e13421eceeb62e137228 Author: Rishabh Kumar <d...@adobe.com> AuthorDate: Wed Jul 12 21:05:02 2023 +0530 OAK-10199 : fixed query to avoid skipping documents with greater _modified timestamp --- .../plugins/document/VersionGCRecommendations.java | 85 +++++++++++++------ .../oak/plugins/document/VersionGCSupport.java | 39 ++++++--- .../plugins/document/VersionGarbageCollector.java | 65 +++++++++------ .../document/mongo/MongoVersionGCSupport.java | 38 ++++----- .../plugins/document/rdb/RDBVersionGCSupport.java | 57 ++++++++++--- .../oak/plugins/document/VersionGCInitTest.java | 13 +-- .../oak/plugins/document/VersionGCSupportTest.java | 8 +- .../oak/plugins/document/VersionGCTest.java | 8 +- .../document/VersionGarbageCollectorIT.java | 96 ++++++++++++++++++++-- 9 files changed, 298 insertions(+), 111 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java index 0fd0766f5b..2092844299 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java @@ -21,19 +21,21 @@ package org.apache.jackrabbit.oak.plugins.document; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval; import org.apache.jackrabbit.oak.spi.gc.GCMonitor; import org.apache.jackrabbit.oak.stats.Clock; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static java.lang.Long.MAX_VALUE; import static java.util.Map.of; +import static java.util.Optional.ofNullable; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; -import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP; import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP; import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID; @@ -52,6 +54,7 @@ public class VersionGCRecommendations { private final GCMonitor gcmon; final boolean ignoreDueToCheckPoint; + final boolean ignoreDetailGCDueToCheckPoint; final TimeInterval scope; final TimeInterval scopeDetailedGC; final long maxCollect; @@ -63,6 +66,7 @@ public class VersionGCRecommendations { private final long precisionMs; final long suggestedIntervalMs; private final boolean scopeIsComplete; + private final boolean detailedGCEnabled; /** * With the given maximum age of revisions to keep (earliest time in the past to collect), @@ -85,20 +89,23 @@ public class VersionGCRecommendations { * @param vgc VersionGC support class * @param options options for running the gc * @param gcMonitor monitor class for messages + * @param detailedGCEnabled whether detailedGC is enabled or not */ public VersionGCRecommendations(long maxRevisionAgeMs, Checkpoints checkpoints, Clock clock, VersionGCSupport vgc, - VersionGCOptions options, GCMonitor gcMonitor) { + VersionGCOptions options, GCMonitor gcMonitor, final boolean detailedGCEnabled) { boolean ignoreDueToCheckPoint = false; + boolean ignoreDetailGCDueToCheckPoint = false; long deletedOnceCount = 0; long suggestedIntervalMs; long oldestPossible; - long oldestModifiedDocTimeStamp; + final AtomicLong oldestModifiedDocTimeStamp = new AtomicLong(); String oldestModifiedDocId; long collectLimit = options.collectLimit; this.vgc = vgc; this.gcmon = gcMonitor; this.originalCollectLimit = options.collectLimit; + this.detailedGCEnabled = detailedGCEnabled; TimeInterval keep = new TimeInterval(clock.getTime() - maxRevisionAgeMs, Long.MAX_VALUE); @@ -120,20 +127,17 @@ public class VersionGCRecommendations { if (detailedGCTimestamp == 0) { // it will only happen for the very first time, we run this detailedGC log.info("No detailedGCTimestamp found, querying for the oldest modified candidate"); - final NodeDocument doc = vgc.getOldestModifiedDoc(clock); - if (doc == NULL) { - oldestModifiedDocTimeStamp = 0L; - } else { - oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : SECONDS.toMillis(doc.getModified()) - 1L; - } + vgc.getOldestModifiedDoc(clock).ifPresentOrElse( + d -> oldestModifiedDocTimeStamp.set(SECONDS.toMillis(ofNullable(d.getModified()).orElse(0L))), + () -> oldestModifiedDocTimeStamp.set(0L)); oldestModifiedDocId = MIN_ID_VALUE; - log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp)); + log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp.get())); } else { - oldestModifiedDocTimeStamp = detailedGCTimestamp - 1L; + oldestModifiedDocTimeStamp.set(detailedGCTimestamp); } - TimeInterval detailedGCTimeInternal = new TimeInterval(oldestModifiedDocTimeStamp, MAX_VALUE); - detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(keep.fromMs); + TimeInterval scopeDetailedGC = new TimeInterval(oldestModifiedDocTimeStamp.get(), MAX_VALUE); + scopeDetailedGC = scopeDetailedGC.notLaterThan(keep.fromMs); suggestedIntervalMs = (long) settings.get(SETTINGS_COLLECTION_REC_INTERVAL_PROP); if (suggestedIntervalMs > 0) { @@ -171,18 +175,14 @@ public class VersionGCRecommendations { //Check for any registered checkpoint which prevent the GC from running Revision checkpoint = checkpoints.getOldestRevisionToKeep(); - if (checkpoint != null && scope.endsAfter(checkpoint.getTimestamp())) { - TimeInterval minimalScope = scope.startAndDuration(options.precisionMs); - if (minimalScope.endsAfter(checkpoint.getTimestamp())) { - log.warn("Ignoring RGC run because a valid checkpoint [{}] exists inside minimal scope {}.", - checkpoint.toReadableString(), minimalScope); - ignoreDueToCheckPoint = true; - } else { - scope = scope.notLaterThan(checkpoint.getTimestamp() - 1); - detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(checkpoint.getTimestamp() - 1); - log.info("checkpoint at [{}] found, scope now {}, detailedGcScope now {}", timestampToString(checkpoint.getTimestamp()), scope, detailedGCTimeInternal); - } - } + + final GCResult gcResult = getResult(options, ignoreDueToCheckPoint, scope, checkpoint); + scope = gcResult.gcScope; + ignoreDueToCheckPoint = gcResult.ignoreGC; + + final GCResult detailGCResult = getResult(options, ignoreDetailGCDueToCheckPoint, scopeDetailedGC, checkpoint); + scopeDetailedGC = detailGCResult.gcScope; + ignoreDetailGCDueToCheckPoint = detailGCResult.ignoreGC; if (scope.getDurationMs() <= options.precisionMs) { // If we have narrowed the collect time interval down as much as we can, no @@ -194,7 +194,8 @@ public class VersionGCRecommendations { this.precisionMs = options.precisionMs; this.ignoreDueToCheckPoint = ignoreDueToCheckPoint; this.scope = scope; - this.scopeDetailedGC = detailedGCTimeInternal; + this.ignoreDetailGCDueToCheckPoint = ignoreDetailGCDueToCheckPoint; + this.scopeDetailedGC = scopeDetailedGC; this.detailedGCId = oldestModifiedDocId; this.scopeIsComplete = scope.toMs >= keep.fromMs; this.maxCollect = collectLimit; @@ -248,6 +249,13 @@ public class VersionGCRecommendations { } stats.needRepeat = !scopeIsComplete; } + + // save data for detailed GC + if (detailedGCEnabled && !stats.canceled && !stats.ignoredDetailGCDueToCheckPoint) { + // success, we would not expect to encounter revisions older than this in the future + setLongSetting(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedDocTimeStamp); + setStringSetting(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, stats.oldestModifiedDocId); + } } private Map<String, Object> getVGCSettings() { @@ -287,4 +295,29 @@ public class VersionGCRecommendations { propValMap.forEach(updateOp::set); vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp); } + + @NotNull + private static GCResult getResult(VersionGCOptions options, boolean ignoreGC, TimeInterval gcScope, Revision checkpoint) { + if (checkpoint != null && gcScope.endsAfter(checkpoint.getTimestamp())) { + TimeInterval minimalScope = gcScope.startAndDuration(options.precisionMs); + if (minimalScope.endsAfter(checkpoint.getTimestamp())) { + log.warn("Ignoring GC run because a valid checkpoint [{}] exists inside minimal scope {}.", checkpoint.toReadableString(), minimalScope); + ignoreGC = true; + } else { + gcScope = gcScope.notLaterThan(checkpoint.getTimestamp() - 1); + log.info("checkpoint at [{}] found, detailedGCScope now {}", timestampToString(checkpoint.getTimestamp()), gcScope); + } + } + return new GCResult(ignoreGC, gcScope); + } + + private static class GCResult { + public final boolean ignoreGC; + public final TimeInterval gcScope; + + public GCResult(boolean ignoreGC, TimeInterval gcScope) { + this.ignoreGC = ignoreGC; + this.gcScope = gcScope; + } + } } \ No newline at end of file diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java index 6086eef772..1e19eb6af7 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java @@ -20,17 +20,21 @@ package org.apache.jackrabbit.oak.plugins.document; import static java.util.Comparator.comparing; +import static java.util.Optional.empty; +import static java.util.Optional.ofNullable; +import static java.util.stream.Stream.concat; +import static java.util.stream.StreamSupport.stream; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static java.util.stream.Collectors.toList; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; -import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments; +import java.util.Optional; import java.util.Set; -import java.util.stream.StreamSupport; +import java.util.stream.Stream; import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; @@ -66,8 +70,7 @@ public class VersionGCSupport { * @return matching documents. */ public Iterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) { - return StreamSupport - .stream(getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1).spliterator(), false) + return stream(getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1).spliterator(), false) .filter(input -> input.wasDeletedOnce() && modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified)) .collect(toList()); } @@ -90,9 +93,14 @@ public class VersionGCSupport { */ public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, @NotNull final String fromId) { - return StreamSupport - .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false) - .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified)) + // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified) + final Stream<NodeDocument> s1 = stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false) + .filter(input -> modifiedEqualsTo(input, fromModified)); + + final Stream<NodeDocument> s2 = stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1).spliterator(), false) + .filter(input -> modifiedGreaterThan(input, fromModified) && modifiedLessThan(input, toModified)); + + return concat(s1, s2) .sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)) .limit(limit) .collect(toList()); @@ -102,6 +110,17 @@ public class VersionGCSupport { Long modified = doc.getModified(); return modified != null && modified.compareTo(getModifiedInSecs(time)) >= 0; } + + private boolean modifiedGreaterThan(final NodeDocument doc, final long time) { + Long modified = doc.getModified(); + return modified != null && modified.compareTo(getModifiedInSecs(time)) > 0; + } + + private boolean modifiedEqualsTo(final NodeDocument doc, final long time) { + Long modified = doc.getModified(); + return modified != null && modified.compareTo(getModifiedInSecs(time)) == 0; + } + private boolean modifiedLessThan(final NodeDocument doc, final long time) { Long modified = doc.getModified(); return modified != null && modified.compareTo(getModifiedInSecs(time)) < 0; @@ -189,7 +208,7 @@ public class VersionGCSupport { * * @return the oldest modified document. */ - public NodeDocument getOldestModifiedDoc(final Clock clock) { + public Optional<NodeDocument> getOldestModifiedDoc(final Clock clock) { long ts = 0; long now = clock.getTime(); Iterable<NodeDocument> docs = null; @@ -198,13 +217,13 @@ public class VersionGCSupport { try { docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE); if (docs.iterator().hasNext()) { - return docs.iterator().next(); + return ofNullable(docs.iterator().next()); } } finally { Utils.closeIfCloseable(docs); } LOG.info("find oldest modified document to be {}", Utils.timestampToString(ts)); - return NULL; + return empty(); } public long getDeletedOnceCount() throws UnsupportedOperationException { diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index 1e5b1129b9..766c8e4cc5 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -77,8 +77,6 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_I import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH; -import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setDeleted; -import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified; import static org.slf4j.helpers.MessageFormatter.arrayFormat; public class VersionGarbageCollector { @@ -233,7 +231,7 @@ public class VersionGarbageCollector { long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge); long now = nodeStore.getClock().getTime(); VersionGCRecommendations rec = new VersionGCRecommendations(maxRevisionAgeInMillis, nodeStore.getCheckpoints(), - nodeStore.getClock(), versionStore, options, gcMonitor); + nodeStore.getClock(), versionStore, options, gcMonitor, detailedGCEnabled); int estimatedIterations = -1; if (rec.suggestedIntervalMs > 0) { estimatedIterations = (int)Math.ceil( @@ -272,6 +270,7 @@ public class VersionGarbageCollector { public static class VersionGCStats { boolean ignoredGCDueToCheckPoint; + boolean ignoredDetailGCDueToCheckPoint; boolean canceled; boolean success = true; boolean limitExceeded; @@ -349,6 +348,7 @@ public class VersionGarbageCollector { return "VersionGCStats{" + "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint + + "ignoredDetailGCDueToCheckPoint=" + ignoredDetailGCDueToCheckPoint + ", canceled=" + canceled + ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" + ", updateResurrectedGCCount=" + updateResurrectedGCCount + @@ -366,6 +366,7 @@ public class VersionGarbageCollector { void addRun(VersionGCStats run) { ++iterationCount; this.ignoredGCDueToCheckPoint = run.ignoredGCDueToCheckPoint; + this.ignoredDetailGCDueToCheckPoint = run.ignoredDetailGCDueToCheckPoint; this.canceled = run.canceled; this.success = run.success; this.limitExceeded = run.limitExceeded; @@ -566,13 +567,12 @@ public class VersionGarbageCollector { VersionGCStats stats = new VersionGCStats(); stats.active.start(); VersionGCRecommendations rec = new VersionGCRecommendations(maxRevisionAgeInMillis, nodeStore.getCheckpoints(), - nodeStore.getClock(), versionStore, options, gcMonitor); + nodeStore.getClock(), versionStore, options, gcMonitor, detailedGCEnabled); GCPhases phases = new GCPhases(cancel, stats, gcMonitor); try { if (rec.ignoreDueToCheckPoint) { phases.stats.ignoredGCDueToCheckPoint = true; monitor.skipped("Checkpoint prevented revision garbage collection"); - cancel.set(true); } else { final RevisionVector headRevision = nodeStore.getHeadRevision(); final RevisionVector sweepRevisions = nodeStore.getSweepRevisions(); @@ -580,11 +580,26 @@ public class VersionGarbageCollector { collectDeletedDocuments(phases, headRevision, rec); collectSplitDocuments(phases, sweepRevisions, rec); - if (detailedGCEnabled) { - // run only if detailed GC enabled + } + + // now run detailed GC if enabled + if (detailedGCEnabled) { + if (rec.ignoreDetailGCDueToCheckPoint) { + phases.stats.ignoredDetailGCDueToCheckPoint = true; + monitor.skipped("Checkpoint prevented detailed revision garbage collection"); + } else { + final RevisionVector headRevision = nodeStore.getHeadRevision(); + monitor.info("Looking at revisions in {} for detailed GC", rec.scopeDetailedGC); collectDetailedGarbage(phases, headRevision, rec); } } + + if (detailedGCEnabled && rec.ignoreDueToCheckPoint && rec.ignoreDetailGCDueToCheckPoint) { + cancel.set(true); + } else if (!detailedGCEnabled && rec.ignoreDueToCheckPoint) { + cancel.set(true); + } + } catch (LimitExceededException ex) { stats.limitExceeded = true; } finally { @@ -623,20 +638,19 @@ public class VersionGarbageCollector { int docsTraversed = 0; boolean foundDoc = true; final long oldestModifiedMs = rec.scopeDetailedGC.fromMs; + final long toModified = rec.scopeDetailedGC.toMs; long oldModifiedMs = oldestModifiedMs; final String oldestModifiedDocId = rec.detailedGCId; try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) { long fromModified = oldestModifiedMs; - String fromId = oldestModifiedDocId; - NodeDocument lastDoc = null; - final long toModified = rec.scopeDetailedGC.toMs; + String fromId = ofNullable(oldestModifiedDocId).orElse(MIN_ID_VALUE); + NodeDocument lastDoc; if (phases.start(GCPhase.DETAILED_GC)) { - while (foundDoc && fromModified < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) { + while (foundDoc && fromModified < toModified && docsTraversed < PROGRESS_BATCH_SIZE) { // set foundDoc to false to allow exiting the while loop foundDoc = false; lastDoc = null; Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId); - final Revision revision = nodeStore.newRevision(); try { for (NodeDocument doc : itr) { foundDoc = true; @@ -647,7 +661,7 @@ public class VersionGarbageCollector { break; } docsTraversed++; - if (docsTraversed % PROGRESS_BATCH_SIZE == 0) { + if (docsTraversed % 100 == 0) { monitor.info("Iterated through {} documents so far. {} had detail garbage", docsTraversed, gc.getGarbageDocsCount()); } @@ -655,7 +669,7 @@ public class VersionGarbageCollector { lastDoc = doc; // collect the data to delete in next step if (phases.start(GCPhase.COLLECTING)) { - gc.collectGarbage(doc, phases, revision); + gc.collectGarbage(doc, phases); phases.stop(GCPhase.COLLECTING); } @@ -681,7 +695,7 @@ public class VersionGarbageCollector { } finally { Utils.closeIfCloseable(itr); phases.stats.oldestModifiedDocTimeStamp = fromModified; - if (fromModified > (oldModifiedMs + 1)) { + if (fromModified > oldModifiedMs) { // we have moved ahead, now we can reset oldestModifiedId to min value fromId = MIN_ID_VALUE; phases.stats.oldestModifiedDocId = MIN_ID_VALUE; @@ -690,10 +704,10 @@ public class VersionGarbageCollector { // save the last _id traversed to avoid re-fetching of ids phases.stats.oldestModifiedDocId = fromId; } - oldModifiedMs = fromModified - 1; + oldModifiedMs = fromModified; } - - // if we are already at last document of current timeStamp, + // if we didn't find any document i.e. either we are already at last document + // of current timeStamp or there is no document for this timeStamp // we need to reset fromId & increment fromModified and check again if (!foundDoc && !Objects.equals(fromId, MIN_ID_VALUE)) { fromId = MIN_ID_VALUE; @@ -703,6 +717,13 @@ public class VersionGarbageCollector { } phases.stop(GCPhase.DETAILED_GC); } + } finally { + if (docsTraversed < PROGRESS_BATCH_SIZE) { + // we have traversed all the docs within given time range and nothing is left + // lets set oldModifiedDocTimeStamp to upper limit of this cycle + phases.stats.oldestModifiedDocTimeStamp = toModified; + phases.stats.oldestModifiedDocId = MIN_ID_VALUE; + } } } @@ -815,14 +836,14 @@ public class VersionGarbageCollector { this.timer = Stopwatch.createUnstarted(); } - public void collectGarbage(final NodeDocument doc, final GCPhases phases, final Revision revision) { + public void collectGarbage(final NodeDocument doc, final GCPhases phases) { monitor.info("Collecting Detailed Garbage for doc [{}]", doc.getId()); final UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), false); op.equals(MODIFIED_IN_SECS, doc.getModified()); - collectDeletedProperties(doc, phases, op, revision); + collectDeletedProperties(doc, phases, op); collectUnmergedBranchCommitDocument(doc, phases, op); collectOldRevisions(doc, phases, op); // only add if there are changes for this doc @@ -845,7 +866,7 @@ public class VersionGarbageCollector { } - private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp, final Revision revision) { + private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) { // get Map of all properties along with their values if (phases.start(GCPhase.COLLECT_PROPS)) { @@ -864,8 +885,6 @@ public class VersionGarbageCollector { .filter(p -> !retainPropSet.contains(p)) .mapToInt(x -> { updateOp.remove(x); - setModified(updateOp,revision); - setDeleted(updateOp, revision, false); return 1;}) .sum(); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index cf821fcf48..6637afa4aa 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -22,34 +22,36 @@ package org.apache.jackrabbit.oak.plugins.document.mongo; import static com.mongodb.client.model.Filters.eq; import static com.mongodb.client.model.Filters.exists; import static com.mongodb.client.model.Filters.gt; +import static com.mongodb.client.model.Filters.or; +import static java.util.Optional.empty; import static java.util.Optional.ofNullable; import static org.apache.jackrabbit.guava.common.collect.Iterables.concat; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.guava.common.collect.Iterables.transform; import static com.mongodb.client.model.Filters.and; -import static com.mongodb.client.model.Filters.gte; import static com.mongodb.client.model.Filters.lt; import static java.util.Collections.emptyList; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.Document.ID; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; -import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH; import static org.apache.jackrabbit.oak.plugins.document.mongo.MongoUtils.hasIndex; +import static org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable.wrap; import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; import java.util.regex.Pattern; +import com.mongodb.client.MongoCursor; import org.apache.jackrabbit.oak.plugins.document.Document; import org.apache.jackrabbit.oak.plugins.document.NodeDocument; import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType; @@ -148,9 +150,11 @@ public class MongoVersionGCSupport extends VersionGCSupport { @Override public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, @NotNull final String fromId) { - // _modified >= fromModified && _modified < toModified && _id > fromId - final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), - lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId)); + // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified) + final Bson query = or( + and(eq(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), gt(ID, fromId)), + and(gt(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)))); + // first sort by _modified and then by _id final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1)); @@ -158,7 +162,7 @@ public class MongoVersionGCSupport extends VersionGCSupport { .find(query) .sort(sort) .limit(limit); - return CloseableIterable.wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input))); + return wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input))); } @Override @@ -241,28 +245,22 @@ public class MongoVersionGCSupport extends VersionGCSupport { * @return the timestamp of the oldest modified document. */ @Override - public NodeDocument getOldestModifiedDoc(final Clock clock) { + public Optional<NodeDocument> getOldestModifiedDoc(final Clock clock) { LOG.info("getOldestModifiedDoc() <- start"); final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1)); - final List<NodeDocument> result = new ArrayList<>(1); // we need to add query condition to ignore `previous` documents which doesn't have this field final Bson query = exists(MODIFIED_IN_SECS); - getNodeCollection().find(query).sort(sort).limit(1).forEach( - (Consumer<BasicDBObject>) document -> - ofNullable(store.convertFromDBObject(NODES, document)) - .ifPresent(doc -> { - LOG.info("getOldestModifiedDoc() -> {}", doc); - result.add(doc); - })); + FindIterable<BasicDBObject> limit = getNodeCollection().find(query).sort(sort).limit(1); - if (result.isEmpty()) { - LOG.info("getOldestModifiedDoc() -> none found, return NULL document"); - result.add(NULL); + try(MongoCursor<BasicDBObject> cur = limit.iterator()) { + return cur.hasNext() ? ofNullable(store.convertFromDBObject(NODES, cur.next())) : empty(); + } catch (Exception ex) { + LOG.error("getOldestModifiedDoc() <- error while fetching data from Mongo", ex); } - return result.get(0); + return empty(); } private List<Bson> createQueries(Set<SplitDocType> gcTypes, diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java index 9d96c35811..1b35a30f07 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java @@ -16,15 +16,23 @@ */ package org.apache.jackrabbit.oak.plugins.document.rdb; +import static java.util.Comparator.comparing; import static java.util.List.of; +import static java.util.Optional.empty; +import static java.util.Optional.ofNullable; +import static java.util.stream.Collectors.toList; +import static java.util.stream.Stream.concat; +import static java.util.stream.StreamSupport.stream; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; +import static org.apache.jackrabbit.guava.common.collect.Iterables.size; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.Document.ID; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; -import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN; +import static org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable.wrap; +import static org.apache.jackrabbit.oak.plugins.document.util.Utils.closeIfCloseable; import java.io.Closeable; import java.io.IOException; @@ -33,11 +41,14 @@ import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier; import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; import org.apache.jackrabbit.oak.plugins.document.NodeDocument; import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType; @@ -113,13 +124,40 @@ public class RDBVersionGCSupport extends VersionGCSupport { @Override public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, @NotNull final String fromId) { - List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)), - new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)), + // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified) + // TODO : introduce support for OR where clause in RDBDocumentStore + final List<QueryCondition> c1 = of(new QueryCondition(MODIFIED_IN_SECS, "=", getModifiedInSecs(fromModified)), new QueryCondition(ID, ">", of(fromId))); + + final List<QueryCondition> c2 = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)), + new QueryCondition(MODIFIED_IN_SECS, ">", getModifiedInSecs(fromModified))); + if (MODE == 1) { - return getIterator(EMPTY_KEY_PATTERN, conditions); + final Iterable<NodeDocument> itr1 = getIterator(EMPTY_KEY_PATTERN, c1); + if (size(itr1) >= limit) { + return itr1; + } + final Iterable<NodeDocument> itr2 = getIterator(EMPTY_KEY_PATTERN, c2); + + final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false); + final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false); + return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> { + closeIfCloseable(itr1); + closeIfCloseable(itr2); + }); } else { - return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID)); + final Iterable<NodeDocument> itr1 = store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c1, limit, of(MODIFIED_IN_SECS, ID)); + if (size(itr1) >= limit) { + return itr1; + } + final Iterable<NodeDocument> itr2 = store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c2, limit, of(MODIFIED_IN_SECS, ID)); + + final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false); + final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false); + return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> { + closeIfCloseable(itr1); + closeIfCloseable(itr2); + }); } } @@ -284,20 +322,19 @@ public class RDBVersionGCSupport extends VersionGCSupport { * @return the timestamp of the oldest modified document. */ @Override - public NodeDocument getOldestModifiedDoc(Clock clock) { - NodeDocument doc = NULL; + public Optional<NodeDocument> getOldestModifiedDoc(Clock clock) { LOG.info("getOldestModifiedDoc() <- start"); Iterable<NodeDocument> modifiedDocs = null; try { modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE); - doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL; + return modifiedDocs.iterator().hasNext() ? ofNullable(modifiedDocs.iterator().next()) : empty(); } catch (DocumentStoreException ex) { LOG.error("getOldestModifiedDoc()", ex); } finally { - Utils.closeIfCloseable(modifiedDocs); + closeIfCloseable(modifiedDocs); } - return doc; + return empty(); } @Override diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java index 738c1109ad..0c6b2fccdf 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java @@ -18,6 +18,7 @@ */ package org.apache.jackrabbit.oak.plugins.document; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -74,12 +75,13 @@ public class VersionGCInitTest { UpdateOp op = new UpdateOp(id, true); NodeDocument.setModified(op, r); store.createOrUpdate(NODES, op); - ns.getVersionGarbageCollector().gc(1, DAYS); + VersionGCStats stats = ns.getVersionGarbageCollector().gc(1, DAYS); vgc = store.find(SETTINGS, "versionGC"); assertNotNull(vgc); - assertEquals(40_000L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); - assertEquals("1:/node", vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); + assertEquals(stats.oldestModifiedDocTimeStamp, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + assertEquals(stats.oldestModifiedDocId, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); + assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); } @Test @@ -89,11 +91,12 @@ public class VersionGCInitTest { assertNull(vgc); enableDetailGC(ns.getVersionGarbageCollector()); - ns.getVersionGarbageCollector().gc(1, DAYS); + VersionGCStats stats = ns.getVersionGarbageCollector().gc(1, DAYS); vgc = store.find(SETTINGS, "versionGC"); assertNotNull(vgc); - assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + assertEquals(stats.oldestModifiedDocTimeStamp, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + assertEquals(stats.oldestModifiedDocId, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); } } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java index 6d02fd38f7..565600a143 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java @@ -196,7 +196,7 @@ public class VersionGCSupportTest { setModified(op, r); store.create(NODES, of(op)); - NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE); + NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL); String oldestModifiedDocId = oldestModifiedDoc.getId(); long reportedSecs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), Math.abs(secs - reportedSecs) < 5); @@ -219,7 +219,7 @@ public class VersionGCSupportTest { // create 5_000 nodes store.create(NODES, updateOps); - NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE); + NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL); String oldestModifiedDocId = oldestModifiedDoc.getId(); long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); assertEquals(40L, oldestModifiedDocTs); @@ -254,7 +254,7 @@ public class VersionGCSupportTest { // create 5_001 nodes store.create(NODES, updateOps); - NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE); + NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL); String oldestModifiedDocId = oldestModifiedDoc.getId(); long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); assertEquals(40L, oldestModifiedDocTs); @@ -290,7 +290,7 @@ public class VersionGCSupportTest { @Test public void findModifiedDocsWhenOldestDocIsAbsent() { - NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE); + NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL); String oldestModifiedDocId = MIN_ID_VALUE; long oldestModifiedDocTs = 0L; assertEquals(NULL, oldestModifiedDoc); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java index cf3148a86d..39ca136c8d 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java @@ -324,7 +324,7 @@ public class VersionGCTest { VersionGCSupport localgcsupport = fakeVersionGCSupport(ns.getDocumentStore(), oneYearAgo, twelveTimesTheLimit); VersionGCRecommendations rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport, - options, new TestGCMonitor()); + options, new TestGCMonitor(), false); // should select a duration of roughly one month long duration= rec.scope.getDurationMs(); @@ -338,7 +338,7 @@ public class VersionGCTest { assertTrue(stats.needRepeat); rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport, options, - new TestGCMonitor()); + new TestGCMonitor(), false); // new duration should be half long nduration = rec.scope.getDurationMs(); @@ -367,7 +367,7 @@ public class VersionGCTest { // loop until the recommended interval is at 60s (precisionMS) do { rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport, options, - testmonitor); + testmonitor, false); stats = new VersionGCStats(); stats.limitExceeded = true; rec.evaluate(stats); @@ -384,7 +384,7 @@ public class VersionGCTest { deletedCount -= deleted; localgcsupport = fakeVersionGCSupport(ns.getDocumentStore(), oldestDeleted, deletedCount); rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport, options, - testmonitor); + testmonitor, false); stats = new VersionGCStats(); stats.limitExceeded = false; stats.deletedDocGCCount = deleted; diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index 6d7382bac5..df785878b3 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -35,6 +35,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.stream.Collectors.toList; import static java.util.stream.StreamSupport.stream; import static org.apache.commons.lang3.reflect.FieldUtils.writeField; @@ -171,11 +172,15 @@ public class VersionGarbageCollectorIT { clock.waitUntil(cp.getTimestamp() + expiryTime - maxAge); VersionGCStats stats = gc.gc(maxAge, TimeUnit.MILLISECONDS); assertTrue(stats.ignoredGCDueToCheckPoint); + assertFalse(stats.ignoredDetailGCDueToCheckPoint); + assertTrue(stats.canceled); //Fast forward time to future such that checkpoint get expired clock.waitUntil(clock.getTime() + expiryTime + 1); stats = gc.gc(maxAge, TimeUnit.MILLISECONDS); assertFalse("GC should be performed", stats.ignoredGCDueToCheckPoint); + assertFalse("Detailed GC shouldn't be performed", stats.ignoredDetailGCDueToCheckPoint); + assertFalse(stats.canceled); } @Test @@ -242,6 +247,78 @@ public class VersionGarbageCollectorIT { } // OAK-10199 + @Test + public void detailedGCIgnoredForCheckpoint() throws Exception { + long expiryTime = 100, maxAge = 20; + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + + Revision cp = Revision.fromString(store.checkpoint(expiryTime)); + + //Fast forward time to future but before expiry of checkpoint + clock.waitUntil(cp.getTimestamp() + expiryTime - maxAge); + VersionGCStats stats = gc.gc(maxAge, TimeUnit.MILLISECONDS); + assertTrue(stats.ignoredDetailGCDueToCheckPoint); + assertTrue(stats.canceled); + + //Fast forward time to future such that checkpoint get expired + clock.waitUntil(clock.getTime() + expiryTime + 1); + stats = gc.gc(maxAge, TimeUnit.MILLISECONDS); + assertFalse("Detailed GC should be performed", stats.ignoredDetailGCDueToCheckPoint); + assertFalse(stats.canceled); + } + + @Test + public void testDetailedGCNotIgnoredForRGCCheckpoint() throws Exception { + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + + //1. Create nodes with properties + NodeBuilder b1 = store.getRoot().builder(); + + // Add property to node & save + b1.child("x").setProperty("test", "t", STRING); + b1.child("z").setProperty("test", "t", STRING); + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + //Remove property + NodeBuilder b2 = store.getRoot().builder(); + b2.getChildNode("x").removeProperty("test"); + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store.runBackgroundOperations(); + + //2. move clock forward with 2 hours + clock.waitUntil(clock.getTime() + HOURS.toMillis(2)); + + //3. Create a checkpoint now with expiry of 1 hour + long expiryTime = 1, delta = MINUTES.toMillis(10); + NodeBuilder b3 = store.getRoot().builder(); + b3.getChildNode("z").removeProperty("test"); + store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store.runBackgroundOperations(); + + Revision.fromString(store.checkpoint(HOURS.toMillis(expiryTime))); + + //4. move clock forward by 10 mins + clock.waitUntil(clock.getTime() + delta); + + // 5. Remove a node + NodeBuilder b4 = store.getRoot().builder(); + b4.getChildNode("z").remove(); + store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store.runBackgroundOperations(); + + // 6. Now run gc after checkpoint and see removed properties gets collected + clock.waitUntil(clock.getTime() + delta*2); + VersionGCStats stats = gc.gc(delta, MILLISECONDS); + assertEquals(1, stats.deletedPropsGCCount); + assertEquals(1, stats.updatedDetailedGCDocsCount); + assertTrue(stats.ignoredGCDueToCheckPoint); + assertFalse(stats.ignoredDetailGCDueToCheckPoint); + assertFalse(stats.canceled); + } + @Test public void testGCDeletedProps() throws Exception { //1. Create nodes with properties @@ -265,7 +342,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(10); + long delta = MINUTES.toMillis(10); //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); VersionGCStats stats = gc.gc(maxAge, HOURS); @@ -365,7 +442,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(20); + long delta = MINUTES.toMillis(20); //Remove property NodeBuilder b2 = store.getRoot().builder(); @@ -412,7 +489,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(20); + long delta = MINUTES.toMillis(20); store.runBackgroundOperations(); @@ -426,6 +503,7 @@ public class VersionGarbageCollectorIT { long oldestModifiedDocTimeStamp = stats.oldestModifiedDocTimeStamp; Document document = store.getDocumentStore().find(SETTINGS, SETTINGS_COLLECTION_ID); + assert document != null; assertEquals(document.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP), oldestModifiedDocTimeStamp); assertEquals(document.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP), oldestModifiedDocId); } @@ -444,7 +522,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(10); + long delta = MINUTES.toMillis(10); //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); VersionGCStats stats = gc.gc(maxAge, HOURS); @@ -525,7 +603,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(10); + long delta = MINUTES.toMillis(10); //3. Check that deleted property does get collected again // increment the clock again by more than 2 hours + delta @@ -574,7 +652,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(10); + long delta = MINUTES.toMillis(10); //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); VersionGCStats stats = gc.gc(maxAge, HOURS); @@ -630,7 +708,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(10); + long delta = MINUTES.toMillis(10); //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); VersionGCStats stats = gc.gc(maxAge, HOURS); @@ -651,7 +729,7 @@ public class VersionGarbageCollectorIT { stats = gc.gc(maxAge*2, HOURS); assertEquals(0, stats.deletedPropsGCCount); assertEquals(0, stats.updatedDetailedGCDocsCount); - assertNull(stats.oldestModifiedDocId); // as GC hadn't run + assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); // as GC hadn't run //3. Check that deleted property does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); @@ -699,7 +777,7 @@ public class VersionGarbageCollectorIT { // enable the detailed gc flag writeField(gc, "detailedGCEnabled", true, true); long maxAge = 1; //hours - long delta = TimeUnit.MINUTES.toMillis(10); + long delta = MINUTES.toMillis(10); //1. Go past GC age and check no GC done as nothing deleted clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); VersionGCStats stats = gc.gc(maxAge, HOURS);