This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit d8540cb3963fbe4419a38cbb5ed7833bd3668ab3
Author: Rishabh Kumar <d...@adobe.com>
AuthorDate: Wed Apr 26 21:16:44 2023 +0530

    OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps
---
 .../oak/plugins/document/NodeDocument.java         |  12 +-
 .../plugins/document/VersionGCRecommendations.java |  36 +-
 .../plugins/document/VersionGarbageCollector.java  | 383 ++++++++++++---------
 .../document/mongo/MongoVersionGCSupport.java      |  51 ++-
 .../plugins/document/rdb/RDBDocumentStoreJDBC.java |   1 +
 .../plugins/document/rdb/RDBVersionGCSupport.java  |  56 +++
 .../oak/plugins/document/DetailGCHelper.java       |  22 +-
 .../oak/plugins/document/NodeDocumentTest.java     |  32 ++
 .../oak/plugins/document/VersionGCInitTest.java    |  17 +
 .../oak/plugins/document/VersionGCStatsTest.java   |  15 +
 .../oak/plugins/document/VersionGCTest.java        |  13 +-
 .../document/VersionGarbageCollectorIT.java        |  64 ++++
 12 files changed, 482 insertions(+), 220 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
index 71abba0a2e..66a1bc2eae 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
@@ -32,7 +32,6 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
-import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Predicate;
@@ -59,6 +58,7 @@ import org.apache.jackrabbit.guava.common.collect.Iterables;
 import org.apache.jackrabbit.guava.common.collect.Maps;
 import org.apache.jackrabbit.guava.common.collect.Sets;
 
+import static java.util.stream.Collectors.toSet;
 import static org.apache.jackrabbit.guava.common.base.Objects.equal;
 import static org.apache.jackrabbit.guava.common.base.Preconditions.checkArgument;
 import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
@@ -67,7 +67,6 @@ import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.mergeSorted;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static java.util.Objects.requireNonNull;
-import static java.util.stream.Collectors.toMap;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
 import static org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key;
@@ -1672,17 +1671,16 @@ public final class NodeDocument extends Document {
     }
 
     /**
-     * Returns all the properties on this document
-     * @return Map of all properties along with their values
+     * Returns name of all the properties on this document
+     * @return Set of all property names
      */
     @NotNull
-    Map<String, SortedMap<Revision, String>> getProperties() {
+    Set<String> getPropertyNames() {
         return data
                 .keySet()
                 .stream()
                 .filter(Utils::isPropertyName)
-                .map(o -> Map.entry(o, getLocalMap(o)))
-                .collect(toMap(Entry::getKey, Entry::getValue));
+                .collect(toSet());
     }
 
     /**
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java index d8b091261d..f04b56fc52 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java @@ -18,6 +18,7 @@ */ package org.apache.jackrabbit.oak.plugins.document; +import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -30,9 +31,7 @@ import org.apache.jackrabbit.oak.stats.Clock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.jackrabbit.guava.common.collect.Maps; - -import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP; import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP; /** @@ -51,7 +50,7 @@ public class VersionGCRecommendations { final long maxCollect; final long deleteCandidateCount; final long lastOldestTimestamp; - final long fullDetailGCTimestamp; + final long detailedGCTimestamp; final long originalCollectLimit; private final long precisionMs; @@ -96,7 +95,7 @@ public class VersionGCRecommendations { TimeInterval keep = new TimeInterval(clock.getTime() - maxRevisionAgeMs, Long.MAX_VALUE); Map<String, Long> settings = getLongSettings(); - lastOldestTimestamp = settings.get(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP); + lastOldestTimestamp = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP); if (lastOldestTimestamp == 0) { log.debug("No lastOldestTimestamp found, querying for the oldest deletedOnce candidate"); oldestPossible = vgc.getOldestDeletedOnceTimestamp(clock, options.precisionMs) - 1; @@ -108,17 +107,17 @@ public class VersionGCRecommendations { TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE); scope = scope.notLaterThan(keep.fromMs); - fullDetailGCTimestamp = settings.get(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP); - if (fullDetailGCTimestamp == 0) { + detailedGCTimestamp = settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP); + if (detailedGCTimestamp == 0) { if (log.isDebugEnabled()) { - log.debug("No fullDetailGCTimestamp found, querying for the oldest deletedOnce candidate"); + log.debug("No detailedGCTimestamp found, querying for the oldest deletedOnce candidate"); } oldestPossibleFullGC = vgc.getOldestModifiedTimestamp(clock) - 1; if (log.isDebugEnabled()) { - log.debug("fullDetailGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC)); + log.debug("detailedGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC)); } } else { - oldestPossibleFullGC = fullDetailGCTimestamp - 1; + oldestPossibleFullGC = detailedGCTimestamp - 1; } TimeInterval scopeFullGC = new TimeInterval(oldestPossibleFullGC, Long.MAX_VALUE); @@ -206,10 +205,8 @@ public class VersionGCRecommendations { stats.needRepeat = true; } else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) { // success, we would not expect to encounter revisions older than this in the future -// setLongSetting(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs); -// setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced); 
setLongSetting(ImmutableMap.of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs, - SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced)); + SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedGced)); int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount; double usedFraction; @@ -240,11 +237,11 @@ public class VersionGCRecommendations { private Map<String, Long> getLongSettings() { Document versionGCDoc = vgc.getDocumentStore().find(Collection.SETTINGS, VersionGarbageCollector.SETTINGS_COLLECTION_ID, 0); - Map<String, Long> settings = Maps.newHashMap(); + Map<String, Long> settings = new HashMap<>(); // default values - settings.put(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L); + settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L); settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L); - settings.put(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0L); + settings.put(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 0L); if (versionGCDoc != null) { for (String k : versionGCDoc.keySet()) { Object value = versionGCDoc.get(k); @@ -256,14 +253,11 @@ public class VersionGCRecommendations { return settings; } - void setLongSetting(String propName, long val) { + private void setLongSetting(String propName, long val) { setLongSetting(Map.of(propName, val)); -// UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true); -// updateOp.set(propName, val); -// vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp); } - void setLongSetting(final Map<String, Long> propValMap) { + private void setLongSetting(final Map<String, Long> propValMap) { UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true); propValMap.forEach(updateOp::set); vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index 608ba02398..27ee36204a 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -21,16 +21,18 @@ package org.apache.jackrabbit.oak.plugins.document; import java.io.Closeable; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; -import java.util.SortedMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import org.apache.jackrabbit.guava.common.base.Function; import org.apache.jackrabbit.guava.common.base.Joiner; @@ -55,7 +57,11 @@ import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static java.lang.Math.round; +import static java.util.Collections.emptySet; import static java.util.Objects.requireNonNull; +import static java.util.Optional.ofNullable; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR; import static 
org.apache.jackrabbit.guava.common.collect.Iterables.all; import static org.apache.jackrabbit.guava.common.collect.Iterators.partition; @@ -67,14 +73,10 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_I import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH; -import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE; import static org.slf4j.helpers.MessageFormatter.arrayFormat; public class VersionGarbageCollector { - /** TODO temporary global flag to enable 'detail gc' during prototyping. Should eventually become eg a system property */ - public static boolean DETAIL_GC_ENABLED = false; - //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning private static final int DELETE_BATCH_SIZE = 450; private static final int UPDATE_BATCH_SIZE = 450; @@ -105,15 +107,9 @@ public class VersionGarbageCollector { static final String SETTINGS_COLLECTION_REC_INTERVAL_PROP = "recommendedIntervalMs"; /** - * Property name to timestamp when last full-detail-GC run happened, or -1 if not applicable/in-use. - * <p> - * <ul> - * <li>-1 : full repo scan is disabled</li> - * <li>0 : full repo scan is enabled and bound to start from zero == oldest _modified </li> - * <li>gt 0 : full repo scan is enabled, was already done up until this value</li> - * </ul> + * Property name to timestamp till when last detailed-GC run happened */ - static final String SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP = "fullDetailGCTimeStamp"; + static final String SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP = "detailedGCTimeStamp"; private final DocumentNodeStore nodeStore; private final DocumentStore ds; @@ -264,7 +260,6 @@ public class VersionGarbageCollector { } public static class VersionGCStats { - public long oldestModifiedGced; boolean ignoredGCDueToCheckPoint; boolean canceled; boolean success = true; @@ -276,18 +271,26 @@ public class VersionGarbageCollector { int splitDocGCCount; int intermediateSplitDocGCCount; int updateResurrectedGCCount; + long oldestModifiedGced; + int updatedDetailedGCDocsCount; + int deletedPropsGCCount; final TimeDurationFormatter df = TimeDurationFormatter.forLogging(); final Stopwatch active = Stopwatch.createUnstarted(); final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted(); final Stopwatch checkDeletedDocs = Stopwatch.createUnstarted(); - final Stopwatch detailedGcDocs = Stopwatch.createUnstarted(); - final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted(); - final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted(); + final Stopwatch detailedGCDocs = Stopwatch.createUnstarted(); + final Stopwatch deleteDetailedGCDocs = Stopwatch.createUnstarted(); final Stopwatch deleteSplitDocs = Stopwatch.createUnstarted(); final Stopwatch sortDocIds = Stopwatch.createUnstarted(); final Stopwatch updateResurrectedDocuments = Stopwatch.createUnstarted(); + final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted(); + final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted(); + final Stopwatch collectDeletedProps = Stopwatch.createUnstarted(); + final Stopwatch collectDeletedOldRevs = Stopwatch.createUnstarted(); + final Stopwatch collectUnmergedBC = Stopwatch.createUnstarted(); long activeElapsed, collectDeletedDocsElapsed, 
checkDeletedDocsElapsed, deleteDeletedDocsElapsed, collectAndDeleteSplitDocsElapsed, - deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailedGcDocsElapsed; + deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailedGCDocsElapsed, collectDeletedPropsElapsed, + deleteDetailedGCDocsElapsed, collectDeletedOldRevsElapsed, collectUnmergedBCElapsed; @Override public String toString() { @@ -306,6 +309,11 @@ public class VersionGarbageCollector { df.format(updateResurrectedDocumentsElapsed, MICROSECONDS), df.format(deleteDeletedDocsElapsed, MICROSECONDS), df.format(collectAndDeleteSplitDocsElapsed, MICROSECONDS), + df.format(detailedGCDocsElapsed, MICROSECONDS), + df.format(deleteDetailedGCDocsElapsed, MICROSECONDS), + df.format(collectDeletedPropsElapsed, MICROSECONDS), + df.format(collectDeletedOldRevsElapsed, MICROSECONDS), + df.format(collectUnmergedBCElapsed, MICROSECONDS), timeDeletingSplitDocs); } else { String timeDeletingSplitDocs = ""; @@ -319,17 +327,24 @@ public class VersionGarbageCollector { df.format(updateResurrectedDocuments.elapsed(MICROSECONDS), MICROSECONDS), df.format(deleteDeletedDocs.elapsed(MICROSECONDS), MICROSECONDS), df.format(collectAndDeleteSplitDocs.elapsed(MICROSECONDS), MICROSECONDS), + df.format(detailedGCDocs.elapsed(MICROSECONDS), MICROSECONDS), + df.format(deleteDetailedGCDocs.elapsed(MICROSECONDS), MICROSECONDS), + df.format(collectDeletedProps.elapsed(MICROSECONDS), MICROSECONDS), + df.format(collectDeletedOldRevs.elapsed(MICROSECONDS), MICROSECONDS), + df.format(collectUnmergedBC.elapsed(MICROSECONDS), MICROSECONDS), timeDeletingSplitDocs); } return "VersionGCStats{" + "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint + - ", oldestModifiedGced=" + oldestModifiedGced + ", canceled=" + canceled + ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" + ", updateResurrectedGCCount=" + updateResurrectedGCCount + ", splitDocGCCount=" + splitDocGCCount + ", intermediateSplitDocGCCount=" + intermediateSplitDocGCCount + + ", oldestModifiedGced=" + oldestModifiedGced + + ", updatedDetailedGCDocsCount=" + updatedDetailedGCDocsCount + + ", deletedPropsGCCount=" + deletedPropsGCCount + ", iterationCount=" + iterationCount + ", timeActive=" + df.format(activeElapsed, MICROSECONDS) + ", " + timings + "}"; @@ -338,7 +353,6 @@ public class VersionGarbageCollector { void addRun(VersionGCStats run) { ++iterationCount; this.ignoredGCDueToCheckPoint = run.ignoredGCDueToCheckPoint; - this.oldestModifiedGced = run.oldestModifiedGced; this.canceled = run.canceled; this.success = run.success; this.limitExceeded = run.limitExceeded; @@ -348,6 +362,9 @@ public class VersionGarbageCollector { this.splitDocGCCount += run.splitDocGCCount; this.intermediateSplitDocGCCount += run.intermediateSplitDocGCCount; this.updateResurrectedGCCount += run.updateResurrectedGCCount; + this.oldestModifiedGced = run.oldestModifiedGced; + this.updatedDetailedGCDocsCount += run.updatedDetailedGCDocsCount; + this.deletedPropsGCCount += run.deletedPropsGCCount; if (run.iterationCount > 0) { // run is cumulative with times in elapsed fields this.activeElapsed += run.activeElapsed; @@ -358,7 +375,11 @@ public class VersionGarbageCollector { this.deleteSplitDocsElapsed += run.deleteSplitDocsElapsed; this.sortDocIdsElapsed += run.sortDocIdsElapsed; this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocumentsElapsed; - this.detailedGcDocsElapsed += run.detailedGcDocsElapsed; + 
this.detailedGCDocsElapsed += run.detailedGCDocsElapsed; + this.deleteDetailedGCDocsElapsed += run.deleteDetailedGCDocsElapsed; + this.collectDeletedPropsElapsed += run.collectDeletedPropsElapsed; + this.collectDeletedOldRevsElapsed += run.collectDeletedOldRevsElapsed; + this.collectUnmergedBCElapsed += run.collectUnmergedBCElapsed; } else { // single run -> read from stop watches this.activeElapsed += run.active.elapsed(MICROSECONDS); @@ -369,7 +390,11 @@ public class VersionGarbageCollector { this.deleteSplitDocsElapsed += run.deleteSplitDocs.elapsed(MICROSECONDS); this.sortDocIdsElapsed += run.sortDocIds.elapsed(MICROSECONDS); this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocuments.elapsed(MICROSECONDS); - this.detailedGcDocsElapsed += run.detailedGcDocs.elapsed(MICROSECONDS); + this.detailedGCDocsElapsed += run.detailedGCDocs.elapsed(MICROSECONDS); + this.deleteDetailedGCDocsElapsed += run.deleteDetailedGCDocs.elapsed(MICROSECONDS); + this.collectDeletedPropsElapsed += run.collectDeletedProps.elapsed(MICROSECONDS); + this.collectDeletedOldRevsElapsed += run.collectDeletedOldRevs.elapsed(MICROSECONDS); + this.collectUnmergedBCElapsed += run.collectUnmergedBC.elapsed(MICROSECONDS); } } } @@ -378,10 +403,14 @@ public class VersionGarbageCollector { NONE, COLLECTING, CHECKING, - DETAILED_GC, DELETING, SORTING, SPLITS_CLEANUP, + DETAILED_GC, + COLLECT_PROPS, + COLLECT_OLD_REVS, + COLLECT_UNMERGED_BC, + DETAILED_GC_CLEANUP, UPDATING } @@ -406,11 +435,15 @@ public class VersionGarbageCollector { this.watches.put(GCPhase.NONE, Stopwatch.createStarted()); this.watches.put(GCPhase.COLLECTING, stats.collectDeletedDocs); this.watches.put(GCPhase.CHECKING, stats.checkDeletedDocs); - this.watches.put(GCPhase.DETAILED_GC, stats.detailedGcDocs); this.watches.put(GCPhase.DELETING, stats.deleteDeletedDocs); this.watches.put(GCPhase.SORTING, stats.sortDocIds); this.watches.put(GCPhase.SPLITS_CLEANUP, stats.collectAndDeleteSplitDocs); this.watches.put(GCPhase.UPDATING, stats.updateResurrectedDocuments); + this.watches.put(GCPhase.DETAILED_GC, stats.detailedGCDocs); + this.watches.put(GCPhase.COLLECT_PROPS, stats.collectDeletedProps); + this.watches.put(GCPhase.COLLECT_OLD_REVS, stats.collectDeletedOldRevs); + this.watches.put(GCPhase.COLLECT_UNMERGED_BC, stats.collectUnmergedBC); + this.watches.put(GCPhase.DETAILED_GC_CLEANUP, stats.deleteDetailedGCDocs); this.canceled = canceled; } @@ -534,7 +567,7 @@ public class VersionGarbageCollector { collectDeletedDocuments(phases, headRevision, rec); collectSplitDocuments(phases, sweepRevisions, rec); if (detailedGCEnabled) { - // run only if enabled + // run only if detailed GC enabled collectDetailedGarbage(phases, headRevision, rec); } } @@ -568,102 +601,74 @@ public class VersionGarbageCollector { * it is okay that it takes a considerable amount of time. 
* * @param phases {@link GCPhases} - * @param headRevision the current head revision of - * @throws IOException - * @throws LimitExceededException + * @param headRevision the current head revision of node store */ private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec) throws IOException, LimitExceededException { int docsTraversed = 0; - long oldestModifiedGced = rec.scopeFullGC.fromMs; + boolean foundDoc = true; + long oldestModifiedGCed = rec.scopeFullGC.fromMs; try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) { final long fromModified = rec.scopeFullGC.fromMs; final long toModified = rec.scopeFullGC.toMs; -// if (rec.fullDetailGCTimestamp == -1) { -// // then full detail-gc is disabled or over - use regular scope then -// fromModified = rec.scope.fromMs; -// toModified = rec.scope.toMs; -// } else { -// // then full detail-gc is enabled - use it then -// fromModified = rec.fullDetailGCTimestamp; // TODO: once we're passed rec.scope.fromMs we should -// // disable fullgc -// toModified = rec.scope.toMs; // the 'to' here is the max. it will process only eg 1 batch -// } - // TODO : remove me - boolean foundAnything = false; // I think this flag is redundant - if (phases.start(GCPhase.COLLECTING)) { - Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGced, toModified, 2000); - final Stopwatch timer = Stopwatch.createUnstarted(); - timer.reset().start(); - try { - for (NodeDocument doc : itr) { - // continue with GC? - if (cancel.get()) { - break; - } - if (phases.start(GCPhase.DETAILED_GC)) { - gc.detailedGC(doc, phases); - phases.stop(GCPhase.DETAILED_GC); - } + if (phases.start(GCPhase.DETAILED_GC)) { + while (foundDoc && oldestModifiedGCed < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) { + // set foundDoc to false to allow exiting the while loop + foundDoc = false; + Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGCed, toModified, 1000); + try { + for (NodeDocument doc : itr) { + foundDoc = true; + // continue with GC? + if (cancel.get()) { + break; + } + docsTraversed++; + if (docsTraversed % PROGRESS_BATCH_SIZE == 0) { + monitor.info("Iterated through {} documents so far. {} had detail garbage", + docsTraversed, gc.getGarbageDocsCount()); + } - // TODO : remove this code, I don't think its possible to fetch these documents - // who doesn't have _modified field - final Long modified = doc.getModified(); - if (modified == null) { - monitor.warn("collectDetailGarbage : document has no _modified property : {}", - doc.getId()); - } else if (modified < oldestModifiedGced) { - monitor.warn( - "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})", - modified, fromModified, toModified); - } else { - oldestModifiedGced = modified; - } - foundAnything = true; - docsTraversed++; - if (docsTraversed % PROGRESS_BATCH_SIZE == 0) { - monitor.info("Iterated through {} documents so far. {} had detail garbage", - docsTraversed, gc.getNumDocuments()); - } - // this would never hit, since we are only fetching the oldest 2000 element in batches of 1000 - // TODO: remove this if above mentioned logic is fine - if (rec.maxCollect > 0 && gc.getNumDocuments() > rec.maxCollect) { - // TODO: how would we recover from this? 
- // If we don't want above solution, then one of the another solution is to use lower time duration - // as done in document deletion process or use lower limit value or - // we should perform all the update ops in 1 go - throw new LimitExceededException(); + // collect the data to delete in next step + if (phases.start(GCPhase.COLLECTING)) { + gc.collectGarbage(doc, phases); + phases.stop(GCPhase.COLLECTING); + } + + // TODO : remove this code, I don't think its possible to fetch these documents + // who doesn't have _modified field + final Long modified = doc.getModified(); + if (modified == null) { + monitor.warn("collectDetailGarbage : document has no _modified property : {}", + doc.getId()); + } else if (modified < oldestModifiedGCed) { + monitor.warn( + "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})", + modified, fromModified, toModified); + } else { + oldestModifiedGCed = modified; + } + + if (gc.hasGarbage()) { + if (phases.start(GCPhase.DETAILED_GC_CLEANUP)) { + gc.removeGarbage(phases.stats); + phases.stop(GCPhase.DETAILED_GC_CLEANUP); + } + } + + oldestModifiedGCed = modified == null ? fromModified : modified; } - oldestModifiedGced = modified == null ? fromModified : modified; + } finally { + Utils.closeIfCloseable(itr); + phases.stats.oldestModifiedGced = oldestModifiedGCed; } - } finally { - Utils.closeIfCloseable(itr); - // why do we need to stop this here, we are already stopping the original gc run. - // can this be removed - delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS)); - phases.stats.oldestModifiedGced = oldestModifiedGced; } - phases.stop(GCPhase.COLLECTING); -// if (!cancel.get() && foundAnything) { -// // TODO: move to evaluate() -// rec.setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, oldestModifiedGced + 1); -// } + phases.stop(GCPhase.DETAILED_GC); } } } - private void delayOnModifications(long durationMs) { - long delayMs = Math.round(durationMs * options.delayFactor); - if (!cancel.get() && delayMs > 0) { - try { - Clock clock = nodeStore.getClock(); - clock.waitUntil(clock.getTime() + delayMs); - } - catch (InterruptedException ex) { - /* ignore */ - } - } - } + private void collectSplitDocuments(GCPhases phases, RevisionVector sweepRevisions, @@ -752,77 +757,146 @@ public class VersionGarbageCollector { } } - private static class DetailedGC implements Closeable { + private class DetailedGC implements Closeable { private final RevisionVector headRevision; private final GCMonitor monitor; private final AtomicBoolean cancel; - private int count; + private final Stopwatch timer; + private final List<UpdateOp> updateOpList; + private int garbageDocsCount; public DetailedGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) { this.headRevision = requireNonNull(headRevision); this.monitor = monitor; this.cancel = cancel; + this.updateOpList = new ArrayList<>(); + this.timer = Stopwatch.createUnstarted(); } - public void detailedGC(NodeDocument doc, GCPhases phases) { -// deleteSample(doc, phases); - UpdateOp updateOp = new UpdateOp(requireNonNull(doc.getId()), false); - deleteDeletedProperties(doc, phases, updateOp); - deleteUnmergedBranchCommitDocument(doc, phases, updateOp); - deleteOldRevisions(doc, phases, updateOp); + public void collectGarbage(final NodeDocument doc, final GCPhases phases) { + + monitor.info("Collecting Detailed Garbage for doc [{}]", doc.getId()); + + final UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), 
false); + collectDeletedProperties(doc, phases, op); + collectUnmergedBranchCommitDocument(doc, phases, op); + collectOldRevisions(doc, phases, op); + // only add if there are changes for this doc + if (op.hasChanges()) { + garbageDocsCount++; + monitor.info("Collected [{}] garbage for doc [{}]", op.getChanges().size(), doc.getId()); + updateOpList.add(op); + } } - /** TODO remove, this is just a skeleton sample */ -// private void deleteSample(NodeDocument doc, GCPhases phases) { -// if (doc.getId().contains("should_delete")) { -// if (phases.start(GCPhase.DELETING)) { -// monitor.info("deleteSample: should do the deletion now, but this is demo only. I'm still learning"); -// System.out.println("do the actual deletion"); -// count++; -// phases.stop(GCPhase.DELETING); -// } -// } -// } + private boolean hasGarbage() { + return garbageDocsCount > 0; + } - private void deleteUnmergedBranchCommitDocument(NodeDocument doc, GCPhases phases, UpdateOp updateOp) { - // TODO Auto-generated method stub + private void collectUnmergedBranchCommitDocument(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) { + if (phases.start(GCPhase.COLLECT_UNMERGED_BC)){ + // TODO add umerged BC collection logic + phases.stop(GCPhase.COLLECT_UNMERGED_BC); + } } - private void deleteDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) { + private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) { // get Map of all properties along with their values - final Map<String, SortedMap<Revision, String>> properties = doc.getProperties(); - - // find all the properties which can be removed from document - // All the properties whose value is null in their respective - // latest revision are eligible to be garbage collected. - properties.forEach((propName, revisionStringSortedMap) -> { - if (revisionStringSortedMap.keySet() - .stream() - .sorted(REVERSE) - .limit(1) - .anyMatch(revision -> revisionStringSortedMap.get(revision) == null)) { - // set this property for removal - updateOp.remove(propName); + if (phases.start(GCPhase.COLLECT_PROPS)) { + final Set<String> properties = doc.getPropertyNames(); + + // find all the properties which can be removed from document. + // All the properties whose value is null in head revision are + // eligible to be garbage collected. 
+ + final Set<String> retainPropSet = ofNullable(doc.getNodeAtRevision(nodeStore, headRevision, null)) + .map(DocumentNodeState::getPropertyNames) + .orElse(emptySet()); + final int deletedPropsGCCount = properties.stream() + .filter(p -> !retainPropSet.contains(p)) + .mapToInt(x -> { + updateOp.remove(x); + return 1;}) + .sum(); + + + phases.stats.deletedPropsGCCount += deletedPropsGCCount; + if (log.isDebugEnabled()) { + log.debug("Collected {} deleted properties for document {}", deletedPropsGCCount, doc.getId()); } - }); + phases.stop(GCPhase.COLLECT_PROPS); + } } - private void deleteOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) { - // TODO Auto-generated method stub + private void collectOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) { + + if (phases.start(GCPhase.COLLECT_OLD_REVS)){ + // TODO add old rev collection logic + phases.stop(GCPhase.COLLECT_OLD_REVS); + } } - long getNumDocuments() { - return count; + int getGarbageDocsCount() { + return garbageDocsCount; } @Override public void close() throws IOException { } + + public void removeGarbage(final VersionGCStats stats) { + + if (updateOpList.isEmpty()) { + if (log.isDebugEnabled()) { + log.debug("Skipping removal of detailed garbage, cause no garbage detected"); + } + return; + } + + int updatedDocs; + + monitor.info("Proceeding to update [{}] documents", updateOpList.size()); + + if (log.isDebugEnabled()) { + String collect = updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(",")); + log.trace("Performing batch update of documents with following id's. \n" + collect); + } + + if (cancel.get()) { + log.info("Aborting the removal of detailed garbage since RGC had been cancelled"); + return; + } + + timer.reset().start(); + try { + // TODO create an api to bulk update findAndUpdate Ops + updatedDocs = (int) updateOpList.stream().map(op -> ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count(); + stats.updatedDetailedGCDocsCount += updatedDocs; + log.info("Updated [{}] documents", updatedDocs); + // now reset delete metadata + updateOpList.clear(); + garbageDocsCount = 0; + } finally { + delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel); + } + } + } + private void delayOnModifications(final long durationMs, final AtomicBoolean cancel) { + long delayMs = round(durationMs * options.delayFactor); + if (!cancel.get() && delayMs > 0) { + try { + Clock clock = nodeStore.getClock(); + clock.waitUntil(clock.getTime() + delayMs); + } + catch (InterruptedException ex) { + /* ignore */ + } + } } /** @@ -959,19 +1033,6 @@ public class VersionGarbageCollector { //------------------------------< internal >---------------------------- - private void delayOnModifications(long durationMs) { - long delayMs = Math.round(durationMs * options.delayFactor); - if (!cancel.get() && delayMs > 0) { - try { - Clock clock = nodeStore.getClock(); - clock.waitUntil(clock.getTime() + delayMs); - } - catch (InterruptedException ex) { - /* ignore */ - } - } - } - private Iterator<String> previousDocIdsFor(NodeDocument doc) { Map<Revision, Range> prevRanges = doc.getPreviousRanges(true); if (prevRanges.isEmpty()) { @@ -1127,7 +1188,7 @@ public class VersionGarbageCollector { monitor.info(msg); } } finally { - delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS)); + delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS), cancel); } } return deletedCount; @@ -1160,7 +1221,7 @@ public class VersionGarbageCollector { } } finally { - 
delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS)); + delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS), cancel); } return updateCount; } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index e34d8f36b0..4d01e5d3da 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -19,6 +19,8 @@ package org.apache.jackrabbit.oak.plugins.document.mongo; +import static java.util.Optional.ofNullable; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.jackrabbit.guava.common.collect.Iterables.concat; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.guava.common.collect.Iterables.transform; @@ -42,6 +44,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import java.util.regex.Pattern; import org.apache.jackrabbit.oak.plugins.document.Document; @@ -111,10 +114,10 @@ public class MongoVersionGCSupport extends VersionGCSupport { @Override public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) { //_deletedOnce == true && _modified >= fromModified && _modified < toModified - Bson query = and( + Bson query = Filters.and( Filters.eq(DELETED_ONCE, true), - gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), - lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)) + Filters.gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), + Filters.lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)) ); FindIterable<BasicDBObject> cursor = getNodeCollection() .find(query).batchSize(batchSize); @@ -139,9 +142,10 @@ public class MongoVersionGCSupport extends VersionGCSupport { // _modified >= fromModified && _modified < toModified final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))); + final Bson sort = Filters.eq(MODIFIED_IN_SECS, 1); final FindIterable<BasicDBObject> cursor = getNodeCollection() .find(query) - .sort(new org.bson.Document(MODIFIED_IN_SECS, 1)) + .sort(sort) .limit(limit); return CloseableIterable.wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input))); } @@ -219,6 +223,35 @@ public class MongoVersionGCSupport extends VersionGCSupport { return result.get(0); } + /** + * Retrieve the time of the oldest modified document. + * + * @param clock System Clock to measure time in accuracy of millis + * @return the timestamp of the oldest modified document. 
+ */ + @Override + public long getOldestModifiedTimestamp(final Clock clock) { + LOG.info("getOldestModifiedTimestamp() <- start"); + + final Bson sort = Filters.eq(MODIFIED_IN_SECS, 1); + final List<Long> result = new ArrayList<>(1); + + getNodeCollection().find().sort(sort).limit(1).forEach( + (Consumer<BasicDBObject>) document -> + ofNullable(store.convertFromDBObject(NODES, document)) + .ifPresent(doc -> { + long modifiedMs = SECONDS.toMillis(ofNullable(doc.getModified()).orElse(0L)); + LOG.info("getOldestDeletedOnceTimestamp() -> {}", Utils.timestampToString(modifiedMs)); + result.add(modifiedMs); + })); + + if (result.isEmpty()) { + LOG.info("getOldestModifiedTimestamp() -> none found, return current time"); + result.add(clock.getTime()); + } + return result.get(0); + } + private List<Bson> createQueries(Set<SplitDocType> gcTypes, RevisionVector sweepRevs, long oldestRevTimeStamp) { @@ -233,9 +266,9 @@ public class MongoVersionGCSupport extends VersionGCSupport { } // OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index - result.add(and( + result.add(Filters.and( Filters.or(orClauses), - lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp)) + Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp)) )); return result; @@ -266,16 +299,16 @@ public class MongoVersionGCSupport extends VersionGCSupport { Bson idPathClause = Filters.or( Filters.regex(ID, Pattern.compile(".*" + idSuffix)), // previous documents with long paths do not have a '-' in the id - and( + Filters.and( Filters.regex(ID, Pattern.compile("[^-]*")), Filters.regex(PATH, Pattern.compile(".*" + idSuffix)) ) ); long minMaxRevTimeInSecs = Math.min(maxRevTimeInSecs, getModifiedInSecs(r.getTimestamp())); - result.add(and( + result.add(Filters.and( Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()), - lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs), + Filters.lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs), idPathClause )); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java index 5caa65d875..26fc1311fa 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java @@ -715,6 +715,7 @@ public class RDBDocumentStoreJDBC { } if (sortBy != null) { + // FIXME : order should be determined via sortBy field query.append(" order by ID"); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java index a463499793..f26268bcd3 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java @@ -16,7 +16,13 @@ */ package org.apache.jackrabbit.oak.plugins.document.rdb; +import static java.util.Collections.emptyList; +import static java.util.List.of; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; +import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; +import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; +import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN; import java.io.Closeable; import java.io.IOException; @@ -85,6 +91,29 @@ public class RDBVersionGCSupport extends VersionGCSupport { } } + /** + * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value + * within the given range .The two passed modified timestamps are in milliseconds + * since the epoch and the implementation will convert them to seconds at + * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and + * then perform the comparison. + * + * @param fromModified the lower bound modified timestamp (inclusive) + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @return matching documents. + */ + @Override + public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) { + List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)), + new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified))); + if (MODE == 1) { + return getIterator(EMPTY_KEY_PATTERN, conditions); + } else { + return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, MODIFIED_IN_SECS); + } + } + @Override protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs, final long oldestRevTimeStamp) { @@ -239,6 +268,33 @@ public class RDBVersionGCSupport extends VersionGCSupport { } } + /** + * Retrieve the time of the oldest modified document. + * + * @param clock System Clock + * @return the timestamp of the oldest modified document. 
+ */ + @Override + public long getOldestModifiedTimestamp(Clock clock) { + long modifiedMs = Long.MIN_VALUE; + + LOG.info("getOldestModifiedTimestamp() <- start"); + try { + long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, null, null, EMPTY_KEY_PATTERN, emptyList()); + modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec); + } catch (DocumentStoreException ex) { + LOG.error("getOldestModifiedTimestamp()", ex); + } + + if (modifiedMs > 0) { + LOG.info("getOldestModifiedTimestamp() -> {}", Utils.timestampToString(modifiedMs)); + return modifiedMs; + } else { + LOG.info("getOldestModifiedTimestamp() -> none found, return current time"); + return clock.getTime(); + } + } + @Override public long getDeletedOnceCount() { return store.queryCount(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN, diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java index 8a585c7dc0..d52c5e33ae 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java @@ -18,25 +18,19 @@ */ package org.apache.jackrabbit.oak.plugins.document; -public class DetailGCHelper { +import static org.apache.commons.lang3.reflect.FieldUtils.writeField; - public static void setLongSetting(String propName, long val, DocumentNodeStore ns) { - UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true); - updateOp.set(propName, val); - ns.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp); - } +public class DetailGCHelper { - public static void enableDetailGC(DocumentNodeStore ns) { - VersionGarbageCollector.DETAIL_GC_ENABLED = true; - if (ns != null) { - setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0, ns); + public static void enableDetailGC(final VersionGarbageCollector vgc) throws IllegalAccessException { + if (vgc != null) { + writeField(vgc, "detailedGCEnabled", true, true); } } - public static void disableDetailGC(DocumentNodeStore ns) { - VersionGarbageCollector.DETAIL_GC_ENABLED = false; - if (ns != null) { - setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, -1, ns); + public static void disableDetailGC(final VersionGarbageCollector vgc) throws IllegalAccessException { + if (vgc != null) { + writeField(vgc, "detailedGCEnabled", false, true); } } } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java index b99897dfb0..2adcc9f86a 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java @@ -449,6 +449,38 @@ public class NodeDocumentTest { assertEquals(140, uncommittedRevisions); } + @Test + public void getPropertyNames() throws CommitFailedException { + DocumentStore store = new MemoryDocumentStore(); + DocumentNodeStore ns = new DocumentMK.Builder().setDocumentStore(store).setAsyncDelay(0).getNodeStore(); + + // add properties + for (int i = 0; i < 10; i++) { + NodeBuilder nb = ns.getRoot().builder(); + nb.child("x").setProperty("p"+i, i); + merge(ns, nb); + } + + final NodeDocument 
nodeDocument = store.find(NODES, "1:/x"); + assert nodeDocument != null; + assertEquals(10, nodeDocument.getPropertyNames().size()); + } + + @Test + public void getNoPropertyNames() throws CommitFailedException { + DocumentStore store = new MemoryDocumentStore(); + DocumentNodeStore ns = new DocumentMK.Builder().setDocumentStore(store).setAsyncDelay(0).getNodeStore(); + + // add no property + NodeBuilder nb = ns.getRoot().builder(); + nb.child("x"); + merge(ns, nb); + + final NodeDocument nodeDocument = store.find(NODES, "1:/x"); + assert nodeDocument != null; + assertEquals(0, nodeDocument.getPropertyNames().size()); + } + @Test public void getNewestRevisionTooExpensive() throws Exception { final int NUM_CHANGES = 200; diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java index 055188be46..ed39a372b2 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java @@ -22,6 +22,9 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; +import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -49,6 +52,20 @@ public class VersionGCInitTest { vgc = store.find(Collection.SETTINGS, "versionGC"); assertNotNull(vgc); + assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); } + @Test + public void lazyInitializeWithDetailedGC() throws Exception { + DocumentStore store = ns.getDocumentStore(); + Document vgc = store.find(Collection.SETTINGS, "versionGC"); + assertNull(vgc); + + enableDetailGC(ns.getVersionGarbageCollector()); + ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS); + + vgc = store.find(Collection.SETTINGS, "versionGC"); + assertNotNull(vgc); + assertEquals(-1L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + } } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java index 13448e5403..1515ea9312 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java @@ -67,6 +67,11 @@ public class VersionGCStatsTest { assertEquals(stats.collectAndDeleteSplitDocs.elapsed(MICROSECONDS), cumulative.collectAndDeleteSplitDocsElapsed); assertEquals(stats.sortDocIds.elapsed(MICROSECONDS), cumulative.sortDocIdsElapsed); assertEquals(stats.updateResurrectedDocuments.elapsed(MICROSECONDS), cumulative.updateResurrectedDocumentsElapsed); + assertEquals(stats.detailedGCDocs.elapsed(MICROSECONDS), cumulative.detailedGCDocsElapsed); + assertEquals(stats.deleteDetailedGCDocs.elapsed(MICROSECONDS), cumulative.deleteDetailedGCDocsElapsed); + assertEquals(stats.collectDeletedProps.elapsed(MICROSECONDS), cumulative.collectDeletedPropsElapsed); + assertEquals(stats.collectDeletedOldRevs.elapsed(MICROSECONDS), cumulative.collectDeletedOldRevsElapsed); + 
assertEquals(stats.collectUnmergedBC.elapsed(MICROSECONDS), cumulative.collectUnmergedBCElapsed); } @Test @@ -83,6 +88,11 @@ public class VersionGCStatsTest { assertEquals(stats.collectAndDeleteSplitDocs.elapsed(MICROSECONDS) * 2, cumulative.collectAndDeleteSplitDocsElapsed); assertEquals(stats.sortDocIds.elapsed(MICROSECONDS) * 2, cumulative.sortDocIdsElapsed); assertEquals(stats.updateResurrectedDocuments.elapsed(MICROSECONDS) * 2, cumulative.updateResurrectedDocumentsElapsed); + assertEquals(stats.detailedGCDocs.elapsed(MICROSECONDS) * 2, cumulative.detailedGCDocsElapsed); + assertEquals(stats.deleteDetailedGCDocs.elapsed(MICROSECONDS) * 2, cumulative.deleteDetailedGCDocsElapsed); + assertEquals(stats.collectDeletedProps.elapsed(MICROSECONDS) * 2, cumulative.collectDeletedPropsElapsed); + assertEquals(stats.collectDeletedOldRevs.elapsed(MICROSECONDS) * 2, cumulative.collectDeletedOldRevsElapsed); + assertEquals(stats.collectUnmergedBC.elapsed(MICROSECONDS) * 2, cumulative.collectUnmergedBCElapsed); } private void forEachStopwatch(VersionGCStats stats, Callable c) { @@ -93,6 +103,11 @@ public class VersionGCStatsTest { c.call(stats.collectAndDeleteSplitDocs); c.call(stats.sortDocIds); c.call(stats.updateResurrectedDocuments); + c.call(stats.detailedGCDocs); + c.call(stats.deleteDetailedGCDocs); + c.call(stats.collectDeletedProps); + c.call(stats.collectDeletedOldRevs); + c.call(stats.collectUnmergedBC); } private interface Callable { diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java index 48f3f362ce..f29716ca5d 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java @@ -44,7 +44,6 @@ import org.jetbrains.annotations.NotNull; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; @@ -96,7 +95,7 @@ public class VersionGCTest { @After public void tearDown() throws Exception { - DetailGCHelper.disableDetailGC(ns); + DetailGCHelper.disableDetailGC(gc); execService.shutdown(); execService.awaitTermination(1, MINUTES); } @@ -343,21 +342,19 @@ public class VersionGCTest { // OAK-10199 @Test - @Ignore public void testDetailGcDocumentRead_disabled() throws Exception { - DetailGCHelper.disableDetailGC(ns); + DetailGCHelper.disableDetailGC(gc); VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES); assertNotNull(stats); - assertEquals(0, stats.detailedGcDocsElapsed); + assertEquals(0, stats.detailedGCDocsElapsed); } @Test - @Ignore public void testDetailGcDocumentRead_enabled() throws Exception { - DetailGCHelper.enableDetailGC(ns); + DetailGCHelper.enableDetailGC(gc); VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES); assertNotNull(stats); - assertNotEquals(0, stats.detailedGcDocsElapsed); + assertNotEquals(0, stats.detailedGCDocsElapsed); } // OAK-10199 diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index d33cc8c7de..4470a07f96 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -34,10 +34,12 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import static org.apache.commons.lang3.reflect.FieldUtils.writeField; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.guava.common.collect.Iterables.size; import static java.util.concurrent.TimeUnit.HOURS; import static java.util.concurrent.TimeUnit.MINUTES; +import static org.apache.jackrabbit.oak.api.Type.STRING; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NUM_REVS_THRESHOLD; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PREV_SPLIT_FACTOR; @@ -225,6 +227,68 @@ public class VersionGarbageCollectorIT { public void gcLongPathSplitDocs() throws Exception { gcSplitDocsInternal(Strings.repeat("sub", 120)); } + + @Test + public void testGCDeletedProps() throws Exception{ + //1. Create nodes + NodeBuilder b1 = store.getRoot().builder(); + + // Add property to node & save + b1.child("x").setProperty("test", "t", STRING); + b1.child("z").setProperty("prop", "foo", STRING); + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // update the property + b1 = store.getRoot().builder(); + b1.getChildNode("z").setProperty("prop", "bar", STRING); + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // update property again + b1 = store.getRoot().builder(); + b1.getChildNode("z").setProperty("prop", "baz", STRING); + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + long maxAge = 1; //hours + long delta = TimeUnit.MINUTES.toMillis(10); + //1. Go past GC age and check no GC done as nothing deleted + clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); + VersionGCStats stats = gc.gc(maxAge, HOURS); + assertEquals(0, stats.deletedPropsGCCount); + + //Remove property + NodeBuilder b2 = store.getRoot().builder(); + b2.getChildNode("z").removeProperty("prop"); + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + store.runBackgroundOperations(); + + //2. Check that a deleted property is not collected before maxAge + //Clock cannot move back (it moved forward in #1) so double the maxAge + clock.waitUntil(clock.getTime() + delta); + stats = gc.gc(maxAge*2, HOURS); + assertEquals(0, stats.deletedPropsGCCount); + + //3. Check that deleted property does get collected post maxAge + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + + stats = gc.gc(maxAge*2, HOURS); + assertEquals(1, stats.deletedPropsGCCount); + + //4. Check that a revived property (deleted and created again) does not get gc + NodeBuilder b3 = store.getRoot().builder(); + b3.child("x").removeProperty("test"); + store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + NodeBuilder b4 = store.getRoot().builder(); + b4.child("x").setProperty("test", "t", STRING); + store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + stats = gc.gc(maxAge*2, HOURS); + assertEquals(0, stats.deletedPropsGCCount); + } private void gcSplitDocsInternal(String subNodeName) throws Exception { long maxAge = 1; //hrs