This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch OAK-10199 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 68d65b7377ab107bf2975bffa00d2d0c37afe710 Author: Rishabh Kumar <d...@adobe.com> AuthorDate: Mon Jun 19 13:45:20 2023 +0530 OAK-10199 : updated logic to fetch nodes by sorting them on the basis of _modified & _id --- .../plugins/document/VersionGCRecommendations.java | 83 +++++++++++------- .../oak/plugins/document/VersionGCSupport.java | 25 +++--- .../plugins/document/VersionGarbageCollector.java | 36 +++++--- .../document/mongo/MongoVersionGCSupport.java | 45 +++++----- .../oak/plugins/document/rdb/RDBDocumentStore.java | 16 +++- .../plugins/document/rdb/RDBDocumentStoreJDBC.java | 16 ++-- .../plugins/document/rdb/RDBVersionGCSupport.java | 41 ++++----- .../oak/plugins/document/util/Utils.java | 20 +++-- .../oak/plugins/document/VersionGCInitTest.java | 46 ++++++++-- .../oak/plugins/document/VersionGCSupportTest.java | 98 ++++++++++++++++++---- .../document/VersionGarbageCollectorIT.java | 52 +++++++++++- 11 files changed, 342 insertions(+), 136 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java index 4584d925c0..056c2fe438 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java @@ -20,11 +20,11 @@ package org.apache.jackrabbit.oak.plugins.document; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.concurrent.TimeUnit; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval; -import org.apache.jackrabbit.oak.plugins.document.util.Utils; import org.apache.jackrabbit.oak.spi.gc.GCMonitor; import org.apache.jackrabbit.oak.stats.Clock; import org.slf4j.Logger; @@ -32,8 +32,14 @@ import org.slf4j.LoggerFactory; import static java.lang.Long.MAX_VALUE; import static java.util.Map.of; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP; import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID; import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP; +import static org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString; /** * Gives a recommendation about parameters for the next revision garbage collection run. @@ -47,13 +53,13 @@ public class VersionGCRecommendations { final boolean ignoreDueToCheckPoint; final TimeInterval scope; - final TimeInterval scopeFullGC; + final TimeInterval scopeDetailedGC; final long maxCollect; final long deleteCandidateCount; final long lastOldestTimestamp; final long detailedGCTimestamp; + final String detailedGCId; final long originalCollectLimit; - private final long precisionMs; final long suggestedIntervalMs; private final boolean scopeIsComplete; @@ -86,7 +92,8 @@ public class VersionGCRecommendations { long deletedOnceCount = 0; long suggestedIntervalMs; long oldestPossible; - long oldestPossibleFullGC; + long oldestModifiedDocTimeStamp; + String oldestModifiedDocId; long collectLimit = options.collectLimit; this.vgc = vgc; @@ -95,12 +102,12 @@ public class VersionGCRecommendations { TimeInterval keep = new TimeInterval(clock.getTime() - maxRevisionAgeMs, Long.MAX_VALUE); - Map<String, Long> settings = getLongSettings(); - lastOldestTimestamp = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP); + Map<String, Object> settings = getVGCSettings(); + lastOldestTimestamp = (long) settings.get(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP); if (lastOldestTimestamp == 0) { - log.debug("No lastOldestTimestamp found, querying for the oldest deletedOnce candidate"); + log.info("No lastOldestTimestamp found, querying for the oldest deletedOnce candidate"); oldestPossible = vgc.getOldestDeletedOnceTimestamp(clock, options.precisionMs) - 1; - log.debug("lastOldestTimestamp found: {}", Utils.timestampToString(oldestPossible)); + log.info("lastOldestTimestamp found: {}", timestampToString(oldestPossible)); } else { oldestPossible = lastOldestTimestamp - 1; } @@ -108,23 +115,27 @@ public class VersionGCRecommendations { TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE); scope = scope.notLaterThan(keep.fromMs); - detailedGCTimestamp = settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP); - if (detailedGCTimestamp == 0) { - if (log.isDebugEnabled()) { - log.debug("No detailedGCTimestamp found, querying for the oldest deletedOnce candidate"); - } - oldestPossibleFullGC = vgc.getOldestModifiedTimestamp(clock) - 1; - if (log.isDebugEnabled()) { - log.debug("detailedGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC)); + detailedGCTimestamp = (long) settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP); + oldestModifiedDocId = (String) settings.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP); + if (detailedGCTimestamp == 0 || Objects.equals(oldestModifiedDocId, MIN_ID_VALUE)) { + log.info("No detailedGCTimestamp found, querying for the oldest modified candidate"); + final NodeDocument doc = vgc.getOldestModifiedDoc(clock); + if (doc == NULL) { + oldestModifiedDocTimeStamp = 0L; + oldestModifiedDocId = MIN_ID_VALUE; + } else { + oldestModifiedDocId = doc.getId(); + oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : doc.getModified() - 1; } + log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp)); } else { - oldestPossibleFullGC = detailedGCTimestamp - 1; + oldestModifiedDocTimeStamp = detailedGCTimestamp - 1; } - TimeInterval fullGCTimeInternal = new TimeInterval(oldestPossibleFullGC, MAX_VALUE); - fullGCTimeInternal = fullGCTimeInternal.notLaterThan(keep.fromMs); + TimeInterval detailedGCTimeInternal = new TimeInterval(oldestModifiedDocTimeStamp, MAX_VALUE); + detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(keep.fromMs); - suggestedIntervalMs = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP); + suggestedIntervalMs = (long) settings.get(SETTINGS_COLLECTION_REC_INTERVAL_PROP); if (suggestedIntervalMs > 0) { suggestedIntervalMs = Math.max(suggestedIntervalMs, options.precisionMs); if (suggestedIntervalMs < scope.getDurationMs()) { @@ -168,7 +179,7 @@ public class VersionGCRecommendations { ignoreDueToCheckPoint = true; } else { scope = scope.notLaterThan(checkpoint.getTimestamp() - 1); - log.debug("checkpoint at [{}] found, scope now {}", Utils.timestampToString(checkpoint.getTimestamp()), scope); + log.debug("checkpoint at [{}] found, scope now {}", timestampToString(checkpoint.getTimestamp()), scope); } } @@ -182,7 +193,8 @@ public class VersionGCRecommendations { this.precisionMs = options.precisionMs; this.ignoreDueToCheckPoint = ignoreDueToCheckPoint; this.scope = scope; - this.scopeFullGC = fullGCTimeInternal; + this.scopeDetailedGC = detailedGCTimeInternal; + this.detailedGCId = oldestModifiedDocId; this.scopeIsComplete = scope.toMs >= keep.fromMs; this.maxCollect = collectLimit; this.suggestedIntervalMs = suggestedIntervalMs; @@ -207,7 +219,8 @@ public class VersionGCRecommendations { } else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) { // success, we would not expect to encounter revisions older than this in the future setLongSetting(of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs, - SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedGced)); + SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedDocTimeStamp)); + setStringSetting(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, stats.oldestModifiedDocId); int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount; double usedFraction; @@ -224,7 +237,7 @@ public class VersionGCRecommendations { long nextDuration = (long) Math.ceil(suggestedIntervalMs * 1.5); log.debug("successful run using {}% of limit, raising recommended interval to {} seconds", Math.round(usedFraction * 1000) / 10.0, TimeUnit.MILLISECONDS.toSeconds(nextDuration)); - setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, nextDuration); + setLongSetting(SETTINGS_COLLECTION_REC_INTERVAL_PROP, nextDuration); } else { log.debug("not increasing limit: collected {} documents ({}% >= {}% limit)", count, usedFraction, allowedFraction); @@ -236,19 +249,23 @@ public class VersionGCRecommendations { } } - private Map<String, Long> getLongSettings() { - Document versionGCDoc = vgc.getDocumentStore().find(Collection.SETTINGS, VersionGarbageCollector.SETTINGS_COLLECTION_ID, 0); - Map<String, Long> settings = new HashMap<>(); + private Map<String, Object> getVGCSettings() { + Document versionGCDoc = vgc.getDocumentStore().find(Collection.SETTINGS, SETTINGS_COLLECTION_ID, 0); + Map<String, Object> settings = new HashMap<>(); // default values - settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L); - settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L); + settings.put(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L); + settings.put(SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L); settings.put(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 0L); + settings.put(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, MIN_ID_VALUE); if (versionGCDoc != null) { for (String k : versionGCDoc.keySet()) { Object value = versionGCDoc.get(k); if (value instanceof Number) { settings.put(k, ((Number) value).longValue()); } + if (value instanceof String) { + settings.put(k, value); + } } } return settings; @@ -258,8 +275,14 @@ public class VersionGCRecommendations { setLongSetting(of(propName, val)); } + private void setStringSetting(String propName, String val) { + UpdateOp updateOp = new UpdateOp(SETTINGS_COLLECTION_ID, true); + updateOp.set(propName, val); + vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp); + } + private void setLongSetting(final Map<String, Long> propValMap) { - UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true); + UpdateOp updateOp = new UpdateOp(SETTINGS_COLLECTION_ID, true); propValMap.forEach(updateOp::set); vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java index f23340acbc..abdfdf4a64 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java @@ -19,9 +19,12 @@ package org.apache.jackrabbit.oak.plugins.document; +import static java.util.Comparator.comparing; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static java.util.stream.Collectors.toList; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments; @@ -52,7 +55,7 @@ public class VersionGCSupport { /** * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value - * within the given range and the {@link NodeDocument#DELETED} set to + * within the given range and the {@link NodeDocument# DELETED} set to * {@code true}. The two passed modified timestamps are in milliseconds * since the epoch and the implementation will convert them to seconds at * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and @@ -79,12 +82,15 @@ public class VersionGCSupport { * @param fromModified the lower bound modified timestamp (inclusive) * @param toModified the upper bound modified timestamp (exclusive) * @param limit the limit of documents to return + * @param fromId the lower bound {@link NodeDocument#ID} (exclusive) * @return matching documents. */ - public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) { + public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, + final String fromId) { return StreamSupport - .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, fromModified).spliterator(), false) + .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false) .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified)) + .sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)) .limit(limit) .collect(toList()); } @@ -176,27 +182,26 @@ public class VersionGCSupport { } /** - * Retrieve the time of the oldest modified document. + * Retrieve the oldest modified document. * - * @return the timestamp of the oldest modified document. + * @return the oldest modified document. */ - public long getOldestModifiedTimestamp(final Clock clock) { + public NodeDocument getOldestModifiedDoc(final Clock clock) { long ts = 0; long now = clock.getTime(); Iterable<NodeDocument> docs = null; LOG.info("find oldest modified document"); try { - docs = getModifiedDocs(ts, now, 1); + docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE); if (docs.iterator().hasNext()) { - Long modified = docs.iterator().next().getModified(); - return modified != null ? modified : 0L; + return docs.iterator().next(); } } finally { Utils.closeIfCloseable(docs); } LOG.info("find oldest modified document to be {}", Utils.timestampToString(ts)); - return ts; + return NULL; } public long getDeletedOnceCount() throws UnsupportedOperationException { diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index b562831e24..f54299e3fd 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -32,7 +32,6 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; import org.apache.jackrabbit.guava.common.base.Function; import org.apache.jackrabbit.guava.common.base.Joiner; @@ -112,6 +111,11 @@ public class VersionGarbageCollector { */ static final String SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP = "detailedGCTimeStamp"; + /** + * Property name to _id till when last detailed-GC run happened + */ + static final String SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP = "detailedGCId"; + private final DocumentNodeStore nodeStore; private final DocumentStore ds; private final boolean detailedGCEnabled; @@ -272,7 +276,8 @@ public class VersionGarbageCollector { int splitDocGCCount; int intermediateSplitDocGCCount; int updateResurrectedGCCount; - long oldestModifiedGced; + long oldestModifiedDocTimeStamp; + String oldestModifiedDocId; int updatedDetailedGCDocsCount; int deletedPropsGCCount; final TimeDurationFormatter df = TimeDurationFormatter.forLogging(); @@ -343,7 +348,8 @@ public class VersionGarbageCollector { ", updateResurrectedGCCount=" + updateResurrectedGCCount + ", splitDocGCCount=" + splitDocGCCount + ", intermediateSplitDocGCCount=" + intermediateSplitDocGCCount + - ", oldestModifiedGced=" + oldestModifiedGced + + ", oldestModifiedDocId=" + oldestModifiedDocId + + ", oldestModifiedDocTimeStamp=" + oldestModifiedDocTimeStamp + ", updatedDetailedGCDocsCount=" + updatedDetailedGCDocsCount + ", deletedPropsGCCount=" + deletedPropsGCCount + ", iterationCount=" + iterationCount + @@ -363,7 +369,8 @@ public class VersionGarbageCollector { this.splitDocGCCount += run.splitDocGCCount; this.intermediateSplitDocGCCount += run.intermediateSplitDocGCCount; this.updateResurrectedGCCount += run.updateResurrectedGCCount; - this.oldestModifiedGced = run.oldestModifiedGced; + this.oldestModifiedDocTimeStamp = run.oldestModifiedDocTimeStamp; + this.oldestModifiedDocId = run.oldestModifiedDocId; this.updatedDetailedGCDocsCount += run.updatedDetailedGCDocsCount; this.deletedPropsGCCount += run.deletedPropsGCCount; if (run.iterationCount > 0) { @@ -608,15 +615,16 @@ public class VersionGarbageCollector { throws IOException { int docsTraversed = 0; boolean foundDoc = true; - long oldestModifiedGCed = rec.scopeFullGC.fromMs; + long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs; + String oldestModifiedDocId = rec.detailedGCId; try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) { - final long fromModified = rec.scopeFullGC.fromMs; - final long toModified = rec.scopeFullGC.toMs; + final long fromModified = rec.scopeDetailedGC.fromMs; + final long toModified = rec.scopeDetailedGC.toMs; if (phases.start(GCPhase.DETAILED_GC)) { - while (foundDoc && oldestModifiedGCed < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) { + while (foundDoc && oldestModifiedDocTimeStamp < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) { // set foundDoc to false to allow exiting the while loop foundDoc = false; - Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGCed, toModified, 1000); + Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId); try { for (NodeDocument doc : itr) { foundDoc = true; @@ -640,12 +648,12 @@ public class VersionGarbageCollector { if (modified == null) { monitor.warn("collectDetailGarbage : document has no _modified property : {}", doc.getId()); - } else if (modified < oldestModifiedGCed) { + } else if (modified < oldestModifiedDocTimeStamp) { monitor.warn( "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})", modified, fromModified, toModified); } else { - oldestModifiedGCed = modified; + oldestModifiedDocTimeStamp = modified; } if (gc.hasGarbage() && phases.start(GCPhase.DETAILED_GC_CLEANUP)) { @@ -653,11 +661,13 @@ public class VersionGarbageCollector { phases.stop(GCPhase.DETAILED_GC_CLEANUP); } - oldestModifiedGCed = modified == null ? fromModified : modified; + oldestModifiedDocTimeStamp = modified == null ? fromModified : modified; + oldestModifiedDocId = doc.getId(); } } finally { Utils.closeIfCloseable(itr); - phases.stats.oldestModifiedGced = oldestModifiedGCed; + phases.stats.oldestModifiedDocTimeStamp = oldestModifiedDocTimeStamp; + phases.stats.oldestModifiedDocId = oldestModifiedDocId; } } phases.stop(GCPhase.DETAILED_GC); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index 324ade704c..690fd5a0d6 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -21,8 +21,8 @@ package org.apache.jackrabbit.oak.plugins.document.mongo; import static com.mongodb.client.model.Filters.eq; import static com.mongodb.client.model.Filters.exists; +import static com.mongodb.client.model.Filters.gt; import static java.util.Optional.ofNullable; -import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.jackrabbit.guava.common.collect.Iterables.concat; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.guava.common.collect.Iterables.transform; @@ -34,6 +34,7 @@ import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.Document.ID; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE; @@ -130,21 +131,26 @@ public class MongoVersionGCSupport extends VersionGCSupport { /** * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value - * within the given range in sorted order. The two passed modified timestamps - * are in milliseconds since the epoch and the implementation will convert them - * to seconds at the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} - * field and then perform the comparison. + * within the given range .The two passed modified timestamps are in milliseconds + * since the epoch and the implementation will convert them to seconds at + * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and + * then perform the comparison. * * @param fromModified the lower bound modified timestamp (inclusive) - * @param toModified the upper bound modified timestamp (exclusive) - * @return matching documents in sorted order of {@link NodeDocument#MODIFIED_IN_SECS} + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @param fromId the lower bound {@link NodeDocument#ID} (exclusive) + * @return matching documents. */ @Override - public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) { - // _modified >= fromModified && _modified < toModified + public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, + final String fromId) { + // _modified >= fromModified && _modified < toModified && _id > fromId final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), - lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))); - final Bson sort = eq(MODIFIED_IN_SECS, 1); + lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId)); + // first sort by _modified and then by _id + final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1)); + final FindIterable<BasicDBObject> cursor = getNodeCollection() .find(query) .sort(sort) @@ -232,11 +238,11 @@ public class MongoVersionGCSupport extends VersionGCSupport { * @return the timestamp of the oldest modified document. */ @Override - public long getOldestModifiedTimestamp(final Clock clock) { - LOG.info("getOldestModifiedTimestamp() <- start"); + public NodeDocument getOldestModifiedDoc(final Clock clock) { + LOG.info("getOldestModifiedDoc() <- start"); - final Bson sort = eq(MODIFIED_IN_SECS, 1); - final List<Long> result = new ArrayList<>(1); + final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1)); + final List<NodeDocument> result = new ArrayList<>(1); // we need to add query condition to ignore `previous` documents which doesn't have this field final Bson query = exists(MODIFIED_IN_SECS); @@ -245,14 +251,13 @@ public class MongoVersionGCSupport extends VersionGCSupport { (Consumer<BasicDBObject>) document -> ofNullable(store.convertFromDBObject(NODES, document)) .ifPresent(doc -> { - long modifiedMs = SECONDS.toMillis(ofNullable(doc.getModified()).orElse(0L)); - LOG.info("getOldestDeletedOnceTimestamp() -> {}", Utils.timestampToString(modifiedMs)); - result.add(modifiedMs); + LOG.info("getOldestModifiedDoc() -> {}", doc); + result.add(doc); })); if (result.isEmpty()) { - LOG.info("getOldestModifiedTimestamp() -> none found, return current time"); - result.add(clock.getTime()); + LOG.info("getOldestModifiedDoc() -> none found, return NULL document"); + result.add(NULL); } return result.get(0); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java index 82c09e213d..3a9ef2d95d 100755 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java @@ -971,8 +971,8 @@ public class RDBDocumentStore implements DocumentStore { public static String VERSIONPROP = "__version"; // set of supported indexed properties - private static final Set<String> INDEXEDPROPERTIES = new HashSet<String>(Arrays.asList(new String[] { MODIFIED, - NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP })); + private static final Set<String> INDEXEDPROPERTIES = new HashSet<>(Arrays.asList(MODIFIED, + NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP, ID)); // set of required table columns private static final Set<String> REQUIREDCOLUMNS = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList( @@ -1840,7 +1840,7 @@ public class RDBDocumentStore implements DocumentStore { } protected <T extends Document> Iterable<T> queryAsIterable(final Collection<T> collection, String fromKey, String toKey, - final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final String sortBy) { + final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final List<String> sortBy) { final RDBTableMetaData tmd = getTable(collection); Set<String> allowedProps = Sets.intersection(INDEXEDPROPERTIES, tmd.getColumnProperties()); @@ -1853,6 +1853,16 @@ public class RDBDocumentStore implements DocumentStore { } } + if (sortBy != null && !sortBy.isEmpty()) { + for (String key: sortBy) { + if (!allowedProps.contains(key)) { + final String message = "indexed property " + key + " not supported. supported properties are " + allowedProps; + LOG.error(message); + throw new UnsupportedIndexedPropertyException(message); + } + } + } + final String from = collection == Collection.NODES && NodeDocument.MIN_ID_VALUE.equals(fromKey) ? null : fromKey; final String to = collection == Collection.NODES && NodeDocument.MAX_ID_VALUE.equals(toKey) ? null : toKey; diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java index 26fc1311fa..59dfb968b0 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java @@ -16,6 +16,8 @@ */ package org.apache.jackrabbit.oak.plugins.document.rdb; +import static java.util.List.of; +import static java.util.stream.Collectors.joining; import static org.apache.jackrabbit.guava.common.collect.Iterables.transform; import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet; import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.CHAR2OCTETRATIO; @@ -459,7 +461,7 @@ public class RDBDocumentStoreJDBC { + excludeKeyPatterns + ", conditions=" + conditions + ", limit=" + limit) : null); stmt = prepareQuery(connection, tmd, fields, minId, - maxId, excludeKeyPatterns, conditions, limit, "ID"); + maxId, excludeKeyPatterns, conditions, limit, of("ID")); rs = stmt.executeQuery(); while (rs.next() && result.size() < limit) { int field = 1; @@ -554,7 +556,7 @@ public class RDBDocumentStoreJDBC { @NotNull public Iterator<RDBRow> queryAsIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId, - List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException { + List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException { return new ResultSetIterator(ch, tmd, minId, maxId, excludeKeyPatterns, conditions, limit, sortBy); } @@ -573,7 +575,7 @@ public class RDBDocumentStoreJDBC { private long pstart; public ResultSetIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId, - List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException { + List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException { long start = System.currentTimeMillis(); try { this.ch = ch; @@ -695,7 +697,7 @@ public class RDBDocumentStoreJDBC { @NotNull private PreparedStatement prepareQuery(Connection connection, RDBTableMetaData tmd, String columns, String minId, String maxId, - List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException { + List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException { StringBuilder selectClause = new StringBuilder(); @@ -714,9 +716,8 @@ public class RDBDocumentStoreJDBC { query.append(" where ").append(whereClause); } - if (sortBy != null) { - // FIXME : order should be determined via sortBy field - query.append(" order by ID"); + if (sortBy != null && !sortBy.isEmpty()) { + query.append(" order by ").append(sortBy.stream().map(INDEXED_PROP_MAPPING::get).collect(joining(", "))); } if (limit != Integer.MAX_VALUE) { @@ -967,6 +968,7 @@ public class RDBDocumentStoreJDBC { tmp.put(NodeDocument.SD_TYPE, "SDTYPE"); tmp.put(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "SDMAXREVTIME"); tmp.put(RDBDocumentStore.VERSIONPROP, "VERSION"); + tmp.put(NodeDocument.ID, "ID"); INDEXED_PROP_MAPPING = Collections.unmodifiableMap(tmp); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java index f26268bcd3..0d2f678911 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java @@ -16,11 +16,13 @@ */ package org.apache.jackrabbit.oak.plugins.document.rdb; -import static java.util.Collections.emptyList; import static java.util.List.of; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; +import static org.apache.jackrabbit.oak.plugins.document.Document.ID; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs; import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN; @@ -99,18 +101,21 @@ public class RDBVersionGCSupport extends VersionGCSupport { * then perform the comparison. * * @param fromModified the lower bound modified timestamp (inclusive) - * @param toModified the upper bound modified timestamp (exclusive) - * @param limit the limit of documents to return + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @param fromId the lower bound {@link NodeDocument#ID} (exclusive) * @return matching documents. */ @Override - public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) { + public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, + final String fromId) { List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)), - new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified))); + new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)), + new QueryCondition(ID, ">", of(fromId))); if (MODE == 1) { return getIterator(EMPTY_KEY_PATTERN, conditions); } else { - return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, MODIFIED_IN_SECS); + return store.queryAsIterable(NODES, fromId, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID)); } } @@ -275,24 +280,20 @@ public class RDBVersionGCSupport extends VersionGCSupport { * @return the timestamp of the oldest modified document. */ @Override - public long getOldestModifiedTimestamp(Clock clock) { - long modifiedMs = Long.MIN_VALUE; + public NodeDocument getOldestModifiedDoc(Clock clock) { + NodeDocument doc = NULL; - LOG.info("getOldestModifiedTimestamp() <- start"); + LOG.info("getOldestModifiedDoc() <- start"); + Iterable<NodeDocument> modifiedDocs = null; try { - long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, null, null, EMPTY_KEY_PATTERN, emptyList()); - modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec); + modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE); + doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL; } catch (DocumentStoreException ex) { - LOG.error("getOldestModifiedTimestamp()", ex); - } - - if (modifiedMs > 0) { - LOG.info("getOldestModifiedTimestamp() -> {}", Utils.timestampToString(modifiedMs)); - return modifiedMs; - } else { - LOG.info("getOldestModifiedTimestamp() -> none found, return current time"); - return clock.getTime(); + LOG.error("getOldestModifiedDoc()", ex); + } finally { + Utils.closeIfCloseable(modifiedDocs); } + return doc; } @Override diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java index c9428429bc..85bb1225fb 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java @@ -62,6 +62,7 @@ import org.slf4j.LoggerFactory; import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull; import static org.apache.jackrabbit.guava.common.collect.Iterables.transform; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.isDeletedEntry; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.isCommitRootEntry; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.isRevisionsEntry; @@ -668,7 +669,7 @@ public class Utils { * @return an {@link Iterable} over all documents in the store. */ public static Iterable<NodeDocument> getAllDocuments(final DocumentStore store) { - return internalGetSelectedDocuments(store, null, 0, DEFAULT_BATCH_SIZE); + return internalGetSelectedDocuments(store, null, 0, MIN_ID_VALUE, DEFAULT_BATCH_SIZE); } /** @@ -710,7 +711,7 @@ public class Utils { */ public static Iterable<NodeDocument> getSelectedDocuments( DocumentStore store, String indexedProperty, long startValue, int batchSize) { - return internalGetSelectedDocuments(store, indexedProperty, startValue, batchSize); + return internalGetSelectedDocuments(store, indexedProperty, startValue, MIN_ID_VALUE, batchSize); } /** @@ -719,12 +720,21 @@ public class Utils { */ public static Iterable<NodeDocument> getSelectedDocuments( DocumentStore store, String indexedProperty, long startValue) { - return internalGetSelectedDocuments(store, indexedProperty, startValue, DEFAULT_BATCH_SIZE); + return internalGetSelectedDocuments(store, indexedProperty, startValue, MIN_ID_VALUE, DEFAULT_BATCH_SIZE); + } + + /** + * Like {@link #getSelectedDocuments(DocumentStore, String, long, int)} with + * a default {@code batchSize}. + */ + public static Iterable<NodeDocument> getSelectedDocuments( + DocumentStore store, String indexedProperty, long startValue, String fromId) { + return internalGetSelectedDocuments(store, indexedProperty, startValue, fromId, DEFAULT_BATCH_SIZE); } private static Iterable<NodeDocument> internalGetSelectedDocuments( final DocumentStore store, final String indexedProperty, - final long startValue, final int batchSize) { + final long startValue, String fromId, final int batchSize) { if (batchSize < 2) { throw new IllegalArgumentException("batchSize must be > 1"); } @@ -733,7 +743,7 @@ public class Utils { public Iterator<NodeDocument> iterator() { return new AbstractIterator<NodeDocument>() { - private String startId = NodeDocument.MIN_ID_VALUE; + private String startId = fromId; private Iterator<NodeDocument> batch = nextBatch(); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java index ed39a372b2..6aceeac830 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java @@ -22,14 +22,19 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; +import static java.util.concurrent.TimeUnit.DAYS; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; +import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS; import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP; import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP; +import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import java.util.concurrent.TimeUnit; - public class VersionGCInitTest { @Rule @@ -45,27 +50,50 @@ public class VersionGCInitTest { @Test public void lazyInitialize() throws Exception { DocumentStore store = ns.getDocumentStore(); - Document vgc = store.find(Collection.SETTINGS, "versionGC"); + Document vgc = store.find(SETTINGS, "versionGC"); assertNull(vgc); - ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS); + ns.getVersionGarbageCollector().gc(1, DAYS); - vgc = store.find(Collection.SETTINGS, "versionGC"); + vgc = store.find(SETTINGS, "versionGC"); assertNotNull(vgc); assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + assertNull(vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); } @Test public void lazyInitializeWithDetailedGC() throws Exception { DocumentStore store = ns.getDocumentStore(); - Document vgc = store.find(Collection.SETTINGS, "versionGC"); + Document vgc = store.find(SETTINGS, "versionGC"); assertNull(vgc); enableDetailGC(ns.getVersionGarbageCollector()); - ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS); + long offset = SECONDS.toMillis(42); + String id = getIdFromPath("/node"); + Revision r = new Revision(offset, 0, 1); + UpdateOp op = new UpdateOp(id, true); + NodeDocument.setModified(op, r); + store.createOrUpdate(NODES, op); + ns.getVersionGarbageCollector().gc(1, DAYS); - vgc = store.find(Collection.SETTINGS, "versionGC"); + vgc = store.find(SETTINGS, "versionGC"); assertNotNull(vgc); - assertEquals(-1L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + assertEquals(39L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + assertEquals(id, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); + } + + @Test + public void lazyInitializeWithDetailedGCWithNoData() throws Exception { + DocumentStore store = ns.getDocumentStore(); + Document vgc = store.find(SETTINGS, "versionGC"); + assertNull(vgc); + + enableDetailGC(ns.getVersionGarbageCollector()); + ns.getVersionGarbageCollector().gc(1, DAYS); + + vgc = store.find(SETTINGS, "versionGC"); + assertNotNull(vgc); + assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); + assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); } } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java index 831d2c89ec..4eb20986c2 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java @@ -18,10 +18,9 @@ */ package org.apache.jackrabbit.oak.plugins.document; +import java.util.ArrayList; import java.util.List; -import org.apache.jackrabbit.guava.common.collect.Iterables; -import org.apache.jackrabbit.guava.common.collect.Lists; import com.mongodb.ReadPreference; import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore; @@ -29,31 +28,38 @@ import org.apache.jackrabbit.oak.plugins.document.mongo.MongoTestUtils; import org.apache.jackrabbit.oak.plugins.document.mongo.MongoVersionGCSupport; import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore; import org.apache.jackrabbit.oak.plugins.document.rdb.RDBVersionGCSupport; -import org.apache.jackrabbit.oak.plugins.document.util.Utils; -import org.apache.jackrabbit.oak.stats.Clock; import org.junit.After; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import static java.util.Comparator.comparing; +import static java.util.List.of; +import static java.util.Optional.ofNullable; import static java.util.concurrent.TimeUnit.SECONDS; +import static java.util.stream.StreamSupport.stream; +import static org.apache.jackrabbit.guava.common.collect.Comparators.isInOrder; +import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MEMORY; import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MONGO; import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDB_H2; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; +import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath; +import static org.apache.jackrabbit.oak.stats.Clock.SIMPLE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) public class VersionGCSupportTest { - private DocumentStoreFixture fixture; - private DocumentStore store; - private VersionGCSupport gcSupport; - private List<String> ids = Lists.newArrayList(); + private final DocumentStoreFixture fixture; + private final DocumentStore store; + private final VersionGCSupport gcSupport; + private final List<String> ids = new ArrayList<>(); @Parameterized.Parameters(name="{0}") public static java.util.Collection<DocumentStoreFixture> fixtures() { - List<DocumentStoreFixture> fixtures = Lists.newArrayList(); + List<DocumentStoreFixture> fixtures = new ArrayList<>(3); if (RDB_H2.isAvailable()) { fixtures.add(RDB_H2); } @@ -88,7 +94,7 @@ public class VersionGCSupportTest { @After public void after() throws Exception { - store.remove(Collection.NODES, ids); + store.remove(NODES, ids); fixture.dispose(); } @@ -97,12 +103,12 @@ public class VersionGCSupportTest { long offset = SECONDS.toMillis(42); for (int i = 0; i < 5; i++) { Revision r = new Revision(offset + SECONDS.toMillis(i), 0, 1); - String id = Utils.getIdFromPath("/doc-" + i); + String id = getIdFromPath("/doc-" + i); ids.add(id); UpdateOp op = new UpdateOp(id, true); NodeDocument.setModified(op, r); NodeDocument.setDeleted(op, r, true); - store.create(Collection.NODES, Lists.newArrayList(op)); + store.create(NODES, of(op)); } assertPossiblyDeleted(0, 41, 0); @@ -126,25 +132,83 @@ public class VersionGCSupportTest { assertPossiblyDeleted(51, 60, 0); } + @Test + public void getPossiblyModifiedDocs() { + long offset = SECONDS.toMillis(42); + for (int i = 0; i < 5; i++) { + Revision r = new Revision(offset + SECONDS.toMillis(i), 0, 1); + String id = getIdFromPath("/doc-modified" + i); + ids.add(id); + UpdateOp op = new UpdateOp(id, true); + NodeDocument.setModified(op, r); + store.create(NODES, of(op)); + } + + assertModified(0, 41, 0); + assertModified(0, 42, 0); + assertModified(0, 44, 0); + assertModified(0, 45, 3); + assertModified(0, 46, 3); + assertModified(0, 49, 3); + assertModified(0, 50, 5); + assertModified(0, 51, 5); + assertModified(39, 60, 5); + assertModified(40, 60, 5); + assertModified(41, 60, 5); + assertModified(42, 60, 5); + assertModified(44, 60, 5); + assertModified(45, 60, 2); + assertModified(47, 60, 2); + assertModified(48, 60, 2); + assertModified(49, 60, 2); + assertModified(50, 60, 0); + assertModified(51, 60, 0); + } + @Test public void findOldest() { // see OAK-8476 long secs = 123456; long offset = SECONDS.toMillis(secs); Revision r = new Revision(offset, 0, 1); - String id = Utils.getIdFromPath("/doc-del"); + String id = getIdFromPath("/doc-del"); ids.add(id); UpdateOp op = new UpdateOp(id, true); NodeDocument.setModified(op, r); NodeDocument.setDeleted(op, r, true); - store.create(Collection.NODES, Lists.newArrayList(op)); + store.create(NODES, of(op)); - long reportedsecs = gcSupport.getOldestDeletedOnceTimestamp(Clock.SIMPLE, 1) / SECONDS.toMillis(1); - assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedsecs), Math.abs(secs - reportedsecs) < 5); + long reportedSecs = gcSupport.getOldestDeletedOnceTimestamp(SIMPLE, 1) / SECONDS.toMillis(1); + assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), Math.abs(secs - reportedSecs) < 5); + } + + @Test + public void findOldestModified() { + long secs = 1234567; + long offset = SECONDS.toMillis(secs); + Revision r = new Revision(offset, 0, 1); + String id = getIdFromPath("/doc-modified"); + ids.add(id); + UpdateOp op = new UpdateOp(id, true); + NodeDocument.setModified(op, r); + store.create(NODES, of(op)); + + NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE); + String oldestModifiedDocId = oldestModifiedDoc.getId(); + long reportedSecs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); + assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), Math.abs(secs - reportedSecs) < 5); + assertEquals(id, oldestModifiedDocId); } private void assertPossiblyDeleted(long fromSeconds, long toSeconds, long num) { Iterable<NodeDocument> docs = gcSupport.getPossiblyDeletedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds)); - assertEquals(num, Iterables.size(docs)); + assertEquals(num, stream(docs.spliterator(), false).count()); + } + + private void assertModified(long fromSeconds, long toSeconds, long num) { + Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE); + docs.forEach(d -> System.out.println(d.getModified() + " " + d.getId())); + assertEquals(num, stream(docs.spliterator(), false).count()); + assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); } } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index 4470a07f96..caa156a6d4 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -229,8 +229,8 @@ public class VersionGarbageCollectorIT { } @Test - public void testGCDeletedProps() throws Exception{ - //1. Create nodes + public void testGCDeletedProps() throws Exception { + //1. Create nodes with properties NodeBuilder b1 = store.getRoot().builder(); // Add property to node & save @@ -289,6 +289,54 @@ public class VersionGarbageCollectorIT { stats = gc.gc(maxAge*2, HOURS); assertEquals(0, stats.deletedPropsGCCount); } + + // Test when we have more than 1000 deleted properties + @Test + public void testGCDeletedProps_1() throws Exception { + //1. Create nodes with properties + NodeBuilder b1 = store.getRoot().builder(); + + // Add property to node & save + for (int i = 0; i < 5_000; i++) { + for (int j = 0; j < 10; j++) { + b1.child("z"+i).setProperty("prop"+j, "foo", STRING); + } + } + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + long maxAge = 1; //hours + long delta = TimeUnit.MINUTES.toMillis(10); + //1. Go past GC age and check no GC done as nothing deleted + clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); + VersionGCStats stats = gc.gc(maxAge, HOURS); + assertEquals(0, stats.deletedPropsGCCount); + + //Remove property + NodeBuilder b2 = store.getRoot().builder(); + for (int i = 0; i < 5_000; i++) { + for (int j = 0; j < 10; j++) { + b2.getChildNode("z"+i).removeProperty("prop"+j); + } + } + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + store.runBackgroundOperations(); + + //2. Check that a deleted property is not collected before maxAge + //Clock cannot move back (it moved forward in #1) so double the maxAge + clock.waitUntil(clock.getTime() + delta); + stats = gc.gc(maxAge*2, HOURS); + assertEquals(0, stats.deletedPropsGCCount); + + //3. Check that deleted property does get collected post maxAge + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + + stats = gc.gc(maxAge*2, HOURS); + assertEquals(50_000, stats.deletedPropsGCCount); + + } private void gcSplitDocsInternal(String subNodeName) throws Exception { long maxAge = 1; //hrs