This is an automated email from the ASF dual-hosted git repository. daim pushed a commit to branch OAK-10199 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 650757c36eb85e7bad369835abeee537427615b9 Author: Rishabh Kumar <d...@adobe.com> AuthorDate: Thu Jun 22 01:07:36 2023 +0530 OAK-10199 : fixed logic to include previously garbage collected documents if updated recently --- .../plugins/document/VersionGCRecommendations.java | 11 +- .../oak/plugins/document/VersionGCSupport.java | 15 ++- .../plugins/document/VersionGarbageCollector.java | 50 ++++---- .../document/mongo/MongoVersionGCSupport.java | 13 +-- .../plugins/document/rdb/RDBVersionGCSupport.java | 15 ++- .../oak/plugins/document/VersionGCInitTest.java | 2 +- .../oak/plugins/document/VersionGCSupportTest.java | 22 ++-- .../oak/plugins/document/VersionGCTest.java | 88 +++++++++++++- .../document/VersionGarbageCollectorIT.java | 127 ++++++++++++++++++++- 9 files changed, 278 insertions(+), 65 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java index 056c2fe438..ac0bcc03e3 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java @@ -20,13 +20,13 @@ package org.apache.jackrabbit.oak.plugins.document; import java.util.HashMap; import java.util.Map; -import java.util.Objects; import java.util.concurrent.TimeUnit; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval; import org.apache.jackrabbit.oak.spi.gc.GCMonitor; import org.apache.jackrabbit.oak.stats.Clock; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -117,16 +117,16 @@ public class VersionGCRecommendations { detailedGCTimestamp = (long) settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP); oldestModifiedDocId = (String) settings.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP); - if (detailedGCTimestamp == 0 || Objects.equals(oldestModifiedDocId, MIN_ID_VALUE)) { + if (detailedGCTimestamp == 0) { + // it will only happens for the very first time, we run this detailedGC log.info("No detailedGCTimestamp found, querying for the oldest modified candidate"); final NodeDocument doc = vgc.getOldestModifiedDoc(clock); if (doc == NULL) { oldestModifiedDocTimeStamp = 0L; - oldestModifiedDocId = MIN_ID_VALUE; } else { - oldestModifiedDocId = doc.getId(); oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : doc.getModified() - 1; } + oldestModifiedDocId = MIN_ID_VALUE; log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp)); } else { oldestModifiedDocTimeStamp = detailedGCTimestamp - 1; @@ -179,7 +179,8 @@ public class VersionGCRecommendations { ignoreDueToCheckPoint = true; } else { scope = scope.notLaterThan(checkpoint.getTimestamp() - 1); - log.debug("checkpoint at [{}] found, scope now {}", timestampToString(checkpoint.getTimestamp()), scope); + detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(checkpoint.getTimestamp() - 1); + log.info("checkpoint at [{}] found, scope now {}, detailedGcScope now {}", timestampToString(checkpoint.getTimestamp()), scope, detailedGCTimeInternal); } } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java index 96fa2bbaea..6086eef772 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java @@ -82,17 +82,16 @@ public class VersionGCSupport { * then perform the comparison. * <p/> * - * @param fromModified the lower bound modified timestamp (inclusive) - * @param toModified the upper bound modified timestamp (exclusive) - * @param limit the limit of documents to return - * @param fromId the lower bound {@link NodeDocument#ID} - * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not + * @param fromModified the lower bound modified timestamp (inclusive) + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @param fromId the lower bound {@link NodeDocument#ID} * @return matching documents. */ public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, - @NotNull final String fromId, boolean includeFromId) { + @NotNull final String fromId) { return StreamSupport - .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, includeFromId ? "\0"+fromId : fromId).spliterator(), false) + .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false) .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified)) .sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)) .limit(limit) @@ -197,7 +196,7 @@ public class VersionGCSupport { LOG.info("find oldest modified document"); try { - docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE, false); + docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE); if (docs.iterator().hasNext()) { return docs.iterator().next(); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index 0c7da0d4fa..9d918ee9a4 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -69,6 +69,7 @@ import static org.apache.jackrabbit.guava.common.util.concurrent.Atomics.newRefe import static java.util.concurrent.TimeUnit.MICROSECONDS; import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES; import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS; +import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY; import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF; @@ -615,24 +616,26 @@ public class VersionGarbageCollector { throws IOException { int docsTraversed = 0; boolean foundDoc = true; - boolean includeFromId = true; - long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs; - String oldestModifiedDocId = rec.detailedGCId; + final long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs; + final String oldestModifiedDocId = rec.detailedGCId; try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) { - final long fromModified = rec.scopeDetailedGC.fromMs; + long fromModified = oldestModifiedDocTimeStamp; + String fromId = oldestModifiedDocId; + NodeDocument lastDoc = null; final long toModified = rec.scopeDetailedGC.toMs; if (phases.start(GCPhase.DETAILED_GC)) { - while (foundDoc && oldestModifiedDocTimeStamp < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) { + while (foundDoc && fromModified < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) { // set foundDoc to false to allow exiting the while loop foundDoc = false; - Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId, includeFromId); + Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId); // set includeFromId to false for subsequent queries - includeFromId = false; try { for (NodeDocument doc : itr) { foundDoc = true; // continue with GC? if (cancel.get()) { + foundDoc = false; // to exit while loop as well + log.info("Received GC cancel call. Terminating the GC Operation."); break; } docsTraversed++; @@ -641,13 +644,14 @@ public class VersionGarbageCollector { docsTraversed, gc.getGarbageDocsCount()); } + lastDoc = doc; // collect the data to delete in next step if (phases.start(GCPhase.COLLECTING)) { gc.collectGarbage(doc, phases); phases.stop(GCPhase.COLLECTING); } - final Long modified = doc.getModified(); + final Long modified = lastDoc.getModified(); if (modified == null) { monitor.warn("collectDetailGarbage : document has no _modified property : {}", doc.getId()); @@ -655,22 +659,28 @@ public class VersionGarbageCollector { monitor.warn( "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})", modified, fromModified, toModified); - } else { - oldestModifiedDocTimeStamp = modified; } - - if (gc.hasGarbage() && phases.start(GCPhase.DETAILED_GC_CLEANUP)) { - gc.removeGarbage(phases.stats); - phases.stop(GCPhase.DETAILED_GC_CLEANUP); - } - - oldestModifiedDocTimeStamp = modified == null ? fromModified : modified; - oldestModifiedDocId = doc.getId(); + } + // now remove the garbage in one go, if any + if (gc.hasGarbage() && phases.start(GCPhase.DETAILED_GC_CLEANUP)) { + gc.removeGarbage(phases.stats); + phases.stop(GCPhase.DETAILED_GC_CLEANUP); + } + if (lastDoc != null) { + fromModified = ofNullable(lastDoc.getModified()).orElse(oldestModifiedDocTimeStamp); + fromId = lastDoc.getId(); } } finally { Utils.closeIfCloseable(itr); - phases.stats.oldestModifiedDocTimeStamp = oldestModifiedDocTimeStamp; - phases.stats.oldestModifiedDocId = oldestModifiedDocId; + phases.stats.oldestModifiedDocTimeStamp = fromModified; + if (fromModified > oldestModifiedDocTimeStamp) { + // we have moved ahead, now we can reset oldestModifiedId to min value + phases.stats.oldestModifiedDocId = MIN_ID_VALUE; + } else { + // there are still documents pending at oldest Modified timestamp, + // save the last _id traversed to avoid re-fetching of ids + phases.stats.oldestModifiedDocId = fromId; + } } } phases.stop(GCPhase.DETAILED_GC); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index ca9a8a955b..cf821fcf48 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -139,19 +139,18 @@ public class MongoVersionGCSupport extends VersionGCSupport { * then perform the comparison. * <p/> * - * @param fromModified the lower bound modified timestamp (inclusive) - * @param toModified the upper bound modified timestamp (exclusive) - * @param limit the limit of documents to return - * @param fromId the lower bound {@link NodeDocument#ID} - * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not + * @param fromModified the lower bound modified timestamp (inclusive) + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @param fromId the lower bound {@link NodeDocument#ID} * @return matching documents. */ @Override public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, - @NotNull final String fromId, boolean includeFromId) { + @NotNull final String fromId) { // _modified >= fromModified && _modified < toModified && _id > fromId final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), - lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), includeFromId ? gte(ID, fromId) :gt(ID, fromId)); + lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId)); // first sort by _modified and then by _id final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1)); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java index efce4b8006..9d96c35811 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java @@ -104,19 +104,18 @@ public class RDBVersionGCSupport extends VersionGCSupport { * then perform the comparison. * <p/> * - * @param fromModified the lower bound modified timestamp (inclusive) - * @param toModified the upper bound modified timestamp (exclusive) - * @param limit the limit of documents to return - * @param fromId the lower bound {@link NodeDocument#ID} - * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not + * @param fromModified the lower bound modified timestamp (inclusive) + * @param toModified the upper bound modified timestamp (exclusive) + * @param limit the limit of documents to return + * @param fromId the lower bound {@link NodeDocument#ID} * @return matching documents. */ @Override public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, - @NotNull final String fromId, boolean includeFromId) { + @NotNull final String fromId) { List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)), new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)), - new QueryCondition(ID, includeFromId ? ">=" : ">", of(fromId))); + new QueryCondition(ID, ">", of(fromId))); if (MODE == 1) { return getIterator(EMPTY_KEY_PATTERN, conditions); } else { @@ -291,7 +290,7 @@ public class RDBVersionGCSupport extends VersionGCSupport { LOG.info("getOldestModifiedDoc() <- start"); Iterable<NodeDocument> modifiedDocs = null; try { - modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE, false); + modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE); doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL; } catch (DocumentStoreException ex) { LOG.error("getOldestModifiedDoc()", ex); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java index 0bf7b8601a..4db64c942f 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java @@ -79,7 +79,7 @@ public class VersionGCInitTest { vgc = store.find(SETTINGS, "versionGC"); assertNotNull(vgc); assertEquals(40L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP)); - assertEquals(id, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); + assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP)); } @Test diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java index cff9511a66..6d02fd38f7 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java @@ -224,10 +224,10 @@ public class VersionGCSupportTest { long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); assertEquals(40L, oldestModifiedDocTs); assertEquals("1:/x0", oldestModifiedDocId); - boolean includeFromId = true; + oldestModifiedDocId = MIN_ID_VALUE; for(int i = 0; i < 5; i++) { - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); long count = stream(modifiedDocs.spliterator(), false).count(); assertEquals(1000, count); @@ -236,7 +236,6 @@ public class VersionGCSupportTest { } oldestModifiedDocId = oldestModifiedDoc.getId(); oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); - includeFromId = false; } } @@ -260,10 +259,10 @@ public class VersionGCSupportTest { long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); assertEquals(40L, oldestModifiedDocTs); assertEquals("1:/x0", oldestModifiedDocId); - boolean includeFromId = true; + oldestModifiedDocId = MIN_ID_VALUE; for(int i = 0; i < 5; i++) { - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); long count = stream(modifiedDocs.spliterator(), false).count(); assertEquals(1000, count); @@ -272,11 +271,10 @@ public class VersionGCSupportTest { } oldestModifiedDocId = oldestModifiedDoc.getId(); oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); - includeFromId = false; } // fetch last remaining document now - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); assertEquals(1, stream(modifiedDocs.spliterator(), false).count()); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); oldestModifiedDoc = modifiedDocs.iterator().next(); @@ -284,7 +282,7 @@ public class VersionGCSupportTest { oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); // all documents had been fetched, now we won't get any document - modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false); + modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); assertEquals(0, stream(modifiedDocs.spliterator(), false).count()); } @@ -309,10 +307,9 @@ public class VersionGCSupportTest { } // create 5_000 nodes store.create(NODES, updateOps); - boolean includeFromId = true; for(int i = 0; i < 5; i++) { - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); long count = stream(modifiedDocs.spliterator(), false).count(); assertEquals(1000, count); @@ -321,11 +318,10 @@ public class VersionGCSupportTest { } oldestModifiedDocId = oldestModifiedDoc.getId(); oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); - includeFromId = false; } // all documents had been fetched, now we won't get any document - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); assertEquals(0, stream(modifiedDocs.spliterator(), false).count()); } @@ -335,7 +331,7 @@ public class VersionGCSupportTest { } private void assertModified(long fromSeconds, long toSeconds, long num) { - Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE, false); + Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE); assertEquals(num, stream(docs.spliterator(), false).count()); assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java index f29716ca5d..cf3148a86d 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java @@ -50,6 +50,11 @@ import org.junit.Test; import static java.util.concurrent.TimeUnit.HOURS; import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS; +import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP; +import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; @@ -187,6 +192,85 @@ public class VersionGCTest { } } + // OAK-10199 + @Test + public void cancelMustNotUpdateLastOldestModifiedTimeStamp() throws Exception { + // get previous entry from SETTINGS + String versionGCId = SETTINGS_COLLECTION_ID; + String detailedGCTimestamp = SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP; + enableDetailGC(gc); + gc.gc(30, SECONDS); + Document statusBefore = store.find(SETTINGS, versionGCId); + // block gc call + store.semaphore.acquireUninterruptibly(); + Future<VersionGCStats> stats = gc(); + boolean gcBlocked = false; + for (int i = 0; i < 10; i ++) { + if (store.semaphore.hasQueuedThreads()) { + gcBlocked = true; + break; + } + Thread.sleep(100); + } + assertTrue(gcBlocked); + // now cancel the GC + gc.cancel(); + store.semaphore.release(); + assertTrue(stats.get().canceled); + + // ensure a canceled GC doesn't update that versionGC SETTINGS entry + Document statusAfter = store.find(SETTINGS, SETTINGS_COLLECTION_ID); + if (statusBefore == null) { + assertNull(statusAfter); + } else { + assertNotNull(statusAfter); + assertEquals( + "canceled GC shouldn't change the " + detailedGCTimestamp + " property on " + versionGCId + + " settings entry", + statusBefore.get(detailedGCTimestamp), statusAfter.get(detailedGCTimestamp)); + } + } + + @Test + public void cancelMustNotUpdateLastOldestModifiedDocId() throws Exception { + // get previous entry from SETTINGS + String versionGCId = SETTINGS_COLLECTION_ID; + String oldestModifiedDocId = SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP; + enableDetailGC(gc); + gc.gc(30, SECONDS); + Document statusBefore = store.find(SETTINGS, versionGCId); + // block gc call + store.semaphore.acquireUninterruptibly(); + Future<VersionGCStats> stats = gc(); + boolean gcBlocked = false; + for (int i = 0; i < 10; i ++) { + if (store.semaphore.hasQueuedThreads()) { + gcBlocked = true; + break; + } + Thread.sleep(100); + } + assertTrue(gcBlocked); + // now cancel the GC + gc.cancel(); + store.semaphore.release(); + assertTrue(stats.get().canceled); + + // ensure a canceled GC doesn't update that versionGC SETTINGS entry + Document statusAfter = store.find(SETTINGS, SETTINGS_COLLECTION_ID); + if (statusBefore == null) { + assertNull(statusAfter); + } else { + assertNotNull(statusAfter); + assertEquals( + "canceled GC shouldn't change the " + oldestModifiedDocId + " property on " + versionGCId + + " settings entry", + statusBefore.get(oldestModifiedDocId), statusAfter.get(oldestModifiedDocId)); + } + } + + // END - OAK-10199 + @Test public void getInfo() throws Exception { gc.gc(1, TimeUnit.HOURS); @@ -351,7 +435,7 @@ public class VersionGCTest { @Test public void testDetailGcDocumentRead_enabled() throws Exception { - DetailGCHelper.enableDetailGC(gc); + enableDetailGC(gc); VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES); assertNotNull(stats); assertNotEquals(0, stats.detailedGCDocsElapsed); @@ -417,7 +501,7 @@ public class VersionGCTest { @Override public <T extends Document> T find(Collection<T> collection, String key) { - if (collection == Collection.SETTINGS + if (collection == SETTINGS && key.equals("versionGC")) { findVersionGC.incrementAndGet(); } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index 80dd47dee6..e0de0c0617 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -34,7 +34,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; -import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.commons.lang3.reflect.FieldUtils.writeField; import static org.apache.jackrabbit.guava.common.collect.Iterables.filter; import static org.apache.jackrabbit.guava.common.collect.Iterables.size; @@ -53,6 +52,7 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import static org.junit.Assume.assumeTrue; import org.apache.jackrabbit.guava.common.base.Function; @@ -120,6 +120,7 @@ public class VersionGarbageCollectorIT { execService = Executors.newCachedThreadPool(); clock = new Clock.Virtual(); clock.waitUntil(System.currentTimeMillis()); + ClusterNodeInfo.setClock(clock); Revision.setClock(clock); if (fixture instanceof RDBFixture) { ((RDBFixture) fixture).setRDBOptions( @@ -230,6 +231,7 @@ public class VersionGarbageCollectorIT { gcSplitDocsInternal(Strings.repeat("sub", 120)); } + // OAK-10199 @Test public void testGCDeletedProps() throws Exception { //1. Create nodes with properties @@ -393,6 +395,129 @@ public class VersionGarbageCollectorIT { assertEquals(50_000, stats.deletedPropsGCCount); } + + // Test where we modify the already GCed nodes + @Test + public void testGCDeletedProps_3() throws Exception { + //1. Create nodes with properties + NodeBuilder b1 = store.getRoot().builder(); + // Add property to node & save + for (int i = 0; i < 10; i++) { + b1.child("z" + i).setProperty("prop" + i, "foo", STRING); + } + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + long maxAge = 1; //hours + long delta = TimeUnit.MINUTES.toMillis(10); + //1. Go past GC age and check no GC done as nothing deleted + clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); + VersionGCStats stats = gc.gc(maxAge, HOURS); + assertEquals(0, stats.deletedPropsGCCount); + + //Remove property + NodeBuilder b2 = store.getRoot().builder(); + for (int i = 0; i < 10; i++) { + b2.getChildNode("z" + i).removeProperty("prop" + i); + } + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store.runBackgroundOperations(); + + //2. Check that deleted property does get collected post maxAge + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + + stats = gc.gc(maxAge*2, HOURS); + assertEquals(10, stats.deletedPropsGCCount); + + //3. now reCreate those properties again + NodeBuilder b3 = store.getRoot().builder(); + // Add property to node & save + for (int i = 0; i < 10; i++) { + b3.child("z" + i).setProperty("prop" + i, "bar", STRING); + } + store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + //Remove properties again + NodeBuilder b4 = store.getRoot().builder(); + for (int i = 0; i < 10; i++) { + b4.getChildNode("z" + i).removeProperty("prop" + i); + } + store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store.runBackgroundOperations(); + + + //4. Check that deleted property does get collected again + // increment the clock again by more than 2 hours + delta + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + stats = gc.gc(maxAge*2, HOURS); + assertEquals(10, stats.deletedPropsGCCount); + } + + // Test when properties are not collected in one GC cycle + @Test + @Ignore + public void testGCDeletedProps_4() throws Exception { + documentMKBuilder = new DocumentMK.Builder().clock(clock) + .setLeaseCheckMode(LeaseCheckMode.DISABLED) + .setDocumentStore(new FailingDocumentStore(fixture.createDocumentStore(), 42)).setAsyncDelay(0); + store = documentMKBuilder.getNodeStore(); + assertTrue(store.getDocumentStore() instanceof FailingDocumentStore); + MongoTestUtils.setReadPreference(store, ReadPreference.primary()); + gc = store.getVersionGarbageCollector(); + //1. Create nodes with properties + NodeBuilder b1 = store.getRoot().builder(); + // Add property to node & save + for (int i = 0; i < 10; i++) { + b1.child("z" + i).setProperty("prop" + i, "foo", STRING); + } + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + //2. Remove property + NodeBuilder b2 = store.getRoot().builder(); + for (int i = 0; i < 10; i++) { + b2.getChildNode("z" + i).removeProperty("prop" + i); + } + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY); + store.runBackgroundOperations(); + + // enable the detailed gc flag + writeField(gc, "detailedGCEnabled", true, true); + long maxAge = 1; //hours + long delta = TimeUnit.MINUTES.toMillis(10); + + //3. Check that deleted property does get collected again + // increment the clock again by more than 2 hours + delta + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + gc.setOptions(gc.getOptions().withMaxIterations(1)); + + ((FailingDocumentStore) store.getDocumentStore()).fail().after(0).eternally(); + try { + store.dispose(); + fail("dispose() must fail with an exception"); + } catch (DocumentStoreException e) { + // expected + } + ((FailingDocumentStore) store.getDocumentStore()).fail().never(); + + // create new store + store = new DocumentMK.Builder().clock(clock).setLeaseCheckMode(LeaseCheckMode.DISABLED) + .setDocumentStore(new FailingDocumentStore(fixture.createDocumentStore(1), 42)).setAsyncDelay(0) + .getNodeStore(); + assertTrue(store.getDocumentStore() instanceof FailingDocumentStore); + MongoTestUtils.setReadPreference(store, ReadPreference.primary()); + gc = store.getVersionGarbageCollector(); + store.runBackgroundOperations(); + + //4. Check that deleted property does get collected again + // increment the clock again by more than 2 hours + delta + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); + VersionGCStats stats = gc.gc(maxAge*2, HOURS); + assertEquals(10, stats.deletedPropsGCCount); + + } + + // OAK-10199 END private void gcSplitDocsInternal(String subNodeName) throws Exception { long maxAge = 1; //hrs