This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 650757c36eb85e7bad369835abeee537427615b9
Author: Rishabh Kumar <d...@adobe.com>
AuthorDate: Thu Jun 22 01:07:36 2023 +0530

    OAK-10199 : fixed logic to include previously garbage collected documents 
if updated recently
---
 .../plugins/document/VersionGCRecommendations.java |  11 +-
 .../oak/plugins/document/VersionGCSupport.java     |  15 ++-
 .../plugins/document/VersionGarbageCollector.java  |  50 ++++----
 .../document/mongo/MongoVersionGCSupport.java      |  13 +--
 .../plugins/document/rdb/RDBVersionGCSupport.java  |  15 ++-
 .../oak/plugins/document/VersionGCInitTest.java    |   2 +-
 .../oak/plugins/document/VersionGCSupportTest.java |  22 ++--
 .../oak/plugins/document/VersionGCTest.java        |  88 +++++++++++++-
 .../document/VersionGarbageCollectorIT.java        | 127 ++++++++++++++++++++-
 9 files changed, 278 insertions(+), 65 deletions(-)

diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 056c2fe438..ac0bcc03e3 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -20,13 +20,13 @@ package org.apache.jackrabbit.oak.plugins.document;
 
 import java.util.HashMap;
 import java.util.Map;
-import java.util.Objects;
 import java.util.concurrent.TimeUnit;
 
 import 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.stats.Clock;
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -117,16 +117,16 @@ public class VersionGCRecommendations {
 
         detailedGCTimestamp = (long) 
settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
         oldestModifiedDocId = (String) 
settings.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP);
-        if (detailedGCTimestamp == 0 || Objects.equals(oldestModifiedDocId, 
MIN_ID_VALUE)) {
+        if (detailedGCTimestamp == 0) {
+            // it will only happens for the very first time, we run this 
detailedGC
             log.info("No detailedGCTimestamp found, querying for the oldest 
modified candidate");
             final NodeDocument doc = vgc.getOldestModifiedDoc(clock);
             if (doc == NULL) {
                 oldestModifiedDocTimeStamp = 0L;
-                oldestModifiedDocId = MIN_ID_VALUE;
             } else {
-                oldestModifiedDocId = doc.getId();
                 oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : 
doc.getModified() - 1;
             }
+            oldestModifiedDocId = MIN_ID_VALUE;
             log.info("detailedGCTimestamp found: {}", 
timestampToString(oldestModifiedDocTimeStamp));
         } else {
             oldestModifiedDocTimeStamp = detailedGCTimestamp - 1;
@@ -179,7 +179,8 @@ public class VersionGCRecommendations {
                 ignoreDueToCheckPoint = true;
             } else {
                 scope = scope.notLaterThan(checkpoint.getTimestamp() - 1);
-                log.debug("checkpoint at [{}] found, scope now {}", 
timestampToString(checkpoint.getTimestamp()), scope);
+                detailedGCTimeInternal = 
detailedGCTimeInternal.notLaterThan(checkpoint.getTimestamp() - 1);
+                log.info("checkpoint at [{}] found, scope now {}, 
detailedGcScope now {}", timestampToString(checkpoint.getTimestamp()), scope, 
detailedGCTimeInternal);
             }
         }
 
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index 96fa2bbaea..6086eef772 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -82,17 +82,16 @@ public class VersionGCSupport {
      * then perform the comparison.
      * <p/>
      *
-     * @param fromModified  the lower bound modified timestamp (inclusive)
-     * @param toModified    the upper bound modified timestamp (exclusive)
-     * @param limit         the limit of documents to return
-     * @param fromId        the lower bound {@link NodeDocument#ID}
-     * @param includeFromId boolean indicating whether {@code fromId} is 
inclusive or not
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @param fromId       the lower bound {@link NodeDocument#ID}
      * @return matching documents.
      */
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
-                                                  @NotNull final String 
fromId, boolean includeFromId) {
+                                                  @NotNull final String 
fromId) {
         return StreamSupport
-                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, 
includeFromId ? "\0"+fromId : fromId).spliterator(), false)
+                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, 
fromId).spliterator(), false)
                 .filter(input -> modifiedGreaterThanEquals(input, 
fromModified) && modifiedLessThan(input, toModified))
                 .sorted((o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2))
                 .limit(limit)
@@ -197,7 +196,7 @@ public class VersionGCSupport {
 
         LOG.info("find oldest modified document");
         try {
-            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE, false);
+            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
             if (docs.iterator().hasNext()) {
                 return docs.iterator().next();
             }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 0c7da0d4fa..9d918ee9a4 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -69,6 +69,7 @@ import static 
org.apache.jackrabbit.guava.common.util.concurrent.Atomics.newRefe
 import static java.util.concurrent.TimeUnit.MICROSECONDS;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
@@ -615,24 +616,26 @@ public class VersionGarbageCollector {
                 throws IOException {
             int docsTraversed = 0;
             boolean foundDoc = true;
-            boolean includeFromId = true;
-            long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
-            String oldestModifiedDocId = rec.detailedGCId;
+            final long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
+            final String oldestModifiedDocId = rec.detailedGCId;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, 
cancel)) {
-                final long fromModified = rec.scopeDetailedGC.fromMs;
+                long fromModified = oldestModifiedDocTimeStamp;
+                String fromId = oldestModifiedDocId;
+                NodeDocument lastDoc = null;
                 final long toModified = rec.scopeDetailedGC.toMs;
                 if (phases.start(GCPhase.DETAILED_GC)) {
-                    while (foundDoc && oldestModifiedDocTimeStamp < toModified 
&& docsTraversed <= PROGRESS_BATCH_SIZE) {
+                    while (foundDoc && fromModified < toModified && 
docsTraversed <= PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while 
loop
                         foundDoc = false;
-                        Iterable<NodeDocument> itr = 
versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, 
oldestModifiedDocId, includeFromId);
+                        Iterable<NodeDocument> itr = 
versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId);
                         // set includeFromId to false for subsequent queries
-                        includeFromId = false;
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
                                 // continue with GC?
                                 if (cancel.get()) {
+                                    foundDoc = false; // to exit while loop as 
well
+                                    log.info("Received GC cancel call. 
Terminating the GC Operation.");
                                     break;
                                 }
                                 docsTraversed++;
@@ -641,13 +644,14 @@ public class VersionGarbageCollector {
                                             docsTraversed, 
gc.getGarbageDocsCount());
                                 }
 
+                                lastDoc = doc;
                                 // collect the data to delete in next step
                                 if (phases.start(GCPhase.COLLECTING)) {
                                     gc.collectGarbage(doc, phases);
                                     phases.stop(GCPhase.COLLECTING);
                                 }
 
-                                final Long modified = doc.getModified();
+                                final Long modified = lastDoc.getModified();
                                 if (modified == null) {
                                     monitor.warn("collectDetailGarbage : 
document has no _modified property : {}",
                                             doc.getId());
@@ -655,22 +659,28 @@ public class VersionGarbageCollector {
                                     monitor.warn(
                                             "collectDetailGarbage : document 
has older _modified than query boundary : {} (from: {}, to: {})",
                                             modified, fromModified, 
toModified);
-                                } else {
-                                    oldestModifiedDocTimeStamp = modified;
                                 }
-
-                                if (gc.hasGarbage() && 
phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
-                                    gc.removeGarbage(phases.stats);
-                                    phases.stop(GCPhase.DETAILED_GC_CLEANUP);
-                                }
-
-                                oldestModifiedDocTimeStamp = modified == null 
? fromModified : modified;
-                                oldestModifiedDocId = doc.getId();
+                            }
+                            // now remove the garbage in one go, if any
+                            if (gc.hasGarbage() && 
phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
+                                gc.removeGarbage(phases.stats);
+                                phases.stop(GCPhase.DETAILED_GC_CLEANUP);
+                            }
+                            if (lastDoc != null) {
+                                fromModified = 
ofNullable(lastDoc.getModified()).orElse(oldestModifiedDocTimeStamp);
+                                fromId = lastDoc.getId();
                             }
                         } finally {
                             Utils.closeIfCloseable(itr);
-                            phases.stats.oldestModifiedDocTimeStamp = 
oldestModifiedDocTimeStamp;
-                            phases.stats.oldestModifiedDocId = 
oldestModifiedDocId;
+                            phases.stats.oldestModifiedDocTimeStamp = 
fromModified;
+                            if (fromModified > oldestModifiedDocTimeStamp) {
+                                // we have moved ahead, now we can reset 
oldestModifiedId to min value
+                                phases.stats.oldestModifiedDocId = 
MIN_ID_VALUE;
+                            } else {
+                                // there are still documents pending at oldest 
Modified timestamp,
+                                // save the last _id traversed to avoid 
re-fetching of ids
+                                phases.stats.oldestModifiedDocId = fromId;
+                            }
                         }
                     }
                     phases.stop(GCPhase.DETAILED_GC);
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index ca9a8a955b..cf821fcf48 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -139,19 +139,18 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
      * then perform the comparison.
      * <p/>
      *
-     * @param fromModified  the lower bound modified timestamp (inclusive)
-     * @param toModified    the upper bound modified timestamp (exclusive)
-     * @param limit         the limit of documents to return
-     * @param fromId        the lower bound {@link NodeDocument#ID}
-     * @param includeFromId boolean indicating whether {@code fromId} is 
inclusive or not
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @param fromId       the lower bound {@link NodeDocument#ID}
      * @return matching documents.
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
-                                                  @NotNull final String 
fromId, boolean includeFromId) {
+                                                  @NotNull final String 
fromId) {
         // _modified >= fromModified && _modified < toModified && _id > fromId
         final Bson query = and(gte(MODIFIED_IN_SECS, 
getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), 
includeFromId ? gte(ID, fromId) :gt(ID, fromId));
+                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, 
fromId));
         // first sort by _modified and then by _id
         final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
 
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index efce4b8006..9d96c35811 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -104,19 +104,18 @@ public class RDBVersionGCSupport extends VersionGCSupport 
{
      * then perform the comparison.
      * <p/>
      *
-     * @param fromModified  the lower bound modified timestamp (inclusive)
-     * @param toModified    the upper bound modified timestamp (exclusive)
-     * @param limit         the limit of documents to return
-     * @param fromId        the lower bound {@link NodeDocument#ID}
-     * @param includeFromId boolean indicating whether {@code fromId} is 
inclusive or not
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @param fromId       the lower bound {@link NodeDocument#ID}
      * @return matching documents.
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
-                                                  @NotNull final String 
fromId, boolean includeFromId) {
+                                                  @NotNull final String 
fromId) {
         List<QueryCondition> conditions = of(new 
QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
                 new QueryCondition(MODIFIED_IN_SECS, ">=", 
getModifiedInSecs(fromModified)),
-                new QueryCondition(ID, includeFromId ? ">=" : ">", 
of(fromId)));
+                new QueryCondition(ID, ">", of(fromId)));
         if (MODE == 1) {
             return getIterator(EMPTY_KEY_PATTERN, conditions);
         } else {
@@ -291,7 +290,7 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         LOG.info("getOldestModifiedDoc() <- start");
         Iterable<NodeDocument> modifiedDocs = null;
         try {
-            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, 
MIN_ID_VALUE, false);
+            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, 
MIN_ID_VALUE);
             doc = modifiedDocs.iterator().hasNext() ? 
modifiedDocs.iterator().next() : NULL;
         } catch (DocumentStoreException ex) {
             LOG.error("getOldestModifiedDoc()", ex);
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 0bf7b8601a..4db64c942f 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -79,7 +79,7 @@ public class VersionGCInitTest {
         vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
         assertEquals(40L, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
-        assertEquals(id, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
+        assertEquals(MIN_ID_VALUE, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 
     @Test
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index cff9511a66..6d02fd38f7 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -224,10 +224,10 @@ public class VersionGCSupportTest {
         long oldestModifiedDocTs = 
ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
         assertEquals("1:/x0", oldestModifiedDocId);
-        boolean includeFromId = true;
+        oldestModifiedDocId = MIN_ID_VALUE;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId, includeFromId);
+            Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -236,7 +236,6 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = 
ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
-            includeFromId = false;
         }
     }
 
@@ -260,10 +259,10 @@ public class VersionGCSupportTest {
         long oldestModifiedDocTs = 
ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
         assertEquals("1:/x0", oldestModifiedDocId);
-        boolean includeFromId = true;
+        oldestModifiedDocId = MIN_ID_VALUE;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId, includeFromId);
+            Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -272,11 +271,10 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = 
ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
-            includeFromId = false;
         }
 
         // fetch last remaining document now
-        Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId, false);
+        Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId);
         assertEquals(1, stream(modifiedDocs.spliterator(), false).count());
         assertTrue(isInOrder(modifiedDocs, (o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2)));
         oldestModifiedDoc = modifiedDocs.iterator().next();
@@ -284,7 +282,7 @@ public class VersionGCSupportTest {
         oldestModifiedDocTs = 
ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
 
         // all documents had been fetched, now we won't get any document
-        modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId, false);
+        modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId);
         assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
 
     }
@@ -309,10 +307,9 @@ public class VersionGCSupportTest {
         }
         // create 5_000 nodes
         store.create(NODES, updateOps);
-        boolean includeFromId = true;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId, includeFromId);
+            Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -321,11 +318,10 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = 
ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
-            includeFromId = false;
         }
 
         // all documents had been fetched, now we won't get any document
-        Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId, false);
+        Iterable<NodeDocument> modifiedDocs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 
1000, oldestModifiedDocId);
         assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
     }
 
@@ -335,7 +331,7 @@ public class VersionGCSupportTest {
     }
 
     private void assertModified(long fromSeconds, long toSeconds, long num) {
-        Iterable<NodeDocument> docs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), 
SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE, false);
+        Iterable<NodeDocument> docs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), 
SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE);
         assertEquals(num, stream(docs.spliterator(), false).count());
         assertTrue(isInOrder(docs, (o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2)));
     }
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index f29716ca5d..cf3148a86d 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -50,6 +50,11 @@ import org.junit.Test;
 import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MINUTES;
 import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
+import static 
org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
@@ -187,6 +192,85 @@ public class VersionGCTest {
         }
     }
 
+    // OAK-10199
+    @Test
+    public void cancelMustNotUpdateLastOldestModifiedTimeStamp() throws 
Exception {
+        // get previous entry from SETTINGS
+        String versionGCId = SETTINGS_COLLECTION_ID;
+        String detailedGCTimestamp = 
SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+        enableDetailGC(gc);
+        gc.gc(30, SECONDS);
+        Document statusBefore = store.find(SETTINGS, versionGCId);
+        // block gc call
+        store.semaphore.acquireUninterruptibly();
+        Future<VersionGCStats> stats = gc();
+        boolean gcBlocked = false;
+        for (int i = 0; i < 10; i ++) {
+            if (store.semaphore.hasQueuedThreads()) {
+                gcBlocked = true;
+                break;
+            }
+            Thread.sleep(100);
+        }
+        assertTrue(gcBlocked);
+        // now cancel the GC
+        gc.cancel();
+        store.semaphore.release();
+        assertTrue(stats.get().canceled);
+
+        // ensure a canceled GC doesn't update that versionGC SETTINGS entry
+        Document statusAfter = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
+        if (statusBefore == null) {
+            assertNull(statusAfter);
+        } else {
+            assertNotNull(statusAfter);
+            assertEquals(
+                    "canceled GC shouldn't change the " + detailedGCTimestamp 
+ " property on " + versionGCId
+                            + " settings entry",
+                    statusBefore.get(detailedGCTimestamp), 
statusAfter.get(detailedGCTimestamp));
+        }
+    }
+
+    @Test
+    public void cancelMustNotUpdateLastOldestModifiedDocId() throws Exception {
+        // get previous entry from SETTINGS
+        String versionGCId = SETTINGS_COLLECTION_ID;
+        String oldestModifiedDocId = 
SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
+        enableDetailGC(gc);
+        gc.gc(30, SECONDS);
+        Document statusBefore = store.find(SETTINGS, versionGCId);
+        // block gc call
+        store.semaphore.acquireUninterruptibly();
+        Future<VersionGCStats> stats = gc();
+        boolean gcBlocked = false;
+        for (int i = 0; i < 10; i ++) {
+            if (store.semaphore.hasQueuedThreads()) {
+                gcBlocked = true;
+                break;
+            }
+            Thread.sleep(100);
+        }
+        assertTrue(gcBlocked);
+        // now cancel the GC
+        gc.cancel();
+        store.semaphore.release();
+        assertTrue(stats.get().canceled);
+
+        // ensure a canceled GC doesn't update that versionGC SETTINGS entry
+        Document statusAfter = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
+        if (statusBefore == null) {
+            assertNull(statusAfter);
+        } else {
+            assertNotNull(statusAfter);
+            assertEquals(
+                    "canceled GC shouldn't change the " + oldestModifiedDocId 
+ " property on " + versionGCId
+                            + " settings entry",
+                    statusBefore.get(oldestModifiedDocId), 
statusAfter.get(oldestModifiedDocId));
+        }
+    }
+
+    // END - OAK-10199
+
     @Test
     public void getInfo() throws Exception {
         gc.gc(1, TimeUnit.HOURS);
@@ -351,7 +435,7 @@ public class VersionGCTest {
 
     @Test
     public void testDetailGcDocumentRead_enabled() throws Exception {
-        DetailGCHelper.enableDetailGC(gc);
+        enableDetailGC(gc);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertNotNull(stats);
         assertNotEquals(0, stats.detailedGCDocsElapsed);
@@ -417,7 +501,7 @@ public class VersionGCTest {
         @Override
         public <T extends Document> T find(Collection<T> collection,
                                            String key) {
-            if (collection == Collection.SETTINGS
+            if (collection == SETTINGS
                     && key.equals("versionGC")) {
                 findVersionGC.incrementAndGet();
             }
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 80dd47dee6..e0de0c0617 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,7 +34,6 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
-import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
@@ -53,6 +52,7 @@ import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 import static org.junit.Assume.assumeTrue;
 
 import org.apache.jackrabbit.guava.common.base.Function;
@@ -120,6 +120,7 @@ public class VersionGarbageCollectorIT {
         execService = Executors.newCachedThreadPool();
         clock = new Clock.Virtual();
         clock.waitUntil(System.currentTimeMillis());
+        ClusterNodeInfo.setClock(clock);
         Revision.setClock(clock);
         if (fixture instanceof RDBFixture) {
             ((RDBFixture) fixture).setRDBOptions(
@@ -230,6 +231,7 @@ public class VersionGarbageCollectorIT {
         gcSplitDocsInternal(Strings.repeat("sub", 120));
     }
 
+    // OAK-10199
     @Test
     public void testGCDeletedProps() throws Exception {
         //1. Create nodes with properties
@@ -393,6 +395,129 @@ public class VersionGarbageCollectorIT {
         assertEquals(50_000, stats.deletedPropsGCCount);
 
     }
+
+    // Test where we modify the already GCed nodes
+    @Test
+    public void testGCDeletedProps_3() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("z" + i).setProperty("prop" + i, "foo", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("z" + i).removeProperty("prop" + i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        //2. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+
+        //3. now reCreate those properties again
+        NodeBuilder b3 = store.getRoot().builder();
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b3.child("z" + i).setProperty("prop" + i, "bar", STRING);
+        }
+        store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Remove properties again
+        NodeBuilder b4 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b4.getChildNode("z" + i).removeProperty("prop" + i);
+        }
+        store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+
+        //4. Check that deleted property does get collected again
+        // increment the clock again by more than 2 hours + delta
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+    }
+
+    // Test when properties are not collected in one GC cycle
+    @Test
+    @Ignore
+    public void testGCDeletedProps_4() throws Exception {
+        documentMKBuilder = new DocumentMK.Builder().clock(clock)
+                .setLeaseCheckMode(LeaseCheckMode.DISABLED)
+                .setDocumentStore(new 
FailingDocumentStore(fixture.createDocumentStore(), 42)).setAsyncDelay(0);
+        store = documentMKBuilder.getNodeStore();
+        assertTrue(store.getDocumentStore() instanceof FailingDocumentStore);
+        MongoTestUtils.setReadPreference(store, ReadPreference.primary());
+        gc = store.getVersionGarbageCollector();
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("z" + i).setProperty("prop" + i, "foo", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //2. Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("z" + i).removeProperty("prop" + i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+
+        //3. Check that deleted property does get collected again
+        // increment the clock again by more than 2 hours + delta
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        gc.setOptions(gc.getOptions().withMaxIterations(1));
+
+        ((FailingDocumentStore) 
store.getDocumentStore()).fail().after(0).eternally();
+        try {
+            store.dispose();
+            fail("dispose() must fail with an exception");
+        } catch (DocumentStoreException e) {
+            // expected
+        }
+        ((FailingDocumentStore) store.getDocumentStore()).fail().never();
+
+        // create new store
+        store = new 
DocumentMK.Builder().clock(clock).setLeaseCheckMode(LeaseCheckMode.DISABLED)
+                .setDocumentStore(new 
FailingDocumentStore(fixture.createDocumentStore(1), 42)).setAsyncDelay(0)
+                .getNodeStore();
+        assertTrue(store.getDocumentStore() instanceof FailingDocumentStore);
+        MongoTestUtils.setReadPreference(store, ReadPreference.primary());
+        gc = store.getVersionGarbageCollector();
+        store.runBackgroundOperations();
+
+        //4. Check that deleted property does get collected again
+        // increment the clock again by more than 2 hours + delta
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        VersionGCStats stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+
+    }
+
+    // OAK-10199 END
     
     private void gcSplitDocsInternal(String subNodeName) throws Exception {
         long maxAge = 1; //hrs


Reply via email to