This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit fabe3c166d6aaece26e4d21072cb1b92dc5a4430
Author: Rishabh Kumar <d...@adobe.com>
AuthorDate: Mon Jun 19 13:45:20 2023 +0530

    OAK-10199 : updated logic to fetch nodes by sorting them on the basis of 
_modified & _id
---
 .../plugins/document/VersionGCRecommendations.java | 83 +++++++++++-------
 .../oak/plugins/document/VersionGCSupport.java     | 25 +++---
 .../plugins/document/VersionGarbageCollector.java  | 36 +++++---
 .../document/mongo/MongoVersionGCSupport.java      | 45 +++++-----
 .../oak/plugins/document/rdb/RDBDocumentStore.java | 16 +++-
 .../plugins/document/rdb/RDBDocumentStoreJDBC.java | 16 ++--
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 41 ++++-----
 .../oak/plugins/document/util/Utils.java           | 20 +++--
 .../oak/plugins/document/VersionGCInitTest.java    | 46 ++++++++--
 .../oak/plugins/document/VersionGCSupportTest.java | 98 ++++++++++++++++++----
 .../document/VersionGarbageCollectorIT.java        | 52 +++++++++++-
 11 files changed, 342 insertions(+), 136 deletions(-)

diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 4584d925c0..056c2fe438 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -20,11 +20,11 @@ package org.apache.jackrabbit.oak.plugins.document;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Objects;
 import java.util.concurrent.TimeUnit;
 
 import 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
-import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.slf4j.Logger;
@@ -32,8 +32,14 @@ import org.slf4j.LoggerFactory;
 
 import static java.lang.Long.MAX_VALUE;
 import static java.util.Map.of;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
 import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
 import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP;
+import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString;
 
 /**
  * Gives a recommendation about parameters for the next revision garbage 
collection run.
@@ -47,13 +53,13 @@ public class VersionGCRecommendations {
 
     final boolean ignoreDueToCheckPoint;
     final TimeInterval scope;
-    final TimeInterval scopeFullGC;
+    final TimeInterval scopeDetailedGC;
     final long maxCollect;
     final long deleteCandidateCount;
     final long lastOldestTimestamp;
     final long detailedGCTimestamp;
+    final String detailedGCId;
     final long originalCollectLimit;
-
     private final long precisionMs;
     final long suggestedIntervalMs;
     private final boolean scopeIsComplete;
@@ -86,7 +92,8 @@ public class VersionGCRecommendations {
         long deletedOnceCount = 0;
         long suggestedIntervalMs;
         long oldestPossible;
-        long oldestPossibleFullGC;
+        long oldestModifiedDocTimeStamp;
+        String oldestModifiedDocId;
         long collectLimit = options.collectLimit;
 
         this.vgc = vgc;
@@ -95,12 +102,12 @@ public class VersionGCRecommendations {
 
         TimeInterval keep = new TimeInterval(clock.getTime() - 
maxRevisionAgeMs, Long.MAX_VALUE);
 
-        Map<String, Long> settings = getLongSettings();
-        lastOldestTimestamp = 
settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
+        Map<String, Object> settings = getVGCSettings();
+        lastOldestTimestamp = (long) 
settings.get(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
         if (lastOldestTimestamp == 0) {
-            log.debug("No lastOldestTimestamp found, querying for the oldest 
deletedOnce candidate");
+            log.info("No lastOldestTimestamp found, querying for the oldest 
deletedOnce candidate");
             oldestPossible = vgc.getOldestDeletedOnceTimestamp(clock, 
options.precisionMs) - 1;
-            log.debug("lastOldestTimestamp found: {}", 
Utils.timestampToString(oldestPossible));
+            log.info("lastOldestTimestamp found: {}", 
timestampToString(oldestPossible));
         } else {
             oldestPossible = lastOldestTimestamp - 1;
         }
@@ -108,23 +115,27 @@ public class VersionGCRecommendations {
         TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE);
         scope = scope.notLaterThan(keep.fromMs);
 
-        detailedGCTimestamp = 
settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
-        if (detailedGCTimestamp == 0) {
-            if (log.isDebugEnabled()) {
-                log.debug("No detailedGCTimestamp found, querying for the 
oldest deletedOnce candidate");
-            }
-            oldestPossibleFullGC = vgc.getOldestModifiedTimestamp(clock) - 1;
-            if (log.isDebugEnabled()) {
-                log.debug("detailedGCTimestamp found: {}", 
Utils.timestampToString(oldestPossibleFullGC));
+        detailedGCTimestamp = (long) 
settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
+        oldestModifiedDocId = (String) 
settings.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP);
+        if (detailedGCTimestamp == 0 || Objects.equals(oldestModifiedDocId, 
MIN_ID_VALUE)) {
+            log.info("No detailedGCTimestamp found, querying for the oldest 
modified candidate");
+            final NodeDocument doc = vgc.getOldestModifiedDoc(clock);
+            if (doc == NULL) {
+                oldestModifiedDocTimeStamp = 0L;
+                oldestModifiedDocId = MIN_ID_VALUE;
+            } else {
+                oldestModifiedDocId = doc.getId();
+                oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : 
doc.getModified() - 1;
             }
+            log.info("detailedGCTimestamp found: {}", 
timestampToString(oldestModifiedDocTimeStamp));
         } else {
-            oldestPossibleFullGC = detailedGCTimestamp - 1;
+            oldestModifiedDocTimeStamp = detailedGCTimestamp - 1;
         }
 
-        TimeInterval fullGCTimeInternal = new 
TimeInterval(oldestPossibleFullGC, MAX_VALUE);
-        fullGCTimeInternal = fullGCTimeInternal.notLaterThan(keep.fromMs);
+        TimeInterval detailedGCTimeInternal = new 
TimeInterval(oldestModifiedDocTimeStamp, MAX_VALUE);
+        detailedGCTimeInternal = 
detailedGCTimeInternal.notLaterThan(keep.fromMs);
 
-        suggestedIntervalMs = 
settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP);
+        suggestedIntervalMs = (long) 
settings.get(SETTINGS_COLLECTION_REC_INTERVAL_PROP);
         if (suggestedIntervalMs > 0) {
             suggestedIntervalMs = Math.max(suggestedIntervalMs, 
options.precisionMs);
             if (suggestedIntervalMs < scope.getDurationMs()) {
@@ -168,7 +179,7 @@ public class VersionGCRecommendations {
                 ignoreDueToCheckPoint = true;
             } else {
                 scope = scope.notLaterThan(checkpoint.getTimestamp() - 1);
-                log.debug("checkpoint at [{}] found, scope now {}", 
Utils.timestampToString(checkpoint.getTimestamp()), scope);
+                log.debug("checkpoint at [{}] found, scope now {}", 
timestampToString(checkpoint.getTimestamp()), scope);
             }
         }
 
@@ -182,7 +193,8 @@ public class VersionGCRecommendations {
         this.precisionMs = options.precisionMs;
         this.ignoreDueToCheckPoint = ignoreDueToCheckPoint;
         this.scope = scope;
-        this.scopeFullGC = fullGCTimeInternal;
+        this.scopeDetailedGC = detailedGCTimeInternal;
+        this.detailedGCId = oldestModifiedDocId;
         this.scopeIsComplete = scope.toMs >= keep.fromMs;
         this.maxCollect = collectLimit;
         this.suggestedIntervalMs = suggestedIntervalMs;
@@ -207,7 +219,8 @@ public class VersionGCRecommendations {
         } else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) {
             // success, we would not expect to encounter revisions older than 
this in the future
             setLongSetting(of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 
scope.toMs,
-                    SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 
stats.oldestModifiedGced));
+                    SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 
stats.oldestModifiedDocTimeStamp));
+            setStringSetting(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, 
stats.oldestModifiedDocId);
 
             int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount;
             double usedFraction;
@@ -224,7 +237,7 @@ public class VersionGCRecommendations {
                     long nextDuration = (long) Math.ceil(suggestedIntervalMs * 
1.5);
                     log.debug("successful run using {}% of limit, raising 
recommended interval to {} seconds",
                             Math.round(usedFraction * 1000) / 10.0, 
TimeUnit.MILLISECONDS.toSeconds(nextDuration));
-                    
setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 
nextDuration);
+                    setLongSetting(SETTINGS_COLLECTION_REC_INTERVAL_PROP, 
nextDuration);
                 } else {
                     log.debug("not increasing limit: collected {} documents 
({}% >= {}% limit)", count, usedFraction,
                             allowedFraction);
@@ -236,19 +249,23 @@ public class VersionGCRecommendations {
         }
     }
 
-    private Map<String, Long> getLongSettings() {
-        Document versionGCDoc = 
vgc.getDocumentStore().find(Collection.SETTINGS, 
VersionGarbageCollector.SETTINGS_COLLECTION_ID, 0);
-        Map<String, Long> settings = new HashMap<>();
+    private Map<String, Object> getVGCSettings() {
+        Document versionGCDoc = 
vgc.getDocumentStore().find(Collection.SETTINGS, SETTINGS_COLLECTION_ID, 0);
+        Map<String, Object> settings = new HashMap<>();
         // default values
-        
settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 
0L);
-        
settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
         settings.put(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, 
MIN_ID_VALUE);
         if (versionGCDoc != null) {
             for (String k : versionGCDoc.keySet()) {
                 Object value = versionGCDoc.get(k);
                 if (value instanceof Number) {
                     settings.put(k, ((Number) value).longValue());
                 }
+                if (value instanceof String) {
+                    settings.put(k, value);
+                }
             }
         }
         return settings;
@@ -258,8 +275,14 @@ public class VersionGCRecommendations {
         setLongSetting(of(propName, val));
     }
 
+    private void setStringSetting(String propName, String val) {
+        UpdateOp updateOp = new UpdateOp(SETTINGS_COLLECTION_ID, true);
+        updateOp.set(propName, val);
+        vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
+    }
+
     private void setLongSetting(final Map<String, Long> propValMap) {
-        UpdateOp updateOp = new 
UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
+        UpdateOp updateOp = new UpdateOp(SETTINGS_COLLECTION_ID, true);
         propValMap.forEach(updateOp::set);
         vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
     }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index f23340acbc..abdfdf4a64 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -19,9 +19,12 @@
 
 package org.apache.jackrabbit.oak.plugins.document;
 
+import static java.util.Comparator.comparing;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static java.util.stream.Collectors.toList;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments;
 import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments;
@@ -52,7 +55,7 @@ public class VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} 
value
-     * within the given range and the {@link NodeDocument#DELETED} set to
+     * within the given range and the {@link NodeDocument#  DELETED} set to
      * {@code true}. The two passed modified timestamps are in milliseconds
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
@@ -79,12 +82,15 @@ public class VersionGCSupport {
      * @param fromModified the lower bound modified timestamp (inclusive)
      * @param toModified the upper bound modified timestamp (exclusive)
      * @param limit the limit of documents to return
+     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
      * @return matching documents.
      */
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit) {
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
+                                                  final String fromId) {
         return StreamSupport
-                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 
fromModified).spliterator(), false)
+                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, 
fromId).spliterator(), false)
                 .filter(input -> modifiedGreaterThanEquals(input, 
fromModified) && modifiedLessThan(input, toModified))
+                .sorted((o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2))
                 .limit(limit)
                 .collect(toList());
     }
@@ -176,27 +182,26 @@ public class VersionGCSupport {
     }
 
     /**
-     * Retrieve the time of the oldest modified document.
+     * Retrieve the oldest modified document.
      *
-     * @return the timestamp of the oldest modified document.
+     * @return the oldest modified document.
      */
-    public long getOldestModifiedTimestamp(final Clock clock) {
+    public NodeDocument getOldestModifiedDoc(final Clock clock) {
         long ts = 0;
         long now = clock.getTime();
         Iterable<NodeDocument> docs = null;
 
         LOG.info("find oldest modified document");
         try {
-            docs = getModifiedDocs(ts, now, 1);
+            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
             if (docs.iterator().hasNext()) {
-                Long modified = docs.iterator().next().getModified();
-                return modified != null ? modified : 0L;
+                return docs.iterator().next();
             }
         } finally {
             Utils.closeIfCloseable(docs);
         }
         LOG.info("find oldest modified document to be {}", 
Utils.timestampToString(ts));
-        return ts;
+        return NULL;
     }
 
     public long getDeletedOnceCount() throws UnsupportedOperationException {
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index b562831e24..f54299e3fd 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -32,7 +32,6 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
-import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Joiner;
@@ -112,6 +111,11 @@ public class VersionGarbageCollector {
      */
     static final String SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP = 
"detailedGCTimeStamp";
 
+    /**
+     * Property name to _id till when last detailed-GC run happened
+     */
+    static final String SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP = 
"detailedGCId";
+
     private final DocumentNodeStore nodeStore;
     private final DocumentStore ds;
     private final boolean detailedGCEnabled;
@@ -272,7 +276,8 @@ public class VersionGarbageCollector {
         int splitDocGCCount;
         int intermediateSplitDocGCCount;
         int updateResurrectedGCCount;
-        long oldestModifiedGced;
+        long oldestModifiedDocTimeStamp;
+        String oldestModifiedDocId;
         int updatedDetailedGCDocsCount;
         int deletedPropsGCCount;
         final TimeDurationFormatter df = TimeDurationFormatter.forLogging();
@@ -343,7 +348,8 @@ public class VersionGarbageCollector {
                     ", updateResurrectedGCCount=" + updateResurrectedGCCount +
                     ", splitDocGCCount=" + splitDocGCCount +
                     ", intermediateSplitDocGCCount=" + 
intermediateSplitDocGCCount +
-                    ", oldestModifiedGced=" + oldestModifiedGced +
+                    ", oldestModifiedDocId=" + oldestModifiedDocId +
+                    ", oldestModifiedDocTimeStamp=" + 
oldestModifiedDocTimeStamp +
                     ", updatedDetailedGCDocsCount=" + 
updatedDetailedGCDocsCount +
                     ", deletedPropsGCCount=" + deletedPropsGCCount +
                     ", iterationCount=" + iterationCount +
@@ -363,7 +369,8 @@ public class VersionGarbageCollector {
             this.splitDocGCCount += run.splitDocGCCount;
             this.intermediateSplitDocGCCount += 
run.intermediateSplitDocGCCount;
             this.updateResurrectedGCCount += run.updateResurrectedGCCount;
-            this.oldestModifiedGced = run.oldestModifiedGced;
+            this.oldestModifiedDocTimeStamp = run.oldestModifiedDocTimeStamp;
+            this.oldestModifiedDocId = run.oldestModifiedDocId;
             this.updatedDetailedGCDocsCount += run.updatedDetailedGCDocsCount;
             this.deletedPropsGCCount += run.deletedPropsGCCount;
             if (run.iterationCount > 0) {
@@ -608,15 +615,16 @@ public class VersionGarbageCollector {
                 throws IOException {
             int docsTraversed = 0;
             boolean foundDoc = true;
-            long oldestModifiedGCed = rec.scopeFullGC.fromMs;
+            long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
+            String oldestModifiedDocId = rec.detailedGCId;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, 
cancel)) {
-                final long fromModified = rec.scopeFullGC.fromMs;
-                final long toModified = rec.scopeFullGC.toMs;
+                final long fromModified = rec.scopeDetailedGC.fromMs;
+                final long toModified = rec.scopeDetailedGC.toMs;
                 if (phases.start(GCPhase.DETAILED_GC)) {
-                    while (foundDoc && oldestModifiedGCed < toModified && 
docsTraversed <= PROGRESS_BATCH_SIZE) {
+                    while (foundDoc && oldestModifiedDocTimeStamp < toModified 
&& docsTraversed <= PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while 
loop
                         foundDoc = false;
-                        Iterable<NodeDocument> itr = 
versionStore.getModifiedDocs(oldestModifiedGCed, toModified, 1000);
+                        Iterable<NodeDocument> itr = 
versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, 
oldestModifiedDocId);
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
@@ -640,12 +648,12 @@ public class VersionGarbageCollector {
                                 if (modified == null) {
                                     monitor.warn("collectDetailGarbage : 
document has no _modified property : {}",
                                             doc.getId());
-                                } else if (modified < oldestModifiedGCed) {
+                                } else if (modified < 
oldestModifiedDocTimeStamp) {
                                     monitor.warn(
                                             "collectDetailGarbage : document 
has older _modified than query boundary : {} (from: {}, to: {})",
                                             modified, fromModified, 
toModified);
                                 } else {
-                                    oldestModifiedGCed = modified;
+                                    oldestModifiedDocTimeStamp = modified;
                                 }
 
                                 if (gc.hasGarbage() && 
phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
@@ -653,11 +661,13 @@ public class VersionGarbageCollector {
                                     phases.stop(GCPhase.DETAILED_GC_CLEANUP);
                                 }
 
-                                oldestModifiedGCed = modified == null ? 
fromModified : modified;
+                                oldestModifiedDocTimeStamp = modified == null 
? fromModified : modified;
+                                oldestModifiedDocId = doc.getId();
                             }
                         } finally {
                             Utils.closeIfCloseable(itr);
-                            phases.stats.oldestModifiedGced = 
oldestModifiedGCed;
+                            phases.stats.oldestModifiedDocTimeStamp = 
oldestModifiedDocTimeStamp;
+                            phases.stats.oldestModifiedDocId = 
oldestModifiedDocId;
                         }
                     }
                     phases.stop(GCPhase.DETAILED_GC);
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 324ade704c..690fd5a0d6 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -21,8 +21,8 @@ package org.apache.jackrabbit.oak.plugins.document.mongo;
 
 import static com.mongodb.client.model.Filters.eq;
 import static com.mongodb.client.model.Filters.exists;
+import static com.mongodb.client.model.Filters.gt;
 import static java.util.Optional.ofNullable;
-import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.concat;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
@@ -34,6 +34,7 @@ import static 
org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
@@ -130,21 +131,26 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} 
value
-     * within the given range in sorted order. The two passed modified 
timestamps
-     * are in milliseconds since the epoch and the implementation will convert 
them
-     * to seconds at the granularity of the {@link 
NodeDocument#MODIFIED_IN_SECS}
-     * field and then perform the comparison.
+     * within the given range .The two passed modified timestamps are in 
milliseconds
+     * since the epoch and the implementation will convert them to seconds at
+     * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
+     * then perform the comparison.
      *
      * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified   the upper bound modified timestamp (exclusive)
-     * @return matching documents in sorted order of {@link 
NodeDocument#MODIFIED_IN_SECS}
+     * @param toModified the upper bound modified timestamp (exclusive)
+     * @param limit the limit of documents to return
+     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+     * @return matching documents.
      */
     @Override
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit) {
-        // _modified >= fromModified && _modified < toModified
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
+                                                  final String fromId) {
+        // _modified >= fromModified && _modified < toModified && _id > fromId
         final Bson query = and(gte(MODIFIED_IN_SECS, 
getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)));
-        final Bson sort = eq(MODIFIED_IN_SECS, 1);
+                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, 
fromId));
+        // first sort by _modified and then by _id
+        final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
+
         final FindIterable<BasicDBObject> cursor = getNodeCollection()
                 .find(query)
                 .sort(sort)
@@ -232,11 +238,11 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
      * @return the timestamp of the oldest modified document.
      */
     @Override
-    public long getOldestModifiedTimestamp(final Clock clock) {
-        LOG.info("getOldestModifiedTimestamp() <- start");
+    public NodeDocument getOldestModifiedDoc(final Clock clock) {
+        LOG.info("getOldestModifiedDoc() <- start");
 
-        final Bson sort = eq(MODIFIED_IN_SECS, 1);
-        final List<Long> result = new ArrayList<>(1);
+        final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
+        final List<NodeDocument> result = new ArrayList<>(1);
 
         // we need to add query condition to ignore `previous` documents which 
doesn't have this field
         final Bson query = exists(MODIFIED_IN_SECS);
@@ -245,14 +251,13 @@ public class MongoVersionGCSupport extends 
VersionGCSupport {
                 (Consumer<BasicDBObject>) document ->
                         ofNullable(store.convertFromDBObject(NODES, document))
                                 .ifPresent(doc -> {
-                    long modifiedMs = 
SECONDS.toMillis(ofNullable(doc.getModified()).orElse(0L));
-                    LOG.info("getOldestDeletedOnceTimestamp() -> {}", 
Utils.timestampToString(modifiedMs));
-                    result.add(modifiedMs);
+                    LOG.info("getOldestModifiedDoc() -> {}", doc);
+                    result.add(doc);
                 }));
 
         if (result.isEmpty()) {
-            LOG.info("getOldestModifiedTimestamp() -> none found, return 
current time");
-            result.add(clock.getTime());
+            LOG.info("getOldestModifiedDoc() -> none found, return NULL 
document");
+            result.add(NULL);
         }
         return result.get(0);
     }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
index 82c09e213d..3a9ef2d95d 100755
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
@@ -971,8 +971,8 @@ public class RDBDocumentStore implements DocumentStore {
     public static String VERSIONPROP = "__version";
 
     // set of supported indexed properties
-    private static final Set<String> INDEXEDPROPERTIES = new 
HashSet<String>(Arrays.asList(new String[] { MODIFIED,
-            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, 
NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP }));
+    private static final Set<String> INDEXEDPROPERTIES = new 
HashSet<>(Arrays.asList(MODIFIED,
+            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, 
NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP, ID));
 
     // set of required table columns
     private static final Set<String> REQUIREDCOLUMNS = 
Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(
@@ -1840,7 +1840,7 @@ public class RDBDocumentStore implements DocumentStore {
     }
 
     protected <T extends Document> Iterable<T> queryAsIterable(final 
Collection<T> collection, String fromKey, String toKey,
-            final List<String> excludeKeyPatterns, final List<QueryCondition> 
conditions, final int limit, final String sortBy) {
+            final List<String> excludeKeyPatterns, final List<QueryCondition> 
conditions, final int limit, final List<String> sortBy) {
 
         final RDBTableMetaData tmd = getTable(collection);
         Set<String> allowedProps = Sets.intersection(INDEXEDPROPERTIES, 
tmd.getColumnProperties());
@@ -1853,6 +1853,16 @@ public class RDBDocumentStore implements DocumentStore {
             }
         }
 
+        if (sortBy != null && !sortBy.isEmpty()) {
+            for (String key: sortBy) {
+                if (!allowedProps.contains(key)) {
+                    final String message = "indexed property " + key + " not 
supported. supported properties are " + allowedProps;
+                    LOG.error(message);
+                    throw new UnsupportedIndexedPropertyException(message);
+                }
+            }
+        }
+
         final String from = collection == Collection.NODES && 
NodeDocument.MIN_ID_VALUE.equals(fromKey) ? null : fromKey;
         final String to = collection == Collection.NODES && 
NodeDocument.MAX_ID_VALUE.equals(toKey) ? null : toKey;
 
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 26fc1311fa..59dfb968b0 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -16,6 +16,8 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import static java.util.List.of;
+import static java.util.stream.Collectors.joining;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet;
 import static 
org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.CHAR2OCTETRATIO;
@@ -459,7 +461,7 @@ public class RDBDocumentStoreJDBC {
                             + excludeKeyPatterns + ", conditions=" + 
conditions + ", limit=" + limit)
                     : null);
             stmt = prepareQuery(connection, tmd, fields, minId,
-                    maxId, excludeKeyPatterns, conditions, limit, "ID");
+                    maxId, excludeKeyPatterns, conditions, limit, of("ID"));
             rs = stmt.executeQuery();
             while (rs.next() && result.size() < limit) {
                 int field = 1;
@@ -554,7 +556,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     public Iterator<RDBRow> queryAsIterator(RDBConnectionHandler ch, 
RDBTableMetaData tmd, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, String sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, List<String> sortBy) throws SQLException {
         return new ResultSetIterator(ch, tmd, minId, maxId, 
excludeKeyPatterns, conditions, limit, sortBy);
     }
 
@@ -573,7 +575,7 @@ public class RDBDocumentStoreJDBC {
         private long pstart;
 
         public ResultSetIterator(RDBConnectionHandler ch, RDBTableMetaData 
tmd, String minId, String maxId,
-                List<String> excludeKeyPatterns, List<QueryCondition> 
conditions, int limit, String sortBy) throws SQLException {
+                List<String> excludeKeyPatterns, List<QueryCondition> 
conditions, int limit, List<String> sortBy) throws SQLException {
             long start = System.currentTimeMillis();
             try {
                 this.ch = ch;
@@ -695,7 +697,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     private PreparedStatement prepareQuery(Connection connection, 
RDBTableMetaData tmd, String columns, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, String sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, 
int limit, List<String> sortBy) throws SQLException {
 
         StringBuilder selectClause = new StringBuilder();
 
@@ -714,9 +716,8 @@ public class RDBDocumentStoreJDBC {
             query.append(" where ").append(whereClause);
         }
 
-        if (sortBy != null) {
-            // FIXME : order should be determined via sortBy field
-            query.append(" order by ID");
+        if (sortBy != null && !sortBy.isEmpty()) {
+            query.append(" order by 
").append(sortBy.stream().map(INDEXED_PROP_MAPPING::get).collect(joining(", 
")));
         }
 
         if (limit != Integer.MAX_VALUE) {
@@ -967,6 +968,7 @@ public class RDBDocumentStoreJDBC {
         tmp.put(NodeDocument.SD_TYPE, "SDTYPE");
         tmp.put(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "SDMAXREVTIME");
         tmp.put(RDBDocumentStore.VERSIONPROP, "VERSION");
+        tmp.put(NodeDocument.ID, "ID");
         INDEXED_PROP_MAPPING = Collections.unmodifiableMap(tmp);
     }
 
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index f26268bcd3..0d2f678911 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -16,11 +16,13 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
-import static java.util.Collections.emptyList;
 import static java.util.List.of;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static 
org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
 
@@ -99,18 +101,21 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      * then perform the comparison.
      *
      * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified   the upper bound modified timestamp (exclusive)
-     * @param limit        the limit of documents to return
+     * @param toModified the upper bound modified timestamp (exclusive)
+     * @param limit the limit of documents to return
+     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
      * @return matching documents.
      */
     @Override
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit) {
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, 
final long toModified, final int limit,
+                                                  final String fromId) {
         List<QueryCondition> conditions = of(new 
QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
-                new QueryCondition(MODIFIED_IN_SECS, ">=", 
getModifiedInSecs(fromModified)));
+                new QueryCondition(MODIFIED_IN_SECS, ">=", 
getModifiedInSecs(fromModified)),
+                new QueryCondition(ID, ">", of(fromId)));
         if (MODE == 1) {
             return getIterator(EMPTY_KEY_PATTERN, conditions);
         } else {
-            return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, 
conditions, limit, MODIFIED_IN_SECS);
+            return store.queryAsIterable(NODES, fromId, null, 
EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
         }
     }
 
@@ -275,24 +280,20 @@ public class RDBVersionGCSupport extends VersionGCSupport 
{
      * @return the timestamp of the oldest modified document.
      */
     @Override
-    public long getOldestModifiedTimestamp(Clock clock) {
-        long modifiedMs = Long.MIN_VALUE;
+    public NodeDocument getOldestModifiedDoc(Clock clock) {
+        NodeDocument doc = NULL;
 
-        LOG.info("getOldestModifiedTimestamp() <- start");
+        LOG.info("getOldestModifiedDoc() <- start");
+        Iterable<NodeDocument> modifiedDocs = null;
         try {
-            long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, 
null, null, EMPTY_KEY_PATTERN, emptyList());
-            modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec);
+            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, 
MIN_ID_VALUE);
+            doc = modifiedDocs.iterator().hasNext() ? 
modifiedDocs.iterator().next() : NULL;
         } catch (DocumentStoreException ex) {
-            LOG.error("getOldestModifiedTimestamp()", ex);
-        }
-
-        if (modifiedMs > 0) {
-            LOG.info("getOldestModifiedTimestamp() -> {}", 
Utils.timestampToString(modifiedMs));
-            return modifiedMs;
-        } else {
-            LOG.info("getOldestModifiedTimestamp() -> none found, return 
current time");
-            return clock.getTime();
+            LOG.error("getOldestModifiedDoc()", ex);
+        } finally {
+            Utils.closeIfCloseable(modifiedDocs);
         }
+        return doc;
     }
 
     @Override
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
index c9428429bc..85bb1225fb 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
@@ -62,6 +62,7 @@ import org.slf4j.LoggerFactory;
 
 import static 
org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.isDeletedEntry;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.isCommitRootEntry;
 import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.isRevisionsEntry;
@@ -668,7 +669,7 @@ public class Utils {
      * @return an {@link Iterable} over all documents in the store.
      */
     public static Iterable<NodeDocument> getAllDocuments(final DocumentStore 
store) {
-        return internalGetSelectedDocuments(store, null, 0, 
DEFAULT_BATCH_SIZE);
+        return internalGetSelectedDocuments(store, null, 0, MIN_ID_VALUE, 
DEFAULT_BATCH_SIZE);
     }
 
     /**
@@ -710,7 +711,7 @@ public class Utils {
      */
     public static Iterable<NodeDocument> getSelectedDocuments(
             DocumentStore store, String indexedProperty, long startValue, int 
batchSize) {
-        return internalGetSelectedDocuments(store, indexedProperty, 
startValue, batchSize);
+        return internalGetSelectedDocuments(store, indexedProperty, 
startValue, MIN_ID_VALUE, batchSize);
     }
 
     /**
@@ -719,12 +720,21 @@ public class Utils {
      */
     public static Iterable<NodeDocument> getSelectedDocuments(
             DocumentStore store, String indexedProperty, long startValue) {
-        return internalGetSelectedDocuments(store, indexedProperty, 
startValue, DEFAULT_BATCH_SIZE);
+        return internalGetSelectedDocuments(store, indexedProperty, 
startValue, MIN_ID_VALUE, DEFAULT_BATCH_SIZE);
+    }
+
+    /**
+     * Like {@link #getSelectedDocuments(DocumentStore, String, long, int)} 
with
+     * a default {@code batchSize}.
+     */
+    public static Iterable<NodeDocument> getSelectedDocuments(
+            DocumentStore store, String indexedProperty, long startValue, 
String fromId) {
+        return internalGetSelectedDocuments(store, indexedProperty, 
startValue, fromId, DEFAULT_BATCH_SIZE);
     }
 
     private static Iterable<NodeDocument> internalGetSelectedDocuments(
             final DocumentStore store, final String indexedProperty,
-            final long startValue, final int batchSize) {
+            final long startValue, String fromId, final int batchSize) {
         if (batchSize < 2) {
             throw new IllegalArgumentException("batchSize must be > 1");
         }
@@ -733,7 +743,7 @@ public class Utils {
             public Iterator<NodeDocument> iterator() {
                 return new AbstractIterator<NodeDocument>() {
 
-                    private String startId = NodeDocument.MIN_ID_VALUE;
+                    private String startId = fromId;
 
                     private Iterator<NodeDocument> batch = nextBatch();
 
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index ed39a372b2..6aceeac830 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -22,14 +22,19 @@ import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 
+import static java.util.concurrent.TimeUnit.DAYS;
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
 import static 
org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
 import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
-import java.util.concurrent.TimeUnit;
-
 public class VersionGCInitTest {
 
     @Rule
@@ -45,27 +50,50 @@ public class VersionGCInitTest {
     @Test
     public void lazyInitialize() throws Exception {
         DocumentStore store = ns.getDocumentStore();
-        Document vgc = store.find(Collection.SETTINGS, "versionGC");
+        Document vgc = store.find(SETTINGS, "versionGC");
         assertNull(vgc);
 
-        ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS);
+        ns.getVersionGarbageCollector().gc(1, DAYS);
 
-        vgc = store.find(Collection.SETTINGS, "versionGC");
+        vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
         assertEquals(0L, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertNull(vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 
     @Test
     public void lazyInitializeWithDetailedGC() throws Exception {
         DocumentStore store = ns.getDocumentStore();
-        Document vgc = store.find(Collection.SETTINGS, "versionGC");
+        Document vgc = store.find(SETTINGS, "versionGC");
         assertNull(vgc);
 
         enableDetailGC(ns.getVersionGarbageCollector());
-        ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS);
+        long offset = SECONDS.toMillis(42);
+        String id = getIdFromPath("/node");
+        Revision r = new Revision(offset, 0, 1);
+        UpdateOp op = new UpdateOp(id, true);
+        NodeDocument.setModified(op, r);
+        store.createOrUpdate(NODES, op);
+        ns.getVersionGarbageCollector().gc(1, DAYS);
 
-        vgc = store.find(Collection.SETTINGS, "versionGC");
+        vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
-        assertEquals(-1L, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(39L, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(id, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
+    }
+
+    @Test
+    public void lazyInitializeWithDetailedGCWithNoData() throws Exception {
+        DocumentStore store = ns.getDocumentStore();
+        Document vgc = store.find(SETTINGS, "versionGC");
+        assertNull(vgc);
+
+        enableDetailGC(ns.getVersionGarbageCollector());
+        ns.getVersionGarbageCollector().gc(1, DAYS);
+
+        vgc = store.find(SETTINGS, "versionGC");
+        assertNotNull(vgc);
+        assertEquals(0L, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(MIN_ID_VALUE, 
vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 }
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index 831d2c89ec..4eb20986c2 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -18,10 +18,9 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
+import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.jackrabbit.guava.common.collect.Iterables;
-import org.apache.jackrabbit.guava.common.collect.Lists;
 import com.mongodb.ReadPreference;
 
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore;
@@ -29,31 +28,38 @@ import 
org.apache.jackrabbit.oak.plugins.document.mongo.MongoTestUtils;
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoVersionGCSupport;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBVersionGCSupport;
-import org.apache.jackrabbit.oak.plugins.document.util.Utils;
-import org.apache.jackrabbit.oak.stats.Clock;
 import org.junit.After;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
+import static java.util.Comparator.comparing;
+import static java.util.List.of;
+import static java.util.Optional.ofNullable;
 import static java.util.concurrent.TimeUnit.SECONDS;
+import static java.util.stream.StreamSupport.stream;
+import static org.apache.jackrabbit.guava.common.collect.Comparators.isInOrder;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static 
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MEMORY;
 import static 
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MONGO;
 import static 
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDB_H2;
+import static 
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static 
org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
+import static org.apache.jackrabbit.oak.stats.Clock.SIMPLE;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 @RunWith(Parameterized.class)
 public class VersionGCSupportTest {
 
-    private DocumentStoreFixture fixture;
-    private DocumentStore store;
-    private VersionGCSupport gcSupport;
-    private List<String> ids = Lists.newArrayList();
+    private final DocumentStoreFixture fixture;
+    private final DocumentStore store;
+    private final VersionGCSupport gcSupport;
+    private final List<String> ids = new ArrayList<>();
 
     @Parameterized.Parameters(name="{0}")
     public static java.util.Collection<DocumentStoreFixture> fixtures() {
-        List<DocumentStoreFixture> fixtures = Lists.newArrayList();
+        List<DocumentStoreFixture> fixtures = new ArrayList<>(3);
         if (RDB_H2.isAvailable()) {
             fixtures.add(RDB_H2);
         }
@@ -88,7 +94,7 @@ public class VersionGCSupportTest {
 
     @After
     public void after() throws Exception {
-        store.remove(Collection.NODES, ids);
+        store.remove(NODES, ids);
         fixture.dispose();
     }
 
@@ -97,12 +103,12 @@ public class VersionGCSupportTest {
         long offset = SECONDS.toMillis(42);
         for (int i = 0; i < 5; i++) {
             Revision r = new Revision(offset + SECONDS.toMillis(i), 0, 1);
-            String id = Utils.getIdFromPath("/doc-" + i);
+            String id = getIdFromPath("/doc-" + i);
             ids.add(id);
             UpdateOp op = new UpdateOp(id, true);
             NodeDocument.setModified(op, r);
             NodeDocument.setDeleted(op, r, true);
-            store.create(Collection.NODES, Lists.newArrayList(op));
+            store.create(NODES, of(op));
         }
 
         assertPossiblyDeleted(0, 41, 0);
@@ -126,25 +132,83 @@ public class VersionGCSupportTest {
         assertPossiblyDeleted(51, 60, 0);
     }
 
+    @Test
+    public void getPossiblyModifiedDocs() {
+        long offset = SECONDS.toMillis(42);
+        for (int i = 0; i < 5; i++) {
+            Revision r = new Revision(offset + SECONDS.toMillis(i), 0, 1);
+            String id = getIdFromPath("/doc-modified" + i);
+            ids.add(id);
+            UpdateOp op = new UpdateOp(id, true);
+            NodeDocument.setModified(op, r);
+            store.create(NODES, of(op));
+        }
+
+        assertModified(0, 41, 0);
+        assertModified(0, 42, 0);
+        assertModified(0, 44, 0);
+        assertModified(0, 45, 3);
+        assertModified(0, 46, 3);
+        assertModified(0, 49, 3);
+        assertModified(0, 50, 5);
+        assertModified(0, 51, 5);
+        assertModified(39, 60, 5);
+        assertModified(40, 60, 5);
+        assertModified(41, 60, 5);
+        assertModified(42, 60, 5);
+        assertModified(44, 60, 5);
+        assertModified(45, 60, 2);
+        assertModified(47, 60, 2);
+        assertModified(48, 60, 2);
+        assertModified(49, 60, 2);
+        assertModified(50, 60, 0);
+        assertModified(51, 60, 0);
+    }
+
     @Test
     public void findOldest() {
         // see OAK-8476
         long secs = 123456;
         long offset = SECONDS.toMillis(secs);
         Revision r = new Revision(offset, 0, 1);
-        String id = Utils.getIdFromPath("/doc-del");
+        String id = getIdFromPath("/doc-del");
         ids.add(id);
         UpdateOp op = new UpdateOp(id, true);
         NodeDocument.setModified(op, r);
         NodeDocument.setDeleted(op, r, true);
-        store.create(Collection.NODES, Lists.newArrayList(op));
+        store.create(NODES, of(op));
 
-        long reportedsecs = 
gcSupport.getOldestDeletedOnceTimestamp(Clock.SIMPLE, 1) / SECONDS.toMillis(1);
-        assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedsecs), 
Math.abs(secs - reportedsecs) < 5);
+        long reportedSecs = gcSupport.getOldestDeletedOnceTimestamp(SIMPLE, 1) 
/ SECONDS.toMillis(1);
+        assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), 
Math.abs(secs - reportedSecs) < 5);
+    }
+
+    @Test
+    public void findOldestModified() {
+        long secs = 1234567;
+        long offset = SECONDS.toMillis(secs);
+        Revision r = new Revision(offset, 0, 1);
+        String id = getIdFromPath("/doc-modified");
+        ids.add(id);
+        UpdateOp op = new UpdateOp(id, true);
+        NodeDocument.setModified(op, r);
+        store.create(NODES, of(op));
+
+        NodeDocument oldestModifiedDoc = 
gcSupport.getOldestModifiedDoc(SIMPLE);
+        String oldestModifiedDocId = oldestModifiedDoc.getId();
+        long reportedSecs = 
ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+        assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), 
Math.abs(secs - reportedSecs) < 5);
+        assertEquals(id, oldestModifiedDocId);
     }
 
     private void assertPossiblyDeleted(long fromSeconds, long toSeconds, long 
num) {
         Iterable<NodeDocument> docs = 
gcSupport.getPossiblyDeletedDocs(SECONDS.toMillis(fromSeconds), 
SECONDS.toMillis(toSeconds));
-        assertEquals(num, Iterables.size(docs));
+        assertEquals(num, stream(docs.spliterator(), false).count());
+    }
+
+    private void assertModified(long fromSeconds, long toSeconds, long num) {
+        Iterable<NodeDocument> docs = 
gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), 
SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE);
+        docs.forEach(d -> System.out.println(d.getModified() + " " + 
d.getId()));
+        assertEquals(num, stream(docs.spliterator(), false).count());
+        assertTrue(isInOrder(docs, (o1, o2) -> 
comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, 
o2)));
     }
 }
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 4470a07f96..caa156a6d4 100644
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -229,8 +229,8 @@ public class VersionGarbageCollectorIT {
     }
 
     @Test
-    public void testGCDeletedProps() throws Exception{
-        //1. Create nodes
+    public void testGCDeletedProps() throws Exception {
+        //1. Create nodes with properties
         NodeBuilder b1 = store.getRoot().builder();
 
         // Add property to node & save
@@ -289,6 +289,54 @@ public class VersionGarbageCollectorIT {
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
     }
+
+    // Test when we have more than 1000 deleted properties
+    @Test
+    public void testGCDeletedProps_1() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+
+        // Add property to node & save
+        for (int i = 0; i < 5_000; i++) {
+            for (int j = 0; j < 10; j++) {
+                b1.child("z"+i).setProperty("prop"+j, "foo", STRING);
+            }
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 5_000; i++) {
+            for (int j = 0; j < 10; j++) {
+                b2.getChildNode("z"+i).removeProperty("prop"+j);
+            }
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //2. Check that a deleted property is not collected before maxAge
+        //Clock cannot move back (it moved forward in #1) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //3. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(50_000, stats.deletedPropsGCCount);
+
+    }
     
     private void gcSplitDocsInternal(String subNodeName) throws Exception {
         long maxAge = 1; //hrs

Reply via email to