This is an automated email from the ASF dual-hosted git repository. stefanegli pushed a commit to branch OAK-10526 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 5be575223d2f60470e795ffb10a78520f76195cb Author: Stefan Egli <stefane...@apache.org> AuthorDate: Mon Nov 6 15:55:45 2023 +0100 OAK-10526 : set split doc maxRev to 'now' at split time, to avoid it being GCed too early --- .../jackrabbit/oak/plugins/document/SplitOperations.java | 16 ++++++++++++++-- .../oak/plugins/document/VersionGarbageCollectorIT.java | 15 ++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java index 5e0b8f6fec..2644ee71bb 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java @@ -324,7 +324,13 @@ class SplitOperations { for (Range r : entry.getValue()) { setPrevious(intermediate, r); } - setIntermediateDocProps(intermediate, h); + // OAK-10526 : setting 'maxRev=now()' here guarantees earliest GC of this + // split doc will be 'maxAgeMillis' (24h) from now (hence covers all open + // JCR sessions) or until any checkpoint created before 'now()' is + // released. While this leaves garbage split doc slightly longer than + // absolutely necessary, it is a rather simple and robust mechanism. + setIntermediateDocProps(intermediate, + Revision.newRevision(context.getClusterId())); splitOps.add(intermediate); } } @@ -380,7 +386,13 @@ class SplitOperations { // check size of old document NodeDocument oldDoc = new NodeDocument(STORE); UpdateUtils.applyChanges(oldDoc, old); - setSplitDocProps(doc, oldDoc, old, high); + // OAK-10526 : setting 'maxRev=now()' here guarantees earliest GC of this + // split doc will be 'maxAgeMillis' (24h) from now (hence covers all open + // JCR sessions) or until any checkpoint created before 'now()' is + // released. While this leaves garbage split doc slightly longer than + // absolutely necessary, it is a rather simple and robust mechanism. + setSplitDocProps(doc, oldDoc, old, + Revision.newRevision(context.getClusterId())); splitOps.add(old); if (numValues < numRevsThreshold) { diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index 1ecbd73cb7..c40ae68456 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -82,7 +82,6 @@ import org.apache.jackrabbit.oak.stats.Clock; import org.jetbrains.annotations.NotNull; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -295,7 +294,6 @@ public class VersionGarbageCollectorIT { * split doc. */ @Test - @Ignore(value = "requires fix for OAK-10526") public void gcSplitDocsWithReferencedRevisions() throws Exception { final String exp; @@ -327,7 +325,14 @@ public class VersionGarbageCollectorIT { exp = lastValue; store.runBackgroundOperations(); - // step 5 : create a checkpoint at t(+1w) + // step 4b : another change to further lastRev for clusterId 1 + // required to ensure 5sec rounding of mongo variant is also covered + clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(6)); + b1 = store.getRoot().builder(); + b1.child("unrelated").setProperty("unrelated", "unrelated"); + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // step 5 : create a checkpoint at t(+1w+6sec) String checkpoint = store.checkpoint(TimeUnit.DAYS.toMillis(42)); assertEquals(exp, store.getRoot().getChildNode("t").getString("foo")); assertEquals(exp, store.retrieve(checkpoint).getChildNode("t").getString("foo")); @@ -348,8 +353,8 @@ public class VersionGarbageCollectorIT { // as we'd be in the same rounded second) -> t(+2w:30s) clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(30)); - // step 9 : trigger another GC - this now splits away the referenced revision - assertEquals(1, gc.gc(24, HOURS).splitDocGCCount); + // step 9 : trigger another GC - previously split away the referenced revision + assertEquals(0, gc.gc(24, HOURS).splitDocGCCount); // flush the caches as otherwise it might deliver stale data store.getNodeCache().invalidateAll(); assertEquals("barZ", store.getRoot().getChildNode("t").getString("foo"));