This is an automated email from the ASF dual-hosted git repository. stefanegli pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push: new fd8b860ea6 OAK-10742 : add support for defining include/exclude paths for fullGC (#1547) fd8b860ea6 is described below commit fd8b860ea69adb0d5cdaaef15141563cf8bf3f01 Author: stefan-egli <stefane...@apache.org> AuthorDate: Thu Jun 20 17:57:34 2024 +0200 OAK-10742 : add support for defining include/exclude paths for fullGC (#1547) --- .../oak/plugins/document/VersionGCSupport.java | 13 ++- .../plugins/document/VersionGarbageCollector.java | 27 ++++- .../document/mongo/MongoVersionGCSupport.java | 60 ++++++++++- .../oak/plugins/document/util/Utils.java | 111 ++++++++++++++++++++- .../oak/plugins/document/VersionGCSupportTest.java | 18 ++-- .../document/VersionGarbageCollectorIT.java | 111 ++++++++++++++++++++- 6 files changed, 319 insertions(+), 21 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java index b938da536a..0c320e0e54 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java @@ -34,6 +34,7 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifie import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments; import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -98,12 +99,16 @@ public class VersionGCSupport { * @return matching documents. */ public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, - @NotNull final String fromId) { + @NotNull final String fromId, + @NotNull final Set<String> includePaths, + @NotNull final Set<String> excludePaths) { // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified) - final Stream<NodeDocument> s1 = stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false) + final Stream<NodeDocument> s1 = stream(getSelectedDocuments(store, + MODIFIED_IN_SECS, 1, fromId, includePaths, excludePaths).spliterator(), false) .filter(input -> modifiedEqualsTo(input, fromModified)); - final Stream<NodeDocument> s2 = stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1).spliterator(), false) + final Stream<NodeDocument> s2 = stream(getSelectedDocuments(store, + MODIFIED_IN_SECS, 1, includePaths, excludePaths).spliterator(), false) .filter(input -> modifiedGreaterThan(input, fromModified) && modifiedLessThan(input, toModified)); return concat(s1, s2) @@ -222,7 +227,7 @@ public class VersionGCSupport { long now = clock.getTime(); Iterable<NodeDocument> docs = null; try { - docs = getModifiedDocs(0, now, 1, MIN_ID_VALUE); + docs = getModifiedDocs(0, now, 1, MIN_ID_VALUE, Collections.emptySet(), Collections.emptySet()); if (docs.iterator().hasNext()) { final NodeDocument oldestModifiedDoc = docs.iterator().next(); LOG.info("Oldest modified document is {}", oldestModifiedDoc); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java index c26e433c0e..33ebdb2ed2 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java @@ -216,6 +216,8 @@ public class VersionGarbageCollector { private final boolean fullGCEnabled; private final boolean isFullGCDryRun; private final boolean embeddedVerification; + private Set<String> fullGCIncludePaths = Collections.emptySet(); + private Set<String> fullGCExcludePaths = Collections.emptySet(); private final VersionGCSupport versionStore; private final AtomicReference<GCJob> collector = newReference(); private VersionGCOptions options; @@ -238,6 +240,20 @@ public class VersionGarbageCollector { AUDIT_LOG.info("<init> VersionGarbageCollector created with fullGcMode = {}", fullGcMode); } + /** + * Please note that at the moment the includes do not + * take long paths into account. That is, if a long path was + * supposed to be included via an include, it is not. + * Reason for this is that long paths would require + * the mongo query to include a '_path' condition - which disallows + * mongo from using the '_modified_id' index. IOW long paths + * would result in full scans - which results in bad performance. + */ + void setFullGCPaths(@NotNull Set<String> includes, @NotNull Set<String> excludes) { + this.fullGCIncludePaths = requireNonNull(includes); + this.fullGCExcludePaths = requireNonNull(excludes); + } + void setStatisticsProvider(StatisticsProvider provider) { this.gcStats = new RevisionGCStats(provider); this.fullGCStats = new FullGCStatsCollectorImpl(provider); @@ -833,7 +849,7 @@ public class VersionGarbageCollector { if (log.isDebugEnabled()) { log.debug("Fetching docs from [{}] to [{}] with Id starting from [{}]", timestampToString(fromModifiedMs), timestampToString(toModifiedMs), fromId); } - Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModifiedMs, toModifiedMs, FULL_GC_BATCH_SIZE, fromId); + Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModifiedMs, toModifiedMs, FULL_GC_BATCH_SIZE, fromId, fullGCIncludePaths, fullGCExcludePaths); try { for (NodeDocument doc : itr) { foundDoc = true; @@ -851,7 +867,14 @@ public class VersionGarbageCollector { lastDoc = doc; // collect the data to delete in next step if (phases.start(GCPhase.FULL_GC_COLLECT_GARBAGE)) { - gc.collectGarbage(doc, phases); + if (Utils.isIncluded(doc.getPath(), Collections.emptySet(), fullGCExcludePaths)) { + gc.collectGarbage(doc, phases); + } else { + // MongoVersionGCSupport doesn't take long paths into consideration + // for neither includes nor excludes. If isIncluded returns false here, + // that can only be due to an excluded long path. + // in which case, we can actually honor that and skip this + } phases.stop(GCPhase.FULL_GC_COLLECT_GARBAGE); } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java index 67751d912a..060c105689 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java @@ -22,6 +22,7 @@ package org.apache.jackrabbit.oak.plugins.document.mongo; import static com.mongodb.client.model.Filters.eq; import static com.mongodb.client.model.Filters.exists; import static com.mongodb.client.model.Filters.gt; +import static com.mongodb.client.model.Filters.not; import static com.mongodb.client.model.Filters.or; import static com.mongodb.client.model.Projections.include; import static java.util.Optional.empty; @@ -132,6 +133,56 @@ public class MongoVersionGCSupport extends VersionGCSupport { input -> store.convertFromDBObject(NODES, input))); } + /** + * Calculate the bson representing including only the provided + * include path prefixes and/or excluding the provided + * exclude path prefixes - if any are provided - AND the provided + * query. + * Please note that at the moment the includes do not + * take long paths into account. That is, if a long path was + * supposed to be included via an include, it is not. + * Reason for this is that long paths would require + * the mongo query to include a '_path' condition - which disallows + * mongo from using the '_modified_id' index. IOW long paths + * would result in full scans - which results in bad performance. + * @param includes set of path prefixes which should only be considered + * @param excludes set of path prefixes which should be excluded. + * if these overlap with includes, then exclude has precedence. + * @param query the query with which to do an AND + * @return the combined bson with include/exclude path prefixes + * AND the provided query + */ + private Bson withIncludeExcludes(Set<String> includes, Set<String> excludes, Bson query) { + Bson inclExcl = null; + if (includes != null && !includes.isEmpty()) { + final List<Bson> ors = new ArrayList<>(includes.size()); + for (String incl : includes) { + ors.add(Filters.regex(ID, ":" + incl)); + } + inclExcl = or(ors); + } + if (excludes != null && !excludes.isEmpty()) { + final List<Bson> ands = new ArrayList<>(excludes.size()); + for (String excl : excludes) { + ands.add(not(Filters.regex(ID, ":" + excl))); + } + if (inclExcl != null) { + ands.add(inclExcl); + } + inclExcl = and(ands); + } + if (inclExcl == null) { + // if no include or exclude path prefixes are defined, + // then everything is included - i.e. we fall back to + // just the provided query + return query; + } else { + // if there are include or exclude path prefixes, + // then add them via AND + return and(inclExcl, query); + } + } + /** * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value * within the given range and are greater than given @{@link NodeDocument#ID}. @@ -150,11 +201,14 @@ public class MongoVersionGCSupport extends VersionGCSupport { */ @Override public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit, - @NotNull final String fromId) { + @NotNull final String fromId, + Set<String> includedPathPrefixes, Set<String> excludedPathPrefixes) { // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified) final Bson query = or( - and(eq(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), gt(ID, fromId)), - and(gt(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)))); + withIncludeExcludes(includedPathPrefixes, excludedPathPrefixes, + and(eq(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), gt(ID, fromId))), + withIncludeExcludes(includedPathPrefixes, excludedPathPrefixes, + and(gt(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))))); // first sort by _modified and then by _id final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1)); diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java index f4ba7c245c..53ab10cc25 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java @@ -26,6 +26,7 @@ import java.sql.Timestamp; import java.time.Instant; import java.time.format.DateTimeFormatter; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.Iterator; @@ -54,6 +55,7 @@ import org.apache.jackrabbit.oak.plugins.document.Path; import org.apache.jackrabbit.oak.plugins.document.Revision; import org.apache.jackrabbit.oak.plugins.document.RevisionVector; import org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator; +import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore; import org.apache.jackrabbit.oak.spi.toggle.Feature; import org.apache.jackrabbit.oak.stats.Clock; import org.jetbrains.annotations.NotNull; @@ -806,6 +808,17 @@ public class Utils { return internalGetSelectedDocuments(store, indexedProperty, startValue, MIN_ID_VALUE, DEFAULT_BATCH_SIZE); } + /** + * Like {@link #getSelectedDocuments(DocumentStore, String, long, int)} with + * a default {@code batchSize}. + */ + public static Iterable<NodeDocument> getSelectedDocuments( + DocumentStore store, String indexedProperty, long startValue, + @NotNull final Set<String> includePaths, @NotNull final Set<String> excludePaths) { + return internalGetSelectedDocuments(store, indexedProperty, startValue, + MIN_ID_VALUE, includePaths, excludePaths, DEFAULT_BATCH_SIZE); + } + /** * Like {@link #getSelectedDocuments(DocumentStore, String, long, int)} with * a default {@code batchSize}. @@ -815,12 +828,89 @@ public class Utils { return internalGetSelectedDocuments(store, indexedProperty, startValue, fromId, DEFAULT_BATCH_SIZE); } + /** + * Like {@link #getSelectedDocuments(DocumentStore, String, long, int)} with + * a default {@code batchSize}. + */ + public static Iterable<NodeDocument> getSelectedDocuments( + DocumentStore store, String indexedProperty, long startValue, String fromId, + @NotNull final Set<String> includePaths, @NotNull final Set<String> excludePaths) { + return internalGetSelectedDocuments(store, indexedProperty, startValue, fromId, + includePaths, excludePaths, DEFAULT_BATCH_SIZE); + } + + /** + * Default implementation for applying include/exclude path prefixes + * client-side, meaning the query to the DocumentStore searches for all + * documents and include/excludes are then filtered after receiving that + * query. + * This variant is obviously not intended for production use, as client + * side filtering is slow. Hence this is only used for testing for + * any non-MongoDocumentStore. It should not be enabled in production, + * unless this performance hit here is understood and accepted. + * @param path the path for which to evaluate the include/excludes + * @param includes set of path prefixes which should only be considered + * @param excludes set of path prefixes which should be excluded. + * if these overlap with includes, then exclude has precedence. + * @return whether the provided path is included or not + */ + public static boolean isIncluded(Path path, @NotNull Set<String> includes, + @NotNull Set<String> excludes) { + // check first if includes/excludes are empty + if (includes.isEmpty() && excludes.isEmpty()) { + return true; + } + String s = path.toString(); + // then check excludes first + for (String anExclude : excludes) { + if (s.startsWith(anExclude)) { + // if there is an exclude matching the path + // we need to definitely exclude it, + // no matter whether there is even an include + // for it or not + return false; + } + } + if (includes.isEmpty()) { + // if we have no includes defined at all, and it + // was not excluded, then it is an include + return true; + } + // then the includes + for (String anInclude : includes) { + if (s.startsWith(anInclude)) { + // if we have a matching include, and given + // it was not excluded above, then this is + // an include + return true; + } + } + // if we have any includes defined, but none of + // them matched so far, then this is an exclude + return false; + } + + private static Iterable<NodeDocument> internalGetSelectedDocuments( + final DocumentStore store, final String indexedProperty, + final long startValue, String fromId, + final int batchSize) { + return internalGetSelectedDocuments(store, indexedProperty, startValue, fromId, + Collections.emptySet(), Collections.emptySet(), batchSize); + } + private static Iterable<NodeDocument> internalGetSelectedDocuments( final DocumentStore store, final String indexedProperty, - final long startValue, String fromId, final int batchSize) { + final long startValue, String fromId, + @NotNull final Set<String> includePaths, + @NotNull final Set<String> excludePaths, + final int batchSize) { if (batchSize < 2) { throw new IllegalArgumentException("batchSize must be > 1"); } + if ((store instanceof MongoDocumentStore) + && (!includePaths.isEmpty() || !excludePaths.isEmpty())) { + throw new IllegalArgumentException("cannot use with MongoDocumentStore"); + } return new Iterable<NodeDocument>() { @Override public Iterator<NodeDocument> iterator() { @@ -832,6 +922,25 @@ public class Utils { @Override protected NodeDocument computeNext() { + do { + final NodeDocument n = doComputeNext(); + if (n == null) { + return null; + } + if (isIncluded(n.getPath(), includePaths, excludePaths)) { + return n; + } + // else repeat + // note that this loop is potentially dangerous, + // depending on the include/exclude definition. + // that's why currently this variant is not supported + // against MongoDocumentStore. I.e. it is only used + // for unit testing. FullGC for RDBDocumentStore + // is not supported at all. + } while(true); + } + + private NodeDocument doComputeNext() { // read next batch if necessary if (!batch.hasNext()) { batch = nextBatch(); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java index eb848fe4cc..26f5759854 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java @@ -19,7 +19,9 @@ package org.apache.jackrabbit.oak.plugins.document; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Set; import com.mongodb.ReadPreference; @@ -59,6 +61,8 @@ import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) public class VersionGCSupportTest { + private static final Set<String> EMPTY_STRING_SET = Collections.emptySet(); + private final DocumentStoreFixture fixture; private final DocumentStore store; private final VersionGCSupport gcSupport; @@ -278,7 +282,7 @@ public class VersionGCSupportTest { oldestModifiedDocId = MIN_ID_VALUE; for(int i = 0; i < 5; i++) { - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, EMPTY_STRING_SET, EMPTY_STRING_SET); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); long count = stream(modifiedDocs.spliterator(), false).count(); assertEquals(1000, count); @@ -313,7 +317,7 @@ public class VersionGCSupportTest { oldestModifiedDocId = MIN_ID_VALUE; for(int i = 0; i < 5; i++) { - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, EMPTY_STRING_SET, EMPTY_STRING_SET); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); long count = stream(modifiedDocs.spliterator(), false).count(); assertEquals(1000, count); @@ -325,7 +329,7 @@ public class VersionGCSupportTest { } // fetch last remaining document now - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, EMPTY_STRING_SET, EMPTY_STRING_SET); assertEquals(1, stream(modifiedDocs.spliterator(), false).count()); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); oldestModifiedDoc = modifiedDocs.iterator().next(); @@ -333,7 +337,7 @@ public class VersionGCSupportTest { oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L); // all documents had been fetched, now we won't get any document - modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); + modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, EMPTY_STRING_SET, EMPTY_STRING_SET); assertEquals(0, stream(modifiedDocs.spliterator(), false).count()); } @@ -360,7 +364,7 @@ public class VersionGCSupportTest { store.create(NODES, updateOps); for(int i = 0; i < 5; i++) { - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, EMPTY_STRING_SET, EMPTY_STRING_SET); assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); long count = stream(modifiedDocs.spliterator(), false).count(); assertEquals(1000, count); @@ -372,7 +376,7 @@ public class VersionGCSupportTest { } // all documents had been fetched, now we won't get any document - Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId); + Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, EMPTY_STRING_SET, EMPTY_STRING_SET); assertEquals(0, stream(modifiedDocs.spliterator(), false).count()); } @@ -382,7 +386,7 @@ public class VersionGCSupportTest { } private void assertModified(long fromSeconds, long toSeconds, long num) { - Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE); + Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE, EMPTY_STRING_SET, EMPTY_STRING_SET); assertEquals(num, stream(docs.spliterator(), false).count()); assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))); } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java index 2dfbbab2f4..3566925c0d 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java @@ -21,6 +21,7 @@ package org.apache.jackrabbit.oak.plugins.document; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; @@ -536,6 +537,103 @@ public class VersionGarbageCollectorIT { assertTrue(stats.canceled); } + @Test + public void testGCDeletedLongPathPropsInclExcl_excludes() throws Exception { + String longName = repeat("p", PATH_LONG + 1); + createEmptyProps("/a/b/" + longName + "/x", "/b/c/" + longName + "/x", + "/c/d/" + longName + "/x"); + setGCIncludeExcludes(Sets.newHashSet(), Sets.newHashSet("/b/c", "/c")); + doTestDeletedPropsGC(1, 1); + } + + @Test + public void testGCDeletedPropsInclExcl_oneInclude() throws Exception { + createEmptyProps("/a/b/c", "/b/c/d", "/c/d/e"); + setGCIncludeExcludes(Sets.newHashSet("/a"), Sets.newHashSet()); + doTestDeletedPropsGC(1, 1); + } + + @Test + public void testGCDeletedPropsInclExcl_twoIncludes() throws Exception { + createEmptyProps("/a/b/c", "/b/c/d", "/c/d/e"); + setGCIncludeExcludes(Sets.newHashSet("/a", "/c"), Sets.newHashSet()); + doTestDeletedPropsGC(2, 2); + } + + @Test + public void testGCDeletedPropsInclExcl_inclAndExcl() throws Exception { + createEmptyProps("/a/b/c", "/b/c/d", "/c/d/e"); + setGCIncludeExcludes(Sets.newHashSet("/a", "/c"), Sets.newHashSet("/c/d")); + doTestDeletedPropsGC(1, 1); + } + + @Test + public void testGCDeletedPropsInclExcl_excludes() throws Exception { + createEmptyProps("/a/b/c", "/b/c/d", "/c/d/e"); + setGCIncludeExcludes(Sets.newHashSet(), Sets.newHashSet("/b", "/c")); + doTestDeletedPropsGC(1, 1); + } + + @Test + public void testGCDeletedPropsInclExcl_emptyEmpty() throws Exception { + createEmptyProps("/a/b/c", "/b/c/d", "/c/d/e"); + setGCIncludeExcludes(Collections.emptySet(), Collections.emptySet()); + doTestDeletedPropsGC(3, 3); + } + + private void setGCIncludeExcludes(Set<String> includes, Set<String> excludes) { + gc.setFullGCPaths(requireNonNull(includes), requireNonNull(excludes)); + } + + private void doTestDeletedPropsGC(int deletedPropsCount, int updatedDocsCount) + throws Exception { + // enable the full gc flag + writeField(gc, "fullGCEnabled", true, true); + long maxAge = 1; //hours + clock.waitUntil(getCurrentTimestamp() + TimeUnit.HOURS.toMillis(maxAge)); + VersionGCStats stats = gc(gc, maxAge, HOURS); + assertStatsCountsEqual(stats, + gapOrphOnly(), + gapOrphProp(0, deletedPropsCount, 0, 0, 0, 0, updatedDocsCount), + allOrphProp(0, deletedPropsCount, 0, 0, 0, 0, updatedDocsCount), + keepOneFull(0, deletedPropsCount, 0, 0, 0, 0, updatedDocsCount), + keepOneUser(0, deletedPropsCount, 0, 0, 0, 0, updatedDocsCount), + unmergedBcs(0, deletedPropsCount, 0, 0, 0, 0, updatedDocsCount), + betweenChkp(0, deletedPropsCount, 0, 0, 3, 0, updatedDocsCount), + btwnChkpUBC(0, deletedPropsCount, 0, 0, 3, 0, updatedDocsCount)); + } + + /** + * Utility method to create empty properties, meaning they + * are created, then removed again. That leaves them + * as not existing properties, except they still exist + * in the document. FullGC can then remove them after + * a certain amount of time + */ + private void createEmptyProps(String... paths) throws CommitFailedException { + // 1. create nodes with properties + NodeBuilder rb1 = store1.getRoot().builder(); + for (String path : paths) { + NodeBuilder b1 = rb1; + for (String name : Path.fromString(path).elements()) { + b1 = b1.child(name); + } + b1.setProperty("foo", "bar", STRING); + } + store1.merge(rb1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + // 2. refresh the builder + rb1 = store1.getRoot().builder(); + // 3. empty the properties + for (String path : paths) { + NodeBuilder b1 = rb1; + for (String name : Path.fromString(path).elements()) { + b1 = b1.child(name); + } + b1.removeProperty("foo"); + } + store1.merge(rb1, EmptyHook.INSTANCE, CommitInfo.EMPTY); + } + @Test public void testGCDeletedProps() throws Exception { //1. Create nodes with properties @@ -1276,8 +1374,11 @@ public class VersionGarbageCollectorIT { VersionGCSupport gcSupport = new VersionGCSupport(store1.getDocumentStore()) { @Override - public Iterable<NodeDocument> getModifiedDocs(long fromModified, long toModified, int limit, @NotNull String fromId) { - Iterable<NodeDocument> modifiedDocs = super.getModifiedDocs(fromModified, toModified, limit, fromId); + public Iterable<NodeDocument> getModifiedDocs(long fromModified, + long toModified, int limit, @NotNull String fromId, + @NotNull final Set<String> includePaths, @NotNull final Set<String> excludePaths) { + Iterable<NodeDocument> modifiedDocs = super.getModifiedDocs(fromModified, + toModified, limit, fromId, includePaths, excludePaths); List<NodeDocument> result = stream(modifiedDocs.spliterator(), false).collect(toList()); final Revision updateRev = newRevision(1); store1.getDocumentStore().findAndUpdate(NODES, stream(modifiedDocs.spliterator(), false) @@ -1336,7 +1437,8 @@ public class VersionGarbageCollectorIT { final VersionGCSupport gcSupport = new VersionGCSupport(store1.getDocumentStore()) { @Override - public Iterable<NodeDocument> getModifiedDocs(long fromModified, long toModified, int limit, @NotNull String fromId) { + public Iterable<NodeDocument> getModifiedDocs(long fromModified, long toModified, int limit, @NotNull String fromId, final Set<String> includePaths, + final Set<String> excludePaths) { return () -> new AbstractIterator<>() { private final Iterator<NodeDocument> it = candidates(fromModified, toModified, limit, fromId); @@ -1353,7 +1455,8 @@ public class VersionGarbageCollectorIT { } private Iterator<NodeDocument> candidates(long fromModified, long toModified, int limit, @NotNull String fromId) { - return super.getModifiedDocs(fromModified, toModified, limit, fromId).iterator(); + return super.getModifiedDocs(fromModified, toModified, limit, fromId, + Collections.emptySet(), Collections.emptySet()).iterator(); } };