This is an automated email from the ASF dual-hosted git repository.
joscorbe pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 6ca71c76f3 OAK-11444 [full-gc] Save document id and empty properties
names before deletion (#2038)
6ca71c76f3 is described below
commit 6ca71c76f33326ac7936da02baf6a04cb0af30c9
Author: Daniel Iancu <[email protected]>
AuthorDate: Mon Mar 31 15:33:46 2025 +0300
OAK-11444 [full-gc] Save document id and empty properties names before
deletion (#2038)
---
.../jackrabbit/oak/run/RevisionsCommand.java | 9 +
.../oak/plugins/document/Configuration.java | 5 +
.../plugins/document/DocumentNodeStoreBuilder.java | 10 +
.../plugins/document/DocumentNodeStoreService.java | 1 +
.../oak/plugins/document/FullGcNodeBin.java | 82 ++++++++
.../oak/plugins/document/VersionGCSupport.java | 4 +
.../plugins/document/VersionGarbageCollector.java | 12 +-
.../mongo/MongoDocumentNodeStoreBuilderBase.java | 2 +-
.../plugins/document/mongo/MongoDocumentStore.java | 17 ++
.../plugins/document/mongo/MongoFullGcNodeBin.java | 171 ++++++++++++++++
.../document/mongo/MongoVersionGCSupport.java | 22 ++-
.../document/rdb/RDBDocumentNodeStoreBuilder.java | 12 ++
.../DocumentNodeStoreServiceConfigurationTest.java | 8 +
.../mongo/MongoDocumentNodeStoreBuilderTest.java | 6 +
.../document/mongo/MongoFullGcNodeBinTest.java | 216 +++++++++++++++++++++
.../rdb/RDBDocumentNodeStoreBuilderTest.java | 7 +
16 files changed, 569 insertions(+), 15 deletions(-)
diff --git
a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
index 07ed36ad35..0d9c611d1e 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
@@ -147,6 +147,7 @@ public class RevisionsCommand implements Command {
final OptionSpec<Boolean> dryRun;
final OptionSpec<Boolean> embeddedVerification;
final OptionSpec<Integer> fullGcMode;
+ final OptionSpec<Boolean> fullGCAuditLoggingEnabled;
RevisionsOptions(String usage) {
super(usage);
@@ -208,6 +209,8 @@ public class RevisionsCommand implements Command {
"to be considered for Full GC i.e. Version Garbage
Collector (Full GC) logic will only consider those " +
"nodes for Full GC which are not accessed recently
(currentTime - lastModifiedTime > fullGcMaxAge). Default: 86400 (one day)")
.withOptionalArg().ofType(Long.class).defaultsTo(TimeUnit.DAYS.toSeconds(1));
+ fullGCAuditLoggingEnabled =
parser.accepts("fullGCAuditLoggingEnabled", "Enable audit logging for Full GC")
+ .withOptionalArg().ofType(Boolean.class).defaultsTo(FALSE);
}
public RevisionsOptions parse(String[] args) {
@@ -306,6 +309,10 @@ public class RevisionsCommand implements Command {
boolean doCompaction() {
return options.has(compact);
}
+
+ Boolean isFullGCAuditLoggingEnabled() {
+ return options.has(fullGCAuditLoggingEnabled);
+ }
}
@Override
@@ -375,6 +382,7 @@ public class RevisionsCommand implements Command {
builder.setFullGCBatchSize(options.getFullGcBatchSize());
builder.setFullGCProgressSize(options.getFullGcProgressSize());
builder.setFullGcMaxAgeMillis(SECONDS.toMillis(options.getFullGcMaxAge()));
+
builder.setFullGCAuditLoggingEnabled(options.isFullGCAuditLoggingEnabled());
// create a VersionGCSupport while builder is read-write
VersionGCSupport gcSupport = builder.createVersionGCSupport();
@@ -408,6 +416,7 @@ public class RevisionsCommand implements Command {
System.out.println("FullGcProgressSize is : " +
options.getFullGcProgressSize());
System.out.println("FullGcMaxAgeInSecs is : " +
options.getFullGcMaxAge());
System.out.println("FullGcMaxAgeMillis is : " +
builder.getFullGcMaxAgeMillis());
+ System.out.println("FullGCAuditLoggingEnabled is : " +
options.isFullGCAuditLoggingEnabled());
VersionGarbageCollector gc = createVersionGC(builder.build(),
gcSupport, options.isDryRun(), builder);
VersionGCOptions gcOptions = gc.getOptions();
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
index 0c82ea8f2a..eb5e9e1f51 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
@@ -400,4 +400,9 @@ import static
org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreServic
name = "Invisible for discovery",
description = "Boolean value indicating whether the instance
should be discoverable by the cluster. The default value is " +
DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY)
boolean invisibleForDiscovery() default
DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY;
+
+ @AttributeDefinition(
+ name = "Enable Full GC Persistent Audit Logging",
+ description = "This parameter will enable/disable the saving of
deleted document IDs and properties during FullGC into a persistent storage,
e.g Mongo collection")
+ boolean fullGCAuditLoggingEnabled() default false;
}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
index 7b94796fe9..702cdca0bc 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
@@ -185,6 +185,7 @@ public class DocumentNodeStoreBuilder<T extends
DocumentNodeStoreBuilder<T>> {
private int fullGCBatchSize =
DocumentNodeStoreService.DEFAULT_FGC_BATCH_SIZE;
private int fullGCProgressSize =
DocumentNodeStoreService.DEFAULT_FGC_PROGRESS_SIZE;
private double fullGCDelayFactor =
DocumentNodeStoreService.DEFAULT_FGC_DELAY_FACTOR;
+ private boolean fullGCAuditLoggingEnabled;
private long suspendTimeoutMillis = DEFAULT_SUSPEND_TIMEOUT;
/**
@@ -317,6 +318,15 @@ public class DocumentNodeStoreBuilder<T extends
DocumentNodeStoreBuilder<T>> {
return this.fullGCEnabled;
}
+ public T setFullGCAuditLoggingEnabled(boolean b) {
+ this.fullGCAuditLoggingEnabled = b;
+ return thisBuilder();
+ }
+
+ public boolean isFullGCAuditLoggingEnabled() {
+ return this.fullGCAuditLoggingEnabled;
+ }
+
public T setFullGCIncludePaths(@Nullable String[] includePaths) {
if (isNull(includePaths) || includePaths.length == 0 ||
Arrays.equals(includePaths, new String[]{"/"})) {
this.fullGCIncludePaths = Set.of();
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
index 780cc3b011..9790267abd 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
@@ -534,6 +534,7 @@ public class DocumentNodeStoreService {
setFullGCBatchSize(config.fullGCBatchSize()).
setFullGCProgressSize(config.fullGCProgressSize()).
setFullGCDelayFactor(config.fullGCDelayFactor()).
+
setFullGCAuditLoggingEnabled(config.fullGCAuditLoggingEnabled()).
setSuspendTimeoutMillis(config.suspendTimeoutMillis()).
setClusterIdReuseDelayAfterRecovery(config.clusterIdReuseDelayAfterRecoveryMillis()).
setRecoveryDelayMillis(config.recoveryDelayMillis()).
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java
new file mode 100644
index 0000000000..662b8ebe46
--- /dev/null
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/FullGcNodeBin.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document;
+
+import java.util.List;
+import java.util.Map;
+/**
+ * This interface is a wrapper around DocumentStore that exposes two methods
used to clean garbage from the NODES collection
+ * public int remove(Map<String, Long> orphanOrDeletedRemovalMap)
+ * public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList)
+ * When enabled
+ * Each method saves the document ID or empty properties names (that will be
deleted) to a separate _bin collection as a BinDocument then delegates deletion
to DocumentStore
+ *
+ * When disabled (default)
+ * Each method delegates directly to DocumentStore
+ */
+public interface FullGcNodeBin {
+
+ static FullGcNodeBin noBin(DocumentStore store) {
+ return new FullGcNodeBin() {
+ @Override
+ public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
+ return store.remove(Collection.NODES,
orphanOrDeletedRemovalMap);
+ }
+
+ @Override
+ public List<NodeDocument> findAndUpdate(List<UpdateOp>
updateOpList) {
+ return store.findAndUpdate(Collection.NODES, updateOpList);
+ }
+
+ @Override
+ public void setEnabled(boolean value) {
+ // no-op
+ }
+ };
+ }
+
+ /**
+ * Remove orphaned or deleted documents from the NODES collection
+ * If bin is enabled, the document IDs are saved to the BIN
collection with ID prefixed with '/bin/'
+ * If document ID cannot be saved then the removal of the document fails
+ * If the bin is disabled, the document IDs are directly removed from the
NODES collection
+ *
+ * @param orphanOrDeletedRemovalMap the keys of the documents to remove
with the corresponding timestamps
+ * @return the number of documents removed
+ * @see DocumentStore#remove(Collection, Map)
+ */
+ int remove(Map<String, Long> orphanOrDeletedRemovalMap);
+
+ /**
+ * Performs a conditional update
+ * If the bin is enabled, the removed properties are saved to the BIN
collection with ID prefixed with '/bin/' and empty value
+ * If the document ID and properties cannot be saved then the removal of
the property fails
+ * If bin is disabled, the removed properties are directly removed from
the NODES collection
+ *
+ * @param updateOpList the update operation List
+ * @return the list containing old documents
+ * @see DocumentStore#findAndUpdate(Collection, List)
+ */
+ List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList);
+
+ /**
+ * Enable or disable the bin
+ * @param value true to enable, false to disable
+ */
+ void setEnabled(boolean value);
+}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index fae8af8674..c1735d7369 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -307,4 +307,8 @@ public class VersionGCSupport {
Revision r = IterableUtils.getFirst(doc.getAllChanges(), null);
return r != null && sweepRevs.isRevisionNewer(r);
}
+
+ public FullGcNodeBin getFullGCBin() {
+ return FullGcNodeBin.noBin(store);
+ }
}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 7bf95c0b76..271604d61f 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -1947,15 +1947,9 @@ public class VersionGarbageCollector {
}
if (!isFullGCDryRun) {
// only delete these in case it is not a dryRun
-
if (!orphanOrDeletedRemovalMap.isEmpty()) {
- // use remove() with the modified check to rule
- // out any further race-condition where this removal
- // races with a un-orphan/re-creation as a result of
which
- // the node should now not be removed. The modified
check
- // ensures a node would then not be removed
- // (and as a result the removedSize != map.size())
- final int removedSize = ds.remove(NODES,
orphanOrDeletedRemovalMap);
+
+ final int removedSize =
versionStore.getFullGCBin().remove(orphanOrDeletedRemovalMap);
stats.updatedFullGCDocsCount += removedSize;
stats.deletedDocGCCount += removedSize;
stats.deletedOrphanNodesCount += removedSize;
@@ -1973,7 +1967,7 @@ public class VersionGarbageCollector {
}
if (!updateOpList.isEmpty()) {
- List<NodeDocument> oldDocs = ds.findAndUpdate(NODES,
updateOpList);
+ List<NodeDocument> oldDocs =
versionStore.getFullGCBin().findAndUpdate(updateOpList);
int deletedProps =
oldDocs.stream().filter(Objects::nonNull).mapToInt(d ->
deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum();
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
index 46f6f7decd..1f69130b4e 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java
@@ -172,7 +172,7 @@ public abstract class MongoDocumentNodeStoreBuilderBase<T
extends MongoDocumentN
public VersionGCSupport createVersionGCSupport() {
DocumentStore store = getDocumentStore();
if (store instanceof MongoDocumentStore) {
- return new MongoVersionGCSupport((MongoDocumentStore) store);
+ return new MongoVersionGCSupport((MongoDocumentStore) store,
isFullGCAuditLoggingEnabled());
} else {
return super.createVersionGCSupport();
}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
index b24014fe1e..060a90d2a6 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java
@@ -42,6 +42,7 @@ import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.commons.io.IOUtils;
+import com.mongodb.client.model.IndexOptions;
import org.apache.jackrabbit.guava.common.base.Stopwatch;
import org.apache.jackrabbit.guava.common.collect.Iterators;
import org.apache.jackrabbit.guava.common.util.concurrent.AtomicDouble;
@@ -166,6 +167,8 @@ public class MongoDocumentStore implements DocumentStore {
* which we block any data modification operation when system has been
throttled.
*/
public static final long DEFAULT_THROTTLING_TIME_MS =
Long.getLong("oak.mongo.throttlingTime", 20);
+
+ private static final @NotNull String BIN_COLLECTION = "bin";
/**
* nodeNameLimit for node name based on Mongo Version
*/
@@ -348,6 +351,9 @@ public class MongoDocumentStore implements DocumentStore {
if (!readOnly) {
ensureIndexes(db, status);
+ if (builder.isFullGCAuditLoggingEnabled()) {
+ ensureFullGcTTLIndex();
+ }
}
this.nodeLocks = new StripedNodeDocumentLocks();
@@ -465,6 +471,13 @@ public class MongoDocumentStore implements DocumentStore {
createIndex(journal, JournalEntry.MODIFIED, true, false, false);
}
+ private void ensureFullGcTTLIndex() {
+ //TTL index for full GC bin documents to expire after 90 days
+ //see https://issues.apache.org/jira/browse/OAK-11444
+ IndexOptions indexOptions = new
IndexOptions().expireAfter(TimeUnit.DAYS.toSeconds(90), TimeUnit.SECONDS);
+ connection.getCollection(BIN_COLLECTION).createIndex(new
org.bson.Document(MongoFullGcNodeBin.GC_COLLECTED_AT, 1), indexOptions);
+ }
+
private void createCollection(MongoDatabase db, String collectionName,
MongoStatus mongoStatus) {
CreateCollectionOptions options = new CreateCollectionOptions();
@@ -2011,6 +2024,10 @@ public class MongoDocumentStore implements DocumentStore
{
return getDBCollection(collection).withReadPreference(readPreference);
}
+ <T extends Document> MongoCollection<BasicDBObject> getBinCollection() {
+ return this.connection.getCollection(BIN_COLLECTION);
+ }
+
MongoDatabase getDatabase() {
return connection.getDatabase();
}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
new file mode 100644
index 0000000000..8a8d837222
--- /dev/null
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import com.mongodb.BasicDBObject;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStore;
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static org.slf4j.LoggerFactory.getLogger;
+
+import java.time.Instant;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * This class is a wrapper around DocumentStore that exposes two methods
used to clean garbage from the NODES collection
+ * public int remove(Map<String, Long> orphanOrDeletedRemovalMap)
+ * public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList)
+ * When enabled
+ * Each method saves the document ID or empty properties names (that will be
deleted) to a separate _bin collection as a BinDocument then delegates deletion
to DocumentStore
+ *
+ * When disabled (default)
+ * Each method delegates directly to DocumentStore
+ */
+public class MongoFullGcNodeBin implements FullGcNodeBin {
+ public static final String GC_COLLECTED_AT = "_gcCollectedAt";
+ private static final Logger LOG =
LoggerFactory.getLogger(MongoFullGcNodeBin.class);
+
+ private final MongoDocumentStore mongoDocumentStore;
+ private boolean enabled;
+
+ public MongoFullGcNodeBin(MongoDocumentStore ds) {
+ this(ds, false);
+ }
+
+ public MongoFullGcNodeBin(MongoDocumentStore store, boolean
fullGcBinEnabled) {
+ mongoDocumentStore = store;
+ enabled = fullGcBinEnabled;
+ }
+
+ /**
+ * Remove orphaned or deleted documents from the NODES collection
+ * If bin is enabled, the document IDs are saved to the BIN collection
with ID prefixed with '/bin/'
+ * If document ID cannot be saved then the removal of the document fails
+ * If the bin is disabled, the document IDs are directly removed from the
NODES collection
+ *
+ * @param orphanOrDeletedRemovalMap the keys of the documents to remove
with the corresponding timestamps
+ * @return the number of documents removed
+ * @see DocumentStore#remove(Collection, Map)
+ */
+ @Override
+ public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
+ if (orphanOrDeletedRemovalMap.isEmpty() ||
!addToBin(orphanOrDeletedRemovalMap)) {
+ return 0;
+ }
+
+ // use remove() with the modified check to rule
+ // out any further race-condition where this removal
+ // races with an un-orphan/re-creation as a result of which
+ // the node should now not be removed. The modified check
+ // ensures a node would then not be removed
+ // (and as a result the removedSize != map.size())
+ return mongoDocumentStore.remove(Collection.NODES,
orphanOrDeletedRemovalMap);
+ }
+
+
+ /**
+ * Performs a conditional update
+ * If the bin is enabled, the removed properties are saved to the BIN
collection with ID prefixed with '/bin/' and empty value
+ * If the document ID and properties cannot be saved then the removal of
the property fails
+ * If bin is disabled, the removed properties are directly removed from
the NODES collection
+ *
+ * @param updateOpList the update operation List
+ * @return the list containing old documents
+ * @see DocumentStore#findAndUpdate(Collection, List)
+ */
+ @Override
+ public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) {
+ LOG.info("Updating {} documents", updateOpList.size());
+ if (updateOpList.isEmpty() || !addToBin(updateOpList)) {
+ return Collections.emptyList();
+ }
+ return mongoDocumentStore.findAndUpdate(Collection.NODES,
updateOpList);
+ }
+
+ private boolean addToBin(Map<String, Long> orphanOrDeletedRemovalMap) {
+ if (!enabled) {
+ LOG.info("Bin is disabled, skipping adding delete candidate
documents to bin");
+ return true;
+ }
+ LOG.info("Adding {} delete candidate documents to bin",
orphanOrDeletedRemovalMap.size());
+ List<BasicDBObject> docs = orphanOrDeletedRemovalMap.keySet().stream()
+ .map(e -> new UpdateOp(e, true))
+ .map(this::toBasicDBObject)
+ .collect(Collectors.toList());
+ try {
+ return persist(docs);
+ } catch (Exception e) {
+ LOG.error("Error while adding delete candidate documents to bin:
{}", docs, e);
+ }
+ return false;
+ }
+
+ private boolean addToBin(List<UpdateOp> updateOpList) {
+ if (!enabled) {
+ LOG.info("Bin is disabled, skipping adding removed properties to
bin");
+ return true;
+ }
+ LOG.info("Adding {} removed properties to bin", updateOpList.size());
+ List<BasicDBObject> binOpList =
updateOpList.stream().map(this::toBasicDBObject).collect(Collectors.toList());
+ try {
+ return persist(binOpList);
+ } catch (Exception e) {
+ LOG.error("Error while adding removed properties to bin: {}",
binOpList, e);
+ }
+ return false;
+ }
+
+ private boolean persist(List<BasicDBObject> inserts) {
+ mongoDocumentStore.getBinCollection().insertMany(inserts);
+ return true;
+ }
+
+ private BasicDBObject toBasicDBObject(UpdateOp op) {
+ BasicDBObject doc = new BasicDBObject();
+ doc.put(Document.ID, "/bin/" + op.getId() + "-" +
Instant.now().toEpochMilli());
+ //copy removed properties to the new document
+ op.getChanges().forEach((k, v) -> {
+ if (v.type == UpdateOp.Operation.Type.REMOVE) {
+ doc.put(k.getName(), "");
+ }
+ });
+ //this property is used to track the time when the document was added
to the bin
+ //it can be used as a TTL index property to automatically remove the
document after a certain time
+ //see
https://www.mongodb.com/docs/manual/core/index-ttl/#std-label-index-feature-ttl
+ doc.put(MongoFullGcNodeBin.GC_COLLECTED_AT, new Date());
+ return doc;
+ }
+
+ @Override
+ public void setEnabled(boolean value) {
+ this.enabled = value;
+ LOG.info("Full GC Bin changed to {}", enabled ? "enabled" :
"disabled");
+ }
+
+ public boolean isEnabled() {
+ return enabled;
+ }
+}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index ffc02602fd..931a7b2d19 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -29,8 +29,10 @@ import static java.util.Optional.ofNullable;
import static com.mongodb.client.model.Filters.and;
import static com.mongodb.client.model.Filters.lt;
import static java.util.Collections.emptyList;
+import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier;
import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
import static
org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
import static
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
import static
org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
@@ -109,10 +111,15 @@ public class MongoVersionGCSupport extends
VersionGCSupport {
/**
* The batch size for the query of possibly deleted docs.
*/
- private final int batchSize = Integer.getInteger(
- "oak.mongo.queryDeletedDocsBatchSize", 1000);
+ private final int batchSize = SystemPropertySupplier.create(
+ "oak.mongo.queryDeletedDocsBatchSize", 1000).get();
+ private final MongoFullGcNodeBin fullGcBin;
public MongoVersionGCSupport(MongoDocumentStore store) {
+ this(store, false);
+ }
+
+ public MongoVersionGCSupport(MongoDocumentStore store, boolean
fullGcBinEnabled) {
super(store);
this.store = store;
if(hasIndex(getNodeCollection(), SD_TYPE, SD_MAX_REV_TIME_IN_SECS)) {
@@ -129,6 +136,7 @@ public class MongoVersionGCSupport extends VersionGCSupport
{
} else {
modifiedIdHint = null;
}
+ this.fullGcBin = new MongoFullGcNodeBin(store, fullGcBinEnabled);
}
@Override
@@ -241,9 +249,8 @@ public class MongoVersionGCSupport extends VersionGCSupport
{
public Iterable<NodeDocument> getModifiedDocs(final long fromModified,
final long toModified, final int limit,
@NotNull final String
fromId, @NotNull Set<String> includedPathPrefixes,
@NotNull Set<String>
excludedPathPrefixes) {
- LOG.info("getModifiedDocs fromModified: {}, toModified: {}, limit: {},
fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}",
- fromModified, toModified, limit, fromId, includedPathPrefixes,
excludedPathPrefixes);
-
+ LOG.info("getModifiedDocs fromModified: {} ({}), toModified: {} ({}),
limit: {}, fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}",
+ fromModified, Utils.timestampToString(fromModified),
toModified, Utils.timestampToString(toModified), limit, fromId,
includedPathPrefixes, excludedPathPrefixes);
final long fromModifiedQuery;
if (MIN_ID_VALUE.equals(fromId)) {
// If fromId is MIN_ID_VALUE, round fromModified to 5 second
resolution
@@ -476,6 +483,11 @@ public class MongoVersionGCSupport extends
VersionGCSupport {
LOG.debug(sb.toString());
}
+ @Override
+ public FullGcNodeBin getFullGCBin() {
+ return fullGcBin;
+ }
+
private static String getID(BasicDBObject document) {
return String.valueOf(document.get(Document.ID));
}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
index 18cfb61f60..bff7f0ff17 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
@@ -136,6 +136,18 @@ public class RDBDocumentNodeStoreBuilder
return thisBuilder();
}
+ @Override
+ public boolean isFullGCAuditLoggingEnabled() {
+ // fullGC is not supported for RDB
+ return false;
+ }
+
+ @Override
+ public RDBDocumentNodeStoreBuilder setFullGCAuditLoggingEnabled(boolean b)
{
+ // fullGC is not supported for RDB
+ return thisBuilder();
+ }
+
@Override
public Set<String> getFullGCIncludePaths() {
return of();
diff --git
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
index 58a29042f5..2e9429a9ad 100644
---
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
+++
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
@@ -104,6 +104,7 @@ public class DocumentNodeStoreServiceConfigurationTest {
assertEquals(DEFAULT_EMBEDDED_VERIFICATION_ENABLED,
config.embeddedVerificationEnabled());
assertEquals(DocumentNodeStoreService.DEFAULT_FULL_GC_MAX_AGE,
config.fullGcMaxAgeInSecs());
assertEquals(CommitQueue.DEFAULT_SUSPEND_TIMEOUT,
config.suspendTimeoutMillis());
+ assertFalse(config.fullGCAuditLoggingEnabled());
}
@Test
@@ -170,6 +171,13 @@ public class DocumentNodeStoreServiceConfigurationTest {
assertEquals(batchSize, config.fullGCBatchSize());
}
+ @Test
+ public void fullGCAuditLoggingEnabled() throws Exception {
+ addConfigurationEntry(preset, "fullGCAuditLoggingEnabled", true);
+ Configuration config = createConfiguration();
+ assertTrue(config.fullGCAuditLoggingEnabled());
+ }
+
@Test
public void invisibleForDiscoveryFalse() throws Exception {
boolean batchSize = false;
diff --git
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
index 0c3f82f36b..d900d10f71 100644
---
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
+++
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
@@ -132,4 +132,10 @@ public class MongoDocumentNodeStoreBuilderTest {
final int fullGcModeNone = 0;
assertEquals(builder.getFullGCMode(), fullGcModeNone);
}
+
+ @Test
+ public void isFullGCAuditLoggingEnabled() {
+ MongoDocumentNodeStoreBuilder builder = new
MongoDocumentNodeStoreBuilder();
+ assertFalse(builder.isFullGCAuditLoggingEnabled());
+ }
}
diff --git
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java
new file mode 100644
index 0000000000..f2dfdf9d34
--- /dev/null
+++
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinTest.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.client.MongoCollection;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+import org.junit.After;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import static org.mockito.ArgumentMatchers.anyList;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.ArgumentMatchers.eq;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoInteractions;
+import static org.mockito.Mockito.when;
+import org.mockito.MockitoAnnotations;
+
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Unit tests for {@link MongoFullGcNodeBin}, run against a mocked
+ * {@link MongoDocumentStore} and a mocked bin collection.
+ */
+public class MongoFullGcNodeBinTest {
+
+ private static final List<NodeDocument> FIND_AND_UPDATE_RETURN_VALUE = List.of();
+
+ @Mock
+ MongoDocumentStore documentStore;
+
+ @Mock
+ MongoCollection<BasicDBObject> mockBinCollection;
+
+ MongoFullGcNodeBin fullGcBin;
+
+ // handle returned by openMocks(); released again in tearDown()
+ private AutoCloseable mocks;
+
+ /** Creates the mocks, an enabled bin on top of them and the default stubbing. */
+ @Before
+ public void setUp() throws Exception {
+ mocks = MockitoAnnotations.openMocks(this);
+ fullGcBin = new MongoFullGcNodeBin(documentStore, true);
+ // the stubbed remove() reports every requested document as removed
+ when(documentStore.remove(eq(Collection.NODES), anyMap())).thenAnswer(invocation -> {
+ Map<String, Long> map = invocation.getArgument(1);
+ return map.size();
+ });
+ when(documentStore.findAndUpdate(eq(Collection.NODES), anyList())).thenAnswer(invocation -> FIND_AND_UPDATE_RETURN_VALUE);
+ when(documentStore.getBinCollection()).thenReturn(mockBinCollection);
+ }
+
+ /** Resets the mocks and closes the handle obtained from {@code openMocks}. */
+ @After
+ public void tearDown() throws Exception {
+ Mockito.reset(documentStore, mockBinCollection);
+ // guard against setUp() having failed before the handle was assigned
+ if (mocks != null) {
+ mocks.close();
+ }
+ }
+
+ /** The single-argument constructor leaves the bin disabled. */
+ @Test
+ public void defaultDisabled() {
+ assertFalse(new MongoFullGcNodeBin(this.documentStore).isEnabled());
+ }
+
+ /** Passing {@code true} to the two-argument constructor enables the bin. */
+ @Test
+ public void enableWithConstructor() {
+ assertTrue(new MongoFullGcNodeBin(this.documentStore, true).isEnabled());
+ }
+
+ /** Removed documents are first written to the bin and then removed from the store. */
+ @Test
+ public void remove() {
+ Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>();
+ orphanOrDeletedRemovalMap.put("key1", 1L);
+ orphanOrDeletedRemovalMap.put("key2", 2L);
+
+ int removed = fullGcBin.remove(orphanOrDeletedRemovalMap);
+
+ //verify returned value
+ assertEquals(orphanOrDeletedRemovalMap.size(), removed);
+
+ //verify removed documents are added to bin; HashMap iteration order is
+ //unspecified, so bin entries are matched by id rather than by position
+ List<BasicDBObject> binDocs = captureInsertedBinDocs();
+ assertEquals(orphanOrDeletedRemovalMap.size(), binDocs.size());
+ assertTrue(hasBinId(binDocs, "^\\/bin\\/key1\\-\\d+$"));
+ assertTrue(hasBinId(binDocs, "^\\/bin\\/key2\\-\\d+$"));
+
+ //verify documents are removed
+ verify(documentStore).remove(Collection.NODES, orphanOrDeletedRemovalMap);
+ }
+
+ /** A failing bin insert yields 0 and the store's remove is never invoked. */
+ @Test
+ public void removeWhenCopyToBinFails() {
+ Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>();
+ orphanOrDeletedRemovalMap.put("key", 1L);
+ doThrow(new RuntimeException("Error while adding documents to bin")).when(mockBinCollection).insertMany(anyList());
+
+ int removed = fullGcBin.remove(orphanOrDeletedRemovalMap);
+
+ assertEquals(0, removed);
+ verify(documentStore, never()).remove(Collection.NODES, orphanOrDeletedRemovalMap);
+ }
+
+ /** An empty map returns 0 without any interaction with the store. */
+ @Test
+ public void removeEmptyMap() {
+ int removed = fullGcBin.remove(Map.of());
+ assertEquals(0, removed);
+ verifyNoInteractions(documentStore);
+ }
+
+ /** With the bin disabled nothing is inserted into the bin collection. */
+ @Test
+ public void removeWhenBinDisabled() {
+ fullGcBin.setEnabled(false);
+ Map<String, Long> orphanOrDeletedRemovalMap = new HashMap<>();
+ orphanOrDeletedRemovalMap.put("key", 1L);
+
+ fullGcBin.remove(orphanOrDeletedRemovalMap);
+
+ verify(mockBinCollection, never()).insertMany(anyList());
+ }
+
+ /** Only the removed properties are copied to the bin before the update is delegated. */
+ @Test
+ public void findAndUpdate() {
+ UpdateOp doc1 = new UpdateOp("doc1", false);
+ doc1.remove("prop1.1");
+ doc1.set("prop1.2", "value1.2");
+ UpdateOp doc2 = new UpdateOp("doc2", false);
+ doc2.remove("prop2.1");
+ doc2.remove("prop2.2");
+
+ List<UpdateOp> properties = List.of(doc1, doc2);
+ List<NodeDocument> modifiedDocs = fullGcBin.findAndUpdate(properties);
+
+ //verify removed properties are added to bin
+ List<BasicDBObject> binOpList = captureInsertedBinDocs();
+
+ BasicDBObject binDoc1 = binOpList.get(0);
+ assertTrue(binDoc1.get(Document.ID).toString().matches("^\\/bin\\/doc1\\-\\d+$"));
+ assertTrue(binDoc1.containsField("prop1.1"));
+ assertFalse(binDoc1.containsField("prop1.2"));//only removed props are saved
+ assertGcTimestampAdded(binDoc1);
+
+ BasicDBObject binDoc2 = binOpList.get(1);
+ assertTrue(binDoc2.get(Document.ID).toString().matches("^\\/bin\\/doc2\\-\\d+$"));
+ assertTrue(binDoc2.containsField("prop2.1"));
+ assertTrue(binDoc2.containsField("prop2.2"));
+ assertGcTimestampAdded(binDoc2);
+
+ //verify removed properties are removed from the original document
+ verify(documentStore).findAndUpdate(Collection.NODES, properties);
+
+ //verify returned value
+ assertEquals(FIND_AND_UPDATE_RETURN_VALUE, modifiedDocs);
+ }
+
+ /** A failing bin insert means the store's findAndUpdate is never invoked. */
+ @Test
+ public void findAndUpdateWhenCopyToBinFails() {
+ doThrow(new RuntimeException("Error while adding documents to bin")).when(mockBinCollection).insertMany(anyList());
+ UpdateOp doc1 = new UpdateOp("doc1", false);
+ doc1.remove("prop1");
+ fullGcBin.findAndUpdate(List.of(doc1));
+ verify(documentStore, never()).findAndUpdate(eq(Collection.NODES), anyList());
+ }
+
+ /** With the bin disabled nothing is inserted into the bin collection. */
+ @Test
+ public void findAndUpdateWhenBinDisabled() {
+ fullGcBin.setEnabled(false);
+ UpdateOp doc1 = new UpdateOp("doc1", false);
+ doc1.remove("prop1");
+ fullGcBin.findAndUpdate(List.of(doc1));
+ verify(mockBinCollection, never()).insertMany(anyList());
+ }
+
+ /** An empty list causes no interaction with the store. */
+ @Test
+ public void findAndUpdateWhenEmptyList() {
+ fullGcBin.findAndUpdate(List.of());
+ verifyNoInteractions(documentStore);
+ }
+
+ /**
+ * Verifies that the bin collection received a single {@code insertMany}
+ * call and returns the documents that were passed to it.
+ */
+ @SuppressWarnings("unchecked") // forClass(List.class) can only be given the raw List class
+ private List<BasicDBObject> captureInsertedBinDocs() {
+ ArgumentCaptor<List<BasicDBObject>> captor = ArgumentCaptor.forClass(List.class);
+ verify(mockBinCollection).insertMany(captor.capture());
+ return captor.getValue();
+ }
+
+ /** Returns whether any of {@code binDocs} has an id matching {@code idRegex}. */
+ private static boolean hasBinId(List<BasicDBObject> binDocs, String idRegex) {
+ return binDocs.stream().anyMatch(doc -> doc.get(Document.ID).toString().matches(idRegex));
+ }
+
+ /** Asserts that {@code binDoc} has a {@code _gcCollectedAt} field holding a {@link Date}. */
+ private static void assertGcTimestampAdded(BasicDBObject binDoc) {
+ assertTrue(binDoc.containsField("_gcCollectedAt"));
+ assertTrue(binDoc.get("_gcCollectedAt") instanceof Date);
+ }
+}
\ No newline at end of file
diff --git
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
index 1aa9d47c6d..642b05e20e 100755
---
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
+++
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
@@ -121,4 +121,11 @@ public class RDBDocumentNodeStoreBuilderTest {
builder.setFullGcMaxAgeMillis(30 * 24 * 60 * 60 * 1000L);
assertEquals(0, builder.getFullGcMaxAgeMillis());
}
+
+ @Test
+ public void fullGcAuditLoggingEnabled() {
+ RDBDocumentNodeStoreBuilder rdbBuilder = new RDBDocumentNodeStoreBuilder();
+ rdbBuilder.setFullGCAuditLoggingEnabled(true);
+ assertFalse(rdbBuilder.isFullGCAuditLoggingEnabled()); // enabling is expected to have no effect on this builder
+ }
}