This is an automated email from the ASF dual-hosted git repository.
daim pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new eb852d6de7 OAK-11671 [full-gc] Persist the avg object size and avg
size of props per document in Mongo settings (#2252)
eb852d6de7 is described below
commit eb852d6de7a7c696526acc21bf7e706aba3b150a
Author: Daniel Iancu <[email protected]>
AuthorDate: Tue Apr 29 12:47:32 2025 +0300
OAK-11671 [full-gc] Persist the avg object size and avg size of props per
document in Mongo settings (#2252)
Co-authored-by: Daniel Iancu <[email protected]>
---
.../plugins/document/VersionGarbageCollector.java | 7 +-
.../plugins/document/mongo/MongoFullGcNodeBin.java | 4 +
.../mongo/MongoFullGcNodeBinSumBsonSize.java | 139 +++++++++++
.../document/mongo/MongoVersionGCSupport.java | 5 +-
.../mongo/MongoFullGcNodeBinSumBsonSizeTest.java | 267 +++++++++++++++++++++
5 files changed, 418 insertions(+), 4 deletions(-)
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 400c5ed430..418da521b7 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -129,7 +129,12 @@ public class VersionGarbageCollector {
/**
* Document id stored in settings collection that keeps info about version
gc
*/
- static final String SETTINGS_COLLECTION_ID = "versionGC";
+ public static final String SETTINGS_COLLECTION_ID = "versionGC";
+
+ /**
+ * Name of the versionGC settings property that accumulates the total BSON size (in bytes) of the garbage removed by full GC
+ */
+ public static final String
SETTINGS_COLLECTION_FULL_GC_REMOVED_TOTAL_BSON_SIZE =
"fullGcRemovedTotalBsonSize";
/**
* Property name to timestamp when last gc run happened
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
index 5714a6bba9..7889926679 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
@@ -173,4 +173,8 @@ public class MongoFullGcNodeBin implements FullGcNodeBin {
public boolean isEnabled() {
return enabled;
}
+
+ /**
+ * Returns the {@link MongoDocumentStore} held by this bin. Package-private so that
+ * wrappers in this package can issue their own queries against the same store.
+ *
+ * @return the document store this bin was created with
+ */
+ MongoDocumentStore getMongoDocumentStore() {
+ return mongoDocumentStore;
+ }
}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSize.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSize.java
new file mode 100644
index 0000000000..3e2ce684e4
--- /dev/null
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSize.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.client.model.Updates;
+import org.bson.conversions.Bson;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+
+import static com.mongodb.client.model.Filters.eq;
+import static com.mongodb.client.model.Filters.in;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_GC_REMOVED_TOTAL_BSON_SIZE;
+import static
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
+
+/**
+ * Wrapper around a {@link MongoFullGcNodeBin} that measures how many bytes of BSON
+ * full GC removes. The total BSON size of the affected documents is read before
+ * delegating (and again afterwards for updates) and the removed amount is added to the
+ * {@code SETTINGS_COLLECTION_FULL_GC_REMOVED_TOTAL_BSON_SIZE} property of the
+ * {@code SETTINGS_COLLECTION_ID} document in the SETTINGS collection.
+ * <p>
+ * The size accounting is best effort: a failure while reading sizes or while updating
+ * the counter is logged and never prevents the delegate from doing its work.
+ */
+class MongoFullGcNodeBinSumBsonSize implements FullGcNodeBin {
+    private static final Logger LOG = LoggerFactory.getLogger(MongoFullGcNodeBinSumBsonSize.class);
+
+    private final MongoFullGcNodeBin delegate;
+    private final MongoDocumentStore store;
+
+    /**
+     * @param delegate the bin that performs the actual updates and removals; its
+     *                 document store is used for the size queries
+     */
+    public MongoFullGcNodeBinSumBsonSize(MongoFullGcNodeBin delegate) {
+        this.delegate = delegate;
+        this.store = delegate.getMongoDocumentStore();
+    }
+
+    @Override
+    public void setEnabled(boolean value) {
+        delegate.setEnabled(value);
+    }
+
+    /**
+     * Applies the given update operations through the delegate and adds the BSON size
+     * they freed (size before minus size after) to the counter in the SETTINGS collection.
+     *
+     * @param updateOpList the update operations that remove garbage from documents
+     * @return the documents returned by the delegate for the applied updates
+     */
+    @Override
+    public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) {
+        List<String> ids = updateOpList.stream()
+                .map(UpdateOp::getId)
+                .collect(Collectors.toList());
+        //get the total bson size before the update
+        long initialBsonSize = getBsonSize(ids);
+        LOG.debug("bson size before update: {}", initialBsonSize);
+        //remove garbage properties
+        List<NodeDocument> updated = delegate.findAndUpdate(updateOpList);
+        if (!updated.isEmpty()) {
+            //calculate the diff of the bson size after update
+            long afterUpdateBsonSize = getBsonSize(ids);
+            LOG.debug("bson size after update: {}", afterUpdateBsonSize);
+            // both measurements must be valid; a negative diff is rejected by addBsonSize
+            if (initialBsonSize > 0 && afterUpdateBsonSize > 0) {
+                //sum up the removed bson size
+                addBsonSize(initialBsonSize - afterUpdateBsonSize);
+            }
+        }
+        return updated;
+    }
+
+    /**
+     * Removes the given documents through the delegate and adds their BSON size to the
+     * counter in the SETTINGS collection.
+     * <p>
+     * The size is measured for all requested ids before the removal; if the delegate
+     * removes only some of them the full measured size is still added.
+     *
+     * @param orphanOrDeletedRemovalMap the map of the documents to be removed
+     * @return the number of documents removed, as reported by the delegate
+     */
+    @Override
+    public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
+        //get the total bson size before the removal
+        long bsonSize = getBsonSize(new ArrayList<>(orphanOrDeletedRemovalMap.keySet()));
+        LOG.debug("bson size before remove: {}", bsonSize);
+        //remove garbage documents
+        int removed = delegate.remove(orphanOrDeletedRemovalMap);
+        //sum up the removed bson size
+        if (removed > 0 && bsonSize > 0) {
+            addBsonSize(bsonSize);
+        }
+        return removed;
+    }
+
+    /**
+     * Increments the removed-size counter in the SETTINGS collection.
+     * Non-positive values are logged and ignored. The update is issued without upsert,
+     * so it only has an effect if the settings document already exists.
+     *
+     * @param bsonSize the number of bytes to add to the counter
+     */
+    private void addBsonSize(long bsonSize) {
+        if (bsonSize <= 0) {
+            LOG.warn("bson size {} is not positive", bsonSize);
+            return;
+        }
+        Bson query = eq(ID, SETTINGS_COLLECTION_ID);
+        Bson update = Updates.inc(SETTINGS_COLLECTION_FULL_GC_REMOVED_TOTAL_BSON_SIZE, bsonSize);
+        try {
+            //increment the value in SETTINGS collection with the new bson size
+            store.getDBCollection(SETTINGS).updateOne(query, update);
+            LOG.info("Incremented bson size with {}", bsonSize);
+        } catch (RuntimeException e) {
+            // the garbage is already gone at this point; losing one increment must not
+            // make the caller believe the GC operation itself failed
+            LOG.warn("Failed to increment bson size with {}", bsonSize, e);
+        }
+    }
+
+    /**
+     * Calculates the total BSON size of the NODES documents with the given ids using a
+     * server side aggregation ({@code $bsonSize} requires MongoDB 4.4 or newer).
+     *
+     * @param ids the ids of the documents to measure
+     * @return the summed size in bytes, or -1 if no document matched or the size
+     *         could not be determined
+     */
+    private long getBsonSize(List<String> ids) {
+        if (ids.isEmpty()) {
+            // an empty $in matches nothing, so skip the round trip to the server
+            return -1;
+        }
+        long start = System.currentTimeMillis();
+        try {
+            //get the bson size of the documents in the list
+            Bson match = in(ID, ids);
+            BasicDBObject first = store.getDBCollection(NODES).aggregate(List.of(
+                    new BasicDBObject("$match", match),
+                    new BasicDBObject("$group", new BasicDBObject("_id", null)
+                            .append("totalSize",
+                                    new BasicDBObject("$sum", new BasicDBObject("$bsonSize", "$$ROOT"))))
+            )).first();
+            return first != null ? first.getLong("totalSize") : -1;
+        } catch (RuntimeException e) {
+            // size accounting is auxiliary: report "unknown" instead of aborting the GC
+            LOG.warn("Failed to calculate bson size for {} documents", ids.size(), e);
+            return -1;
+        } finally {
+            LOG.info("getBsonSize for {} documents took {} ms", ids.size(),
+                    System.currentTimeMillis() - start);
+        }
+    }
+}
diff --git
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 6b58b0438f..14d3476abb 100644
---
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -56,7 +56,6 @@ import com.mongodb.MongoClient;
import com.mongodb.client.MongoCursor;
import org.apache.jackrabbit.oak.commons.collections.IterableUtils;
-import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
import org.apache.jackrabbit.oak.plugins.document.Document;
import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
@@ -113,7 +112,7 @@ public class MongoVersionGCSupport extends VersionGCSupport
{
*/
private final int batchSize = SystemPropertySupplier.create(
"oak.mongo.queryDeletedDocsBatchSize", 1000).get();
- private final MongoFullGcNodeBin fullGcBin;
+ private final FullGcNodeBin fullGcBin;
public MongoVersionGCSupport(MongoDocumentStore store) {
this(store, false);
@@ -136,7 +135,7 @@ public class MongoVersionGCSupport extends VersionGCSupport
{
} else {
modifiedIdHint = null;
}
- this.fullGcBin = new MongoFullGcNodeBin(store, fullGcBinEnabled);
+ this.fullGcBin = new MongoFullGcNodeBinSumBsonSize( new
MongoFullGcNodeBin(store, fullGcBinEnabled));
}
@Override
diff --git
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSizeTest.java
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSizeTest.java
new file mode 100644
index 0000000000..77c48aa270
--- /dev/null
+++
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSizeTest.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.MongoClient;
+import com.mongodb.client.AggregateIterable;
+import com.mongodb.client.MongoCollection;
+import org.bson.Document;
+import org.bson.conversions.Bson;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@link MongoFullGcNodeBinSumBsonSize}. The delegate bin, the document
+ * store and the Mongo collections are mocked; the tests check when and by how much the
+ * removed-size counter in the SETTINGS collection is incremented.
+ */
+public class MongoFullGcNodeBinSumBsonSizeTest {
+
+    @Mock
+    private MongoFullGcNodeBin delegate;
+
+    @Mock
+    private MongoDocumentStore store;
+
+    @Mock
+    private MongoCollection<BasicDBObject> nodesCollection;
+
+    @Mock
+    private MongoCollection<BasicDBObject> settingsCollection;
+
+    @Mock
+    private AggregateIterable<BasicDBObject> aggregateIterable;
+
+    private MongoFullGcNodeBinSumBsonSize wrapper;
+
+    @Before
+    public void setUp() {
+        MockitoAnnotations.openMocks(this);
+        when(delegate.getMongoDocumentStore()).thenReturn(store);
+        when(store.getDBCollection(Collection.NODES)).thenReturn(nodesCollection);
+        when(store.getDBCollection(Collection.SETTINGS)).thenReturn(settingsCollection);
+        when(nodesCollection.aggregate(any())).thenReturn(aggregateIterable);
+        wrapper = new MongoFullGcNodeBinSumBsonSize(delegate);
+    }
+
+    /**
+     * Stubs the size aggregation on the NODES collection.
+     *
+     * @param totalSizes one value (returned for every size query) or two values
+     *                   (returned for the first and for all later size queries)
+     */
+    private void mockBsonSizeCalculation(long... totalSizes) {
+        // validate first so that a wrong call fails with a meaningful message
+        if (totalSizes.length == 0) {
+            throw new IllegalArgumentException("At least 1 argument is required");
+        }
+        if (totalSizes.length > 2) {
+            throw new IllegalArgumentException("Max 2 arguments are supported");
+        }
+        List<BasicDBObject> results = new ArrayList<>();
+        for (long totalSize : totalSizes) {
+            BasicDBObject aggregateResult = new BasicDBObject("_id", null)
+                    .append("totalSize", totalSize);
+            results.add(aggregateResult);
+        }
+        //return based on argument number
+        if (totalSizes.length == 2) {
+            when(aggregateIterable.first())
+                    .thenReturn(results.get(0))
+                    .thenReturn(results.get(1));
+        } else {
+            when(aggregateIterable.first()).thenReturn(results.get(0));
+        }
+    }
+
+    /**
+     * Converts a driver {@link Bson} into a {@link Document} so that it can be inspected.
+     */
+    private static Document toDocument(Bson bson) {
+        return Document.parse(
+                bson.toBsonDocument(Document.class, MongoClient.getDefaultCodecRegistry()).toJson());
+    }
+
+    /**
+     * Verifies that exactly one update was sent to the SETTINGS collection, that it
+     * targets the versionGC document and that it increments the removed-size counter
+     * by the expected amount.
+     */
+    private void verifySettingsIncrementedBy(long expectedIncrement) {
+        ArgumentCaptor<Bson> queryCaptor = ArgumentCaptor.forClass(Bson.class);
+        ArgumentCaptor<Bson> updateCaptor = ArgumentCaptor.forClass(Bson.class);
+        verify(settingsCollection).updateOne(queryCaptor.capture(), updateCaptor.capture());
+
+        // Verify query
+        Document queryDoc = toDocument(queryCaptor.getValue());
+        assertEquals("versionGC", queryDoc.get("_id"));
+
+        // Verify update
+        Document updateDoc = toDocument(updateCaptor.getValue());
+        Document inc = updateDoc.get("$inc", Document.class);
+        assertEquals(Long.valueOf(expectedIncrement), inc.getLong("fullGcRemovedTotalBsonSize"));
+    }
+
+    @Test
+    public void testSetEnabled() {
+        wrapper.setEnabled(true);
+        verify(delegate).setEnabled(true);
+    }
+
+    @Test
+    public void testFindAndUpdateWithSuccessfulUpdate() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+        UpdateOp op1 = new UpdateOp("doc1", false);
+        updateOps.add(op1);
+
+        List<NodeDocument> expectedDocs = new ArrayList<>();
+        expectedDocs.add(NodeDocument.NULL);
+
+        List<NodeDocument> docs = new ArrayList<>();
+        docs.add(NodeDocument.NULL);
+        when(delegate.findAndUpdate(updateOps)).thenReturn(docs);
+        //before size 100, after update -> size 50
+        mockBsonSizeCalculation(100L, 50);
+
+        // Execute
+        List<NodeDocument> result = wrapper.findAndUpdate(updateOps);
+
+        // Verify
+        assertEquals(expectedDocs, result);
+        verify(delegate).findAndUpdate(updateOps);
+
+        // Verify bson size update: 100 - 50
+        verifySettingsIncrementedBy(50L);
+    }
+
+    @Test
+    public void testFindAndUpdateWithInitialBsonSizeLessThenUpdatedBsonSize() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+        UpdateOp op1 = new UpdateOp("doc1", false);
+        updateOps.add(op1);
+
+        List<NodeDocument> docs = new ArrayList<>();
+        docs.add(NodeDocument.NULL);
+        when(delegate.findAndUpdate(updateOps)).thenReturn(docs);
+        //before size 100, after update -> size 150
+        mockBsonSizeCalculation(100L, 150L);
+
+        // Execute
+        wrapper.findAndUpdate(updateOps);
+
+        // Verify: a negative diff must not touch the counter
+        verify(delegate).findAndUpdate(updateOps);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), any(Bson.class));
+    }
+
+    @Test
+    public void testFindAndUpdateWithNoUpdates() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+        UpdateOp op1 = new UpdateOp("doc1", false);
+        updateOps.add(op1);
+
+        when(delegate.findAndUpdate(updateOps)).thenReturn(new ArrayList<>());
+        mockBsonSizeCalculation(100L);
+
+        // Execute
+        List<NodeDocument> result = wrapper.findAndUpdate(updateOps);
+
+        // Verify
+        assertTrue(result.isEmpty());
+        verify(delegate).findAndUpdate(updateOps);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), any(Bson.class));
+    }
+
+    @Test
+    public void testFindAndUpdateWithEmptyList() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+
+        when(delegate.findAndUpdate(updateOps)).thenReturn(new ArrayList<>());
+
+        // Execute
+        List<NodeDocument> result = wrapper.findAndUpdate(updateOps);
+
+        // Verify
+        assertTrue(result.isEmpty());
+        verify(delegate).findAndUpdate(updateOps);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), any(Bson.class));
+    }
+
+    @Test
+    public void testRemoveWithSuccessfulRemoval() {
+        // Setup
+        Map<String, Long> removalMap = new HashMap<>();
+        removalMap.put("doc1", 1L);
+        removalMap.put("doc2", 2L);
+
+        when(delegate.remove(removalMap)).thenReturn(2);
+        mockBsonSizeCalculation(200L);
+
+        // Execute
+        int result = wrapper.remove(removalMap);
+
+        // Verify
+        assertEquals(2, result);
+        verify(delegate).remove(removalMap);
+
+        // Verify bson size update: the full size measured before the removal
+        verifySettingsIncrementedBy(200L);
+    }
+
+    @Test
+    public void testRemoveWithNoBsonSize() {
+        // Setup
+        Map<String, Long> removalMap = new HashMap<>();
+        removalMap.put("doc1", 1L);
+
+        when(delegate.remove(removalMap)).thenReturn(1);
+        mockBsonSizeCalculation(0L);
+
+        // Execute
+        int result = wrapper.remove(removalMap);
+
+        // Verify
+        assertEquals(1, result);
+        verify(delegate).remove(removalMap);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), any(Bson.class));
+    }
+
+    @Test
+    public void testRemoveWithEmptyMap() {
+        // Setup
+        Map<String, Long> removalMap = new HashMap<>();
+
+        when(delegate.remove(removalMap)).thenReturn(0);
+
+        // Execute
+        int result = wrapper.remove(removalMap);
+
+        // Verify
+        assertEquals(0, result);
+        verify(delegate).remove(removalMap);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), any(Bson.class));
+    }
+
+}
\ No newline at end of file