This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new eb852d6de7 OAK-11671 [full-gc] Persist the avg object size and avg 
size of props per document in Mongo settings (#2252)
eb852d6de7 is described below

commit eb852d6de7a7c696526acc21bf7e706aba3b150a
Author: Daniel Iancu <[email protected]>
AuthorDate: Tue Apr 29 12:47:32 2025 +0300

    OAK-11671 [full-gc] Persist the avg object size and avg size of props per 
document in Mongo settings (#2252)
    
    Co-authored-by: Daniel Iancu <[email protected]>
---
 .../plugins/document/VersionGarbageCollector.java  |   7 +-
 .../plugins/document/mongo/MongoFullGcNodeBin.java |   4 +
 .../mongo/MongoFullGcNodeBinSumBsonSize.java       | 139 +++++++++++
 .../document/mongo/MongoVersionGCSupport.java      |   5 +-
 .../mongo/MongoFullGcNodeBinSumBsonSizeTest.java   | 267 +++++++++++++++++++++
 5 files changed, 418 insertions(+), 4 deletions(-)

diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 400c5ed430..418da521b7 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -129,7 +129,12 @@ public class VersionGarbageCollector {
     /**
      * Document id stored in settings collection that keeps info about version 
gc
      */
-    static final String SETTINGS_COLLECTION_ID = "versionGC";
+    public static final String SETTINGS_COLLECTION_ID = "versionGC";
+
+    /**
+     * Property name under which the total size of removed garbage, in bytes, is summed
+     */
+    public static final String 
SETTINGS_COLLECTION_FULL_GC_REMOVED_TOTAL_BSON_SIZE = 
"fullGcRemovedTotalBsonSize";
 
     /**
      * Property name to timestamp when last gc run happened
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
index 5714a6bba9..7889926679 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBin.java
@@ -173,4 +173,8 @@ public class MongoFullGcNodeBin implements FullGcNodeBin {
     public boolean isEnabled() {
         return enabled;
     }
+
+    MongoDocumentStore getMongoDocumentStore() {
+        return mongoDocumentStore;
+    }
 }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSize.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSize.java
new file mode 100644
index 0000000000..3e2ce684e4
--- /dev/null
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSize.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.client.model.Updates;
+import org.bson.conversions.Bson;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.jackrabbit.oak.plugins.document.FullGcNodeBin;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+
+import static com.mongodb.client.model.Filters.eq;
+import static com.mongodb.client.model.Filters.in;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_GC_REMOVED_TOTAL_BSON_SIZE;
+import static 
org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
+
+/**
+ * This class is a wrapper around a MongoFullGcNodeBin
+ * that sums the bson size freed when documents are removed or updated and 
then adds it to the value in the SETTINGS collection
+ */
+class MongoFullGcNodeBinSumBsonSize implements FullGcNodeBin {
+    private static final Logger LOG = 
LoggerFactory.getLogger(MongoFullGcNodeBinSumBsonSize.class);
+
+    private final MongoFullGcNodeBin delegate;
+    private final MongoDocumentStore store;
+
+    public MongoFullGcNodeBinSumBsonSize(MongoFullGcNodeBin delegate) {
+        this.delegate = delegate;
+        this.store = delegate.getMongoDocumentStore();
+    }
+
+    @Override
+    public void setEnabled(boolean value) {
+        delegate.setEnabled(value);
+    }
+
+    /**
+     * Apply the updates through the delegate and add the resulting drop in 
the total bson size of the targeted documents to the running sum
+     * @param updateOpList the update operations to apply; their ids select the documents that are measured
+     * @return the list of documents returned by the delegate
+     */
+    @Override
+    public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) {
+        List<String> ids = 
updateOpList.stream().map(UpdateOp::getId).collect(Collectors.toList());
+        //measure the total bson size of the targeted documents before the update
+        long initialBsonSize = getBsonSize(ids);
+        LOG.debug("bson size before update: {}", initialBsonSize);
+        //let the delegate apply the updates (removal of garbage properties)
+        List<NodeDocument> updated = delegate.findAndUpdate(updateOpList);
+        if (!updated.isEmpty()) {
+            //measure the same documents again, only when the delegate returned something
+            long afterUpdateBsonSize = getBsonSize(ids);
+            LOG.debug("bson size after update: {}", afterUpdateBsonSize);
+            if (initialBsonSize > 0 && afterUpdateBsonSize > 0) {
+                //both measurements are positive here; addBsonSize ignores a non-positive difference
+                addBsonSize(initialBsonSize - afterUpdateBsonSize);
+            }
+        }
+        return updated;
+    }
+    /**
+     * Remove the documents from the collection and add their total bson size 
to the running sum
+     * @param orphanOrDeletedRemovalMap the map of the documents to be removed, keyed by document id
+     * @return the number of documents removed
+     */
+    @Override
+    public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
+        //measure the total bson size of the documents before the removal
+        long bsonSize = getBsonSize(new 
ArrayList<>(orphanOrDeletedRemovalMap.keySet()));
+        LOG.debug("bson size before remove: {}", bsonSize);
+        //let the delegate remove the garbage documents
+        int removed = delegate.remove(orphanOrDeletedRemovalMap);
+        //the full pre-removal size is added whenever at least one document was removed
+        if (removed > 0 && bsonSize > 0) {
+            addBsonSize(bsonSize);
+        }
+        return removed;
+    }
+
+    private void addBsonSize(long bsonSize) {
+        if (bsonSize <= 0) {
+            LOG.warn("bson size {} is not positive", bsonSize);
+            return;
+        }
+        //increment the removed-total-bson-size property of the versionGC document in the 
SETTINGS collection
+        Bson query = eq(ID, SETTINGS_COLLECTION_ID);
+        Bson update = 
Updates.inc(SETTINGS_COLLECTION_FULL_GC_REMOVED_TOTAL_BSON_SIZE, bsonSize);
+        //NOTE(review): no upsert option is passed, so presumably nothing is written if that document is missing - confirm
+        store.getDBCollection(SETTINGS).updateOne(query, update);
+        LOG.info("Incremented bson size with {}", bsonSize);
+    }
+
+    /**
+     * Calculate the total bson size of the NODES documents whose ids are in the list
+     * @param ids the ids of the documents to measure
+     * @return the summed bson size of the matching documents, or -1 if the aggregation returns no result
+     */
+    private long getBsonSize(List<String> ids) {
+        long start = System.currentTimeMillis();
+        try {
+            //match by _id, then sum $bsonSize of each whole document ($$ROOT) in one aggregation
+            Bson match = in("_id", ids);
+            BasicDBObject first = 
store.getDBCollection(NODES).aggregate(List.of(
+                new BasicDBObject("$match", match),
+                new BasicDBObject("$group", new BasicDBObject("_id", null)
+                    .append("totalSize", new BasicDBObject("$sum", new 
BasicDBObject("$bsonSize", "$$ROOT"))))
+            )).first();
+            return first != null ? first.getLong("totalSize") : -1;
+        } finally {
+            LOG.info("getBsonSize for {} documents took {} ms", ids.size(), 
System.currentTimeMillis() - start);
+        }
+    }
+}
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 6b58b0438f..14d3476abb 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -56,7 +56,6 @@ import com.mongodb.MongoClient;
 import com.mongodb.client.MongoCursor;
 
 import org.apache.jackrabbit.oak.commons.collections.IterableUtils;
-import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
 import org.apache.jackrabbit.oak.plugins.document.Document;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
@@ -113,7 +112,7 @@ public class MongoVersionGCSupport extends VersionGCSupport 
{
      */
     private final int batchSize = SystemPropertySupplier.create(
         "oak.mongo.queryDeletedDocsBatchSize", 1000).get();
-    private final MongoFullGcNodeBin fullGcBin;
+    private final FullGcNodeBin fullGcBin;
 
     public MongoVersionGCSupport(MongoDocumentStore store) {
         this(store, false);
@@ -136,7 +135,7 @@ public class MongoVersionGCSupport extends VersionGCSupport 
{
         } else {
             modifiedIdHint = null;
         }
-        this.fullGcBin = new MongoFullGcNodeBin(store, fullGcBinEnabled);
+        this.fullGcBin = new MongoFullGcNodeBinSumBsonSize( new 
MongoFullGcNodeBin(store, fullGcBinEnabled));
     }
 
     @Override
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSizeTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSizeTest.java
new file mode 100644
index 0000000000..77c48aa270
--- /dev/null
+++ 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoFullGcNodeBinSumBsonSizeTest.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.document.mongo;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.MongoClient;
+import com.mongodb.client.AggregateIterable;
+import com.mongodb.client.MongoCollection;
+import org.bson.Document;
+import org.bson.conversions.Bson;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.UpdateOp;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class MongoFullGcNodeBinSumBsonSizeTest {
+
+    @Mock
+    private MongoFullGcNodeBin delegate;
+
+    @Mock
+    private MongoDocumentStore store;
+
+    @Mock
+    private MongoCollection<BasicDBObject> nodesCollection;
+
+    @Mock
+    private MongoCollection<BasicDBObject> settingsCollection;
+
+    @Mock
+    private AggregateIterable<BasicDBObject> aggregateIterable;
+
+    private MongoFullGcNodeBinSumBsonSize wrapper;
+
+    @Before
+    public void setUp() {
+        MockitoAnnotations.openMocks(this);
+        when(delegate.getMongoDocumentStore()).thenReturn(store);
+        
when(store.getDBCollection(Collection.NODES)).thenReturn(nodesCollection);
+        
when(store.getDBCollection(Collection.SETTINGS)).thenReturn(settingsCollection);
+        when(nodesCollection.aggregate(any())).thenReturn(aggregateIterable);
+        wrapper = new MongoFullGcNodeBinSumBsonSize(delegate);
+    }
+
+    private void mockBsonSizeCalculation(long... totalSizes) {
+        List<BasicDBObject> results = new ArrayList<>();
+        for (long totalSize : totalSizes) {
+            BasicDBObject aggregateResult = new BasicDBObject("_id", null)
+                .append("totalSize", totalSize);
+            results.add(aggregateResult);
+        }
+        if (totalSizes.length > 2) {
+            throw new IllegalArgumentException("Max 2 arguments are 
supported");
+        }
+        //two sizes stub consecutive first() calls (before, then after); otherwise results.get(0) is stubbed (NOTE(review): fails for zero arguments)
+        if (totalSizes.length == 2) {
+            when(aggregateIterable.first())
+                .thenReturn(results.get(0))
+                .thenReturn(results.get(1));
+        } else {
+            when(aggregateIterable.first()).thenReturn(results.get(0));
+        }
+    }
+
+    @Test
+    public void testSetEnabled() {
+        wrapper.setEnabled(true);
+        verify(delegate).setEnabled(true);
+    }
+
+    @Test
+    public void testFindAndUpdateWithSuccessfulUpdate() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+        UpdateOp op1 = new UpdateOp("doc1", false);
+        updateOps.add(op1);
+
+        List<NodeDocument> expectedDocs = new ArrayList<>();
+        expectedDocs.add(NodeDocument.NULL);
+
+        List<NodeDocument> docs = new ArrayList<>();
+        docs.add(NodeDocument.NULL);
+        when(delegate.findAndUpdate(updateOps)).thenReturn(docs);
+        //before size 100, after update -> size 50
+        mockBsonSizeCalculation(100L, 50);
+        
+        // Execute
+        List<NodeDocument> result = wrapper.findAndUpdate(updateOps);
+        
+        // Verify
+        assertEquals(expectedDocs, result);
+        verify(delegate).findAndUpdate(updateOps);
+        
+        // Verify bson size update
+        ArgumentCaptor<Bson> queryCaptor = ArgumentCaptor.forClass(Bson.class);
+        ArgumentCaptor<Bson> updateCaptor = 
ArgumentCaptor.forClass(Bson.class);
+        verify(settingsCollection).updateOne(queryCaptor.capture(), 
updateCaptor.capture());
+        
+        // Verify query
+        Bson query = queryCaptor.getValue();
+        Document queryDoc = 
Document.parse(query.toBsonDocument(Document.class, 
MongoClient.getDefaultCodecRegistry()).toJson());
+        assertEquals("versionGC", queryDoc.get("_id"));
+        
+        // Verify update
+        Bson update = updateCaptor.getValue();
+        Document updateDoc = 
Document.parse(update.toBsonDocument(Document.class, 
MongoClient.getDefaultCodecRegistry()).toJson());
+        Document inc = updateDoc.get("$inc", Document.class);
+        assertEquals(Long.valueOf(50L), 
inc.getLong("fullGcRemovedTotalBsonSize"));
+    }
+
+    @Test
+    public void testFindAndUpdateWithInitialBsonSizeLessThenUpdatedBsonSize() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+        UpdateOp op1 = new UpdateOp("doc1", false);
+        updateOps.add(op1);
+
+        List<NodeDocument> docs = new ArrayList<>();
+        docs.add(NodeDocument.NULL);
+        when(delegate.findAndUpdate(updateOps)).thenReturn(docs);
+        //before size 100, after update -> size 150
+        mockBsonSizeCalculation(100L, 150L);
+
+        // Execute
+        wrapper.findAndUpdate(updateOps);
+
+        // Verify
+        verify(delegate).findAndUpdate(updateOps);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), 
any(Bson.class));
+    }
+
+    @Test
+    public void testFindAndUpdateWithNoUpdates() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+        UpdateOp op1 = new UpdateOp("doc1", false);
+        updateOps.add(op1);
+        
+        when(delegate.findAndUpdate(updateOps)).thenReturn(new ArrayList<>());
+        mockBsonSizeCalculation(100L);
+        
+        // Execute
+        List<NodeDocument> result = wrapper.findAndUpdate(updateOps);
+        
+        // Verify
+        assertTrue(result.isEmpty());
+        verify(delegate).findAndUpdate(updateOps);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), 
any(Bson.class));
+    }
+
+    @Test
+    public void testFindAndUpdateWithEmptyList() {
+        // Setup
+        List<UpdateOp> updateOps = new ArrayList<>();
+        
+        when(delegate.findAndUpdate(updateOps)).thenReturn(new ArrayList<>());
+        
+        // Execute
+        List<NodeDocument> result = wrapper.findAndUpdate(updateOps);
+        
+        // Verify
+        assertTrue(result.isEmpty());
+        verify(delegate).findAndUpdate(updateOps);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), 
any(Bson.class));
+    }
+
+    @Test
+    public void testRemoveWithSuccessfulRemoval() {
+        // Setup
+        Map<String, Long> removalMap = new HashMap<>();
+        removalMap.put("doc1", 1L);
+        removalMap.put("doc2", 2L);
+        
+        when(delegate.remove(removalMap)).thenReturn(2);
+        mockBsonSizeCalculation(200L);
+        
+        // Execute
+        int result = wrapper.remove(removalMap);
+        
+        // Verify
+        assertEquals(2, result);
+        verify(delegate).remove(removalMap);
+        
+        // Verify bson size update
+        ArgumentCaptor<Bson> queryCaptor = ArgumentCaptor.forClass(Bson.class);
+        ArgumentCaptor<Bson> updateCaptor = 
ArgumentCaptor.forClass(Bson.class);
+        verify(settingsCollection).updateOne(queryCaptor.capture(), 
updateCaptor.capture());
+        
+        // Verify query
+        Bson query = queryCaptor.getValue();
+        Document queryDoc = 
Document.parse(query.toBsonDocument(Document.class, 
MongoClient.getDefaultCodecRegistry()).toJson());
+        assertEquals("versionGC", queryDoc.get("_id"));
+        
+        // Verify update
+        Bson update = updateCaptor.getValue();
+        Document updateDoc = 
Document.parse(update.toBsonDocument(Document.class, 
MongoClient.getDefaultCodecRegistry()).toJson());
+        Document inc = updateDoc.get("$inc", Document.class);
+        assertEquals(Long.valueOf(200L), 
inc.getLong("fullGcRemovedTotalBsonSize"));
+    }
+
+    @Test
+    public void testRemoveWithNoBsonSize() {
+        // Setup
+        Map<String, Long> removalMap = new HashMap<>();
+        removalMap.put("doc1", 1L);
+        
+        when(delegate.remove(removalMap)).thenReturn(1);
+        mockBsonSizeCalculation(0L);
+        
+        // Execute
+        int result = wrapper.remove(removalMap);
+        
+        // Verify
+        assertEquals(1, result);
+        verify(delegate).remove(removalMap);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), 
any(Bson.class));
+    }
+
+    @Test
+    public void testRemoveWithEmptyMap() {
+        // Setup
+        Map<String, Long> removalMap = new HashMap<>();
+        
+        when(delegate.remove(removalMap)).thenReturn(0);
+        
+        // Execute
+        int result = wrapper.remove(removalMap);
+        
+        // Verify
+        assertEquals(0, result);
+        verify(delegate).remove(removalMap);
+        verify(settingsCollection, never()).updateOne(any(Bson.class), 
any(Bson.class));
+    }
+
+}
\ No newline at end of file

Reply via email to