This is an automated email from the ASF dual-hosted git repository.

adulceanu pushed a commit to branch issues/OAK-10978
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit f43f4d3c4a3253929b3f1b4d49e0c69b1bf0e83d
Author: Andrei Dulceanu <dulce...@adobe.com>
AuthorDate: Mon Jul 29 12:00:33 2024 +0300

    OAK-10978 - Skip Azure compaction when there's not enough garbage in the repository
---
 .../apache/jackrabbit/oak/run/CompactCommand.java  | 18 ++++++
 .../oak/segment/azure/tool/AzureCompact.java       | 67 +++++++++++++++++++++-
 .../jackrabbit/oak/segment/file/FileStore.java     |  2 +-
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
index ced75fc6be..0f8ee389fe 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
@@ -78,6 +78,16 @@ class CompactCommand implements Command {
                 .withRequiredArg()
                 .ofType(Integer.class)
                 .defaultsTo(50);
+        OptionSpec<Integer> garbageThresholdGb = 
parser.accepts("garbage-threshold-gb", "Minimum amount of garbage in GB 
(defaults to 0 GB) for "
+                        + "compaction to run")
+                .withRequiredArg()
+                .ofType(Integer.class)
+                .defaultsTo(0);
+        OptionSpec<Integer> garbageThresholdPercentage = 
parser.accepts("garbage-threshold-percentage", "Minimum amount of garbage in 
percentage (defaults to 0%) for "
+                        + "compaction to run")
+                .withRequiredArg()
+                .ofType(Integer.class)
+                .defaultsTo(0);
 
 
         OptionSet options = parser.parse(args);
@@ -111,6 +121,14 @@ class CompactCommand implements Command {
                 
azureBuilder.withPersistentCacheSizeGb(persistentCacheSizeGb.value(options));
             }
 
+            if (options.has(garbageThresholdGb)) {
+                
azureBuilder.withGarbageThresholdGb(garbageThresholdGb.value(options));
+            }
+
+            if (options.has(garbageThresholdPercentage)) {
+                
azureBuilder.withGarbageThresholdPercentage(garbageThresholdPercentage.value(options));
+            }
+
             if (options.has(tailArg)) {
                 azureBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
             }
diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
index a5db11950b..e11f30ca7b 100644
--- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
+++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
@@ -37,6 +37,8 @@ import 
org.apache.jackrabbit.oak.segment.azure.tool.ToolUtils.SegmentStoreType;
 import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import 
org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.segment.file.GCJournal;
+import org.apache.jackrabbit.oak.segment.spi.persistence.GCJournalFile;
 import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager;
 import 
org.apache.jackrabbit.oak.segment.spi.persistence.SegmentNodeStorePersistence;
 import 
org.apache.jackrabbit.oak.segment.spi.persistence.split.SplitPersistence;
@@ -88,6 +90,10 @@ public class AzureCompact {
 
         private Integer persistentCacheSizeGb;
 
+        private int garbageThresholdGb;
+
+        private int garbageThresholdPercentage;
+
         private CloudBlobDirectory sourceCloudBlobDirectory;
 
         private CloudBlobDirectory destinationCloudBlobDirectory;
@@ -219,6 +225,29 @@ public class AzureCompact {
             return this;
         }
 
+        /**
+         * The minimum garbage size in GB for the compaction to run.
+         * @param garbageThresholdGb
+         *           the minimum garbage size in GB for the compaction to run.
+         *
+         * @return this builder
+         */
+        public Builder withGarbageThresholdGb(int garbageThresholdGb) {
+            this.garbageThresholdGb = garbageThresholdGb;
+            return this;
+        }
+
+        /**
+         * The minimum garbage size in percentage for the compaction to run.
+         * @param garbageThresholdPercentage
+         *          the minimum garbage size in percentage for the compaction 
to run.
+         * @return this builder
+         */
+        public Builder withGarbageThresholdPercentage(int 
garbageThresholdPercentage) {
+            this.garbageThresholdPercentage = garbageThresholdPercentage;
+            return this;
+        }
+
         public Builder withSourceCloudBlobDirectory(CloudBlobDirectory 
sourceCloudBlobDirectory) {
             this.sourceCloudBlobDirectory = 
checkNotNull(sourceCloudBlobDirectory);
             return this;
@@ -243,6 +272,8 @@ public class AzureCompact {
         }
     }
 
+    private static final long GB = 1024 * 1024 * 1024;
+
     private final String path;
 
     private final String targetPath;
@@ -263,6 +294,10 @@ public class AzureCompact {
 
     private final Integer persistentCacheSizeGb;
 
+    private final int garbageThresholdGb;
+
+    private final int garbageThresholdPercentage;
+
     private final CloudBlobDirectory sourceCloudBlobDirectory;
 
     private final CloudBlobDirectory destinationCloudBlobDirectory;
@@ -279,6 +314,8 @@ public class AzureCompact {
         this.concurrency = builder.concurrency;
         this.persistentCachePath = builder.persistentCachePath;
         this.persistentCacheSizeGb = builder.persistentCacheSizeGb;
+        this.garbageThresholdGb = builder.garbageThresholdGb;
+        this.garbageThresholdPercentage = builder.garbageThresholdPercentage;
         this.sourceCloudBlobDirectory = builder.sourceCloudBlobDirectory;
         this.destinationCloudBlobDirectory = 
builder.destinationCloudBlobDirectory;
         this.azureStorageCredentialManager = new 
AzureStorageCredentialManager();
@@ -317,10 +354,19 @@ public class AzureCompact {
         }
 
         printArchives(System.out, beforeArchives);
-        System.out.printf("    -> compacting\n");
 
         try (FileStore store = newFileStore(splitPersistence, 
Files.createTempDir(), strictVersionCheck, segmentCacheSize,
                 gcLogInterval, compactorType, concurrency)) {
+            if (garbageThresholdGb > 0 && garbageThresholdPercentage > 0) {
+                System.out.printf("    -> minimum garbage threshold set to %d 
GB or %d%%\n", garbageThresholdGb, garbageThresholdPercentage);
+                long currentSize = store.size();
+                if (!isGarbageOverMinimumThreshold(currentSize, 
roPersistence)) {
+                    return 0;
+                }
+            }
+
+            System.out.printf("    -> compacting\n");
+
             boolean success = false;
             switch (gcType) {
                 case FULL:
@@ -369,6 +415,25 @@ public class AzureCompact {
         return 0;
     }
 
+    private boolean isGarbageOverMinimumThreshold(long currentSize, 
SegmentNodeStorePersistence roPersistence) throws IOException {
+        long previousSize = 0;
+
+        GCJournalFile gcJournalFile = roPersistence.getGCJournalFile();
+        if (gcJournalFile != null) {
+            GCJournal gcJournal = new GCJournal(gcJournalFile);
+            GCJournal.GCJournalEntry gcJournalEntry = gcJournal.read();
+            previousSize = gcJournalEntry.getRepoSize();
+        }
+
+        long potentialGarbage = currentSize - previousSize;
+        if (currentSize < previousSize || (potentialGarbage < 
garbageThresholdGb * GB && potentialGarbage < currentSize * 
garbageThresholdPercentage / 100)) {
+            System.out.printf("    -> [skipping] not enough garbage -> 
previous size: %d, current size: %d\n", previousSize, currentSize);
+            return false;
+        }
+
+        return true;
+    }
+
     private long printTargetRepoSizeInfo(CloudBlobContainer container) {
         System.out.printf("Calculating the size of container %s\n", 
container.getName());
         long size = 0;
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
index 6381e1129c..7e72cf7824 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
@@ -298,7 +298,7 @@ public class FileStore extends AbstractFileStore {
     /**
      * @return the size of this store.
      */
-    private long size() {
+    public long size() {
         try (ShutDownCloser ignored = shutDown.keepAlive()) {
             return tarFiles.size();
         }

Reply via email to