This is an automated email from the ASF dual-hosted git repository. adulceanu pushed a commit to branch issues/OAK-10978 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit f43f4d3c4a3253929b3f1b4d49e0c69b1bf0e83d
Author: Andrei Dulceanu <dulce...@adobe.com>
AuthorDate: Mon Jul 29 12:00:33 2024 +0300

    OAK-10978 - Skip Azure compaction when there's not enough garbage in the repository
---
 .../apache/jackrabbit/oak/run/CompactCommand.java  | 18 ++++++
 .../oak/segment/azure/tool/AzureCompact.java       | 67 +++++++++++++++++++++-
 .../jackrabbit/oak/segment/file/FileStore.java     |  2 +-
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
index ced75fc6be..0f8ee389fe 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
@@ -78,6 +78,16 @@ class CompactCommand implements Command {
                 .withRequiredArg()
                 .ofType(Integer.class)
                 .defaultsTo(50);
+        OptionSpec<Integer> garbageThresholdGb = parser.accepts("garbage-threshold-gb", "Minimum amount of garbage in GB (defaults to 0 GB) for "
+                + "compaction to run")
+                .withRequiredArg()
+                .ofType(Integer.class)
+                .defaultsTo(0);
+        OptionSpec<Integer> garbageThresholdPercentage = parser.accepts("garbage-threshold-percentage", "Minimum amount of garbage in percentage (defaults to 0%) for "
+                + "compaction to run")
+                .withRequiredArg()
+                .ofType(Integer.class)
+                .defaultsTo(0);
 
         OptionSet options = parser.parse(args);
 
@@ -111,6 +121,14 @@ class CompactCommand implements Command {
                 azureBuilder.withPersistentCacheSizeGb(persistentCacheSizeGb.value(options));
             }
 
+            if (options.has(garbageThresholdGb)) {
+                azureBuilder.withGarbageThresholdGb(garbageThresholdGb.value(options));
+            }
+
+            if (options.has(garbageThresholdPercentage)) {
+                azureBuilder.withGarbageThresholdPercentage(garbageThresholdPercentage.value(options));
+            }
+
+
             if (options.has(tailArg)) {
                 azureBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
             }
diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
index a5db11950b..e11f30ca7b 100644
--- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
+++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
@@ -37,6 +37,8 @@ import org.apache.jackrabbit.oak.segment.azure.tool.ToolUtils.SegmentStoreType;
 import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.segment.file.GCJournal;
+import org.apache.jackrabbit.oak.segment.spi.persistence.GCJournalFile;
 import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager;
 import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentNodeStorePersistence;
 import org.apache.jackrabbit.oak.segment.spi.persistence.split.SplitPersistence;
@@ -88,6 +90,10 @@ public class AzureCompact {
 
         private Integer persistentCacheSizeGb;
 
+        private int garbageThresholdGb;
+
+        private int garbageThresholdPercentage;
+
         private CloudBlobDirectory sourceCloudBlobDirectory;
 
         private CloudBlobDirectory destinationCloudBlobDirectory;
@@ -219,6 +225,29 @@ public class AzureCompact {
             return this;
         }
 
+        /**
+         * The minimum garbage size in GB for the compaction to run.
+         * @param garbageThresholdGb
+         * the minimum garbage size in GB for the compaction to run.
+         *
+         * @return this builder
+         */
+        public Builder withGarbageThresholdGb(int garbageThresholdGb) {
+            this.garbageThresholdGb = garbageThresholdGb;
+            return this;
+        }
+
+        /**
+         * The minimum garbage size in percentage for the compaction to run.
+         * @param garbageThresholdPercentage
+         * the minimum garbage size in percentage for the compaction to run.
+         * @return this builder
+         */
+        public Builder withGarbageThresholdPercentage(int garbageThresholdPercentage) {
+            this.garbageThresholdPercentage = garbageThresholdPercentage;
+            return this;
+        }
+
         public Builder withSourceCloudBlobDirectory(CloudBlobDirectory sourceCloudBlobDirectory) {
             this.sourceCloudBlobDirectory = checkNotNull(sourceCloudBlobDirectory);
             return this;
@@ -243,6 +272,8 @@ public class AzureCompact {
         }
     }
 
+    private static final long GB = 1024 * 1024 * 1024;
+
     private final String path;
 
     private final String targetPath;
@@ -263,6 +294,10 @@ public class AzureCompact {
 
     private final Integer persistentCacheSizeGb;
 
+    private final int garbageThresholdGb;
+
+    private final int garbageThresholdPercentage;
+
     private final CloudBlobDirectory sourceCloudBlobDirectory;
 
     private final CloudBlobDirectory destinationCloudBlobDirectory;
@@ -279,6 +314,8 @@ public class AzureCompact {
         this.concurrency = builder.concurrency;
         this.persistentCachePath = builder.persistentCachePath;
         this.persistentCacheSizeGb = builder.persistentCacheSizeGb;
+        this.garbageThresholdGb = builder.garbageThresholdGb;
+        this.garbageThresholdPercentage = builder.garbageThresholdPercentage;
         this.sourceCloudBlobDirectory = builder.sourceCloudBlobDirectory;
         this.destinationCloudBlobDirectory = builder.destinationCloudBlobDirectory;
         this.azureStorageCredentialManager = new AzureStorageCredentialManager();
@@ -317,10 +354,19 @@ public class AzureCompact {
         }
 
         printArchives(System.out, beforeArchives);
-        System.out.printf(" -> compacting\n");
 
         try (FileStore store = newFileStore(splitPersistence, Files.createTempDir(), strictVersionCheck, segmentCacheSize, gcLogInterval, compactorType, concurrency)) {
+            if (garbageThresholdGb > 0 && garbageThresholdPercentage > 0) {
+                System.out.printf(" -> minimum garbage threshold set to %d GB or %d%%\n", garbageThresholdGb, garbageThresholdPercentage);
+                long currentSize = store.size();
+                if (!isGarbageOverMinimumThreshold(currentSize, roPersistence)) {
+                    return 0;
+                }
+            }
+
+            System.out.printf(" -> compacting\n");
+
             boolean success = false;
             switch (gcType) {
                 case FULL:
@@ -369,6 +415,25 @@ public class AzureCompact {
         return 0;
     }
 
+    private boolean isGarbageOverMinimumThreshold(long currentSize, SegmentNodeStorePersistence roPersistence) throws IOException {
+        long previousSize = 0;
+
+        GCJournalFile gcJournalFile = roPersistence.getGCJournalFile();
+        if (gcJournalFile != null) {
+            GCJournal gcJournal = new GCJournal(gcJournalFile);
+            GCJournal.GCJournalEntry gcJournalEntry = gcJournal.read();
+            previousSize = gcJournalEntry.getRepoSize();
+        }
+
+        long potentialGarbage = currentSize - previousSize;
+        if (currentSize < previousSize || (potentialGarbage < garbageThresholdGb * GB && potentialGarbage < currentSize * garbageThresholdPercentage / 100)) {
+            System.out.printf(" -> [skipping] not enough garbage -> previous size: %d, current size: %d\n", previousSize, currentSize);
+            return false;
+        }
+
+        return true;
+    }
+
     private long printTargetRepoSizeInfo(CloudBlobContainer container) {
         System.out.printf("Calculating the size of container %s\n", container.getName());
         long size = 0;
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
index 6381e1129c..7e72cf7824 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
@@ -298,7 +298,7 @@ public class FileStore extends AbstractFileStore {
     /**
      * @return the size of this store.
      */
-    private long size() {
+    public long size() {
         try (ShutDownCloser ignored = shutDown.keepAlive()) {
             return tarFiles.size();
         }