This is an automated email from the ASF dual-hosted git repository. adulceanu pushed a commit to branch issues/OAK-9949-final in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit a2309992a222b8740b2fc2fd10eea0892db7ca3f Author: Lucas Weitzendorf <lweitzend...@adobe.com> AuthorDate: Wed Sep 28 14:59:13 2022 +0200 OAK-9949 Offline Tail Compaction --- .../site/markdown/nodestore/segment/overview.md | 6 ++- .../apache/jackrabbit/oak/run/CompactCommand.java | 23 +++++++---- .../oak/segment/aws/tool/AwsCompact.java | 28 ++++++++++++- .../oak/segment/azure/tool/AzureCompact.java | 28 ++++++++++++- .../oak/segment/CheckpointCompactor.java | 46 +++++++++++++--------- .../jackrabbit/oak/segment/tool/Compact.java | 28 ++++++++++++- 6 files changed, 129 insertions(+), 30 deletions(-) diff --git a/oak-doc/src/site/markdown/nodestore/segment/overview.md b/oak-doc/src/site/markdown/nodestore/segment/overview.md index 53f10542ab..499bcb3cf2 100644 --- a/oak-doc/src/site/markdown/nodestore/segment/overview.md +++ b/oak-doc/src/site/markdown/nodestore/segment/overview.md @@ -823,14 +823,16 @@ This option is optional and is disabled by default. ### <a name="compact"/> Compact ``` -java -jar oak-run.jar compact [--force] [--mmap] [--compactor] [--threads] SOURCE [--target-path DESTINATION] [--persistent-cache-path PERSISTENT_CACHE_PATH] [--persistent-cache-size-gb <PERSISTENT_CACHE_SIZE_GB>] +java -jar oak-run.jar compact [--force] [--mmap] [--tail] [--compactor] [--threads] SOURCE [--target-path DESTINATION] [--persistent-cache-path PERSISTENT_CACHE_PATH] [--persistent-cache-size-gb <PERSISTENT_CACHE_SIZE_GB>] ``` The `compact` command performs offline compaction of the local/remote Segment Store at `SOURCE`. `SOURCE` must be a valid path/uri to an existing Segment Store. Currently, Azure Segment Store and AWS Segment Store are the supported remote Segment Stores. Please refer to the [Remote Segment Stores](#remote-segment-stores) section for details on how to correctly specify connection URIs. -If the optional `--force [Boolean]` argument is set to `true` the tool ignores a non-matching Segment Store version. 
*CAUTION*: this will upgrade the Segment Store to the +With the optional `--tail` flag, only tail compaction is performed instead of the full repository. + +If the optional `--force` flag is set, the tool ignores a non-matching Segment Store version. *CAUTION*: this will upgrade the Segment Store to the latest version, which is incompatible with older versions. *There is no way to downgrade an accidentally upgraded Segment Store*. diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java index 47257982d3..f955278931 100644 --- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java +++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java @@ -25,6 +25,7 @@ import joptsimple.OptionSet; import joptsimple.OptionSpec; import org.apache.jackrabbit.oak.run.commons.Command; import org.apache.jackrabbit.oak.segment.azure.tool.AzureCompact; +import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions; import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType; import org.apache.jackrabbit.oak.segment.aws.tool.AwsCompact; import org.apache.jackrabbit.oak.segment.tool.Compact; @@ -48,12 +49,11 @@ class CompactCommand implements Command { "is always enforced and this option is ignored.") .withOptionalArg() .ofType(Boolean.class); - OptionSpec<Boolean> forceArg = parser.accepts("force", + OptionSpec<Void> forceArg = parser.accepts("force", "Force compaction and ignore a non matching segment store version. 
" + "CAUTION: this will upgrade the segment store to the latest version, " + - "which is incompatible with older versions of Oak.") - .withOptionalArg() - .ofType(Boolean.class); + "which is incompatible with older versions of Oak."); + OptionSpec<Void> tailArg = parser.accepts("tail", "Use tail compaction instead of a full repository rewrite."); OptionSpec<String> compactor = parser.accepts("compactor", "Allow the user to control compactor type to be used. Valid choices are \"classic\", \"diff\", \"parallel\". " + "While \"classic\" is slower, it might be more stable, due to lack of optimisations employed " + @@ -110,10 +110,13 @@ class CompactCommand implements Command { .withTargetPath(targetPath.value(options)) .withPersistentCachePath(persistentCachePath.value(options)) .withPersistentCacheSizeGb(persistentCacheSizeGb.value(options)) - .withForce(isTrue(forceArg.value(options))) + .withForce(options.has(forceArg)) .withGCLogInterval(Long.getLong("compaction-progress-log", 150000)) .withConcurrency(nThreads.value(options)); + if (options.has(tailArg)) { + azureBuilder.withGCType(SegmentGCOptions.GCType.TAIL); + } if (options.has(compactor)) { azureBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options))); } @@ -122,11 +125,14 @@ class CompactCommand implements Command { } else if (path.startsWith("aws:")) { AwsCompact.Builder awsBuilder = AwsCompact.builder() .withPath(path) - .withForce(isTrue(forceArg.value(options))) + .withForce(options.has(forceArg)) .withSegmentCacheSize(Integer.getInteger("cache", 256)) .withGCLogInterval(Long.getLong("compaction-progress-log", 150000)) .withConcurrency(nThreads.value(options)); + if (options.has(tailArg)) { + awsBuilder.withGCType(SegmentGCOptions.GCType.TAIL); + } if (options.has(compactor)) { awsBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options))); } @@ -135,13 +141,16 @@ class CompactCommand implements Command { } else { Compact.Builder tarBuilder = 
Compact.builder() .withPath(new File(path)) - .withForce(isTrue(forceArg.value(options))) + .withForce(options.has(forceArg)) .withMmap(mmapArg.value(options)) .withOs(StandardSystemProperty.OS_NAME.value()) .withSegmentCacheSize(Integer.getInteger("cache", 256)) .withGCLogInterval(Long.getLong("compaction-progress-log", 150000)) .withConcurrency(nThreads.value(options)); + if (options.has(tailArg)) { + tarBuilder.withGCType(SegmentGCOptions.GCType.TAIL); + } if (options.has(compactor)) { tarBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options))); } diff --git a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java index 1c77b9bab6..6405dcebd1 100644 --- a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java +++ b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java @@ -34,6 +34,7 @@ import org.apache.jackrabbit.guava.common.io.Files; import org.apache.jackrabbit.oak.segment.SegmentCache; import org.apache.jackrabbit.oak.segment.aws.tool.AwsToolUtils.SegmentStoreType; +import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType; import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType; import org.apache.jackrabbit.oak.segment.file.FileStore; import org.apache.jackrabbit.oak.segment.file.JournalReader; @@ -73,6 +74,8 @@ public class AwsCompact { private int segmentCacheSize = DEFAULT_SEGMENT_CACHE_MB; + private GCType gcType = GCType.FULL; + private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR; private int concurrency = 1; @@ -133,6 +136,16 @@ public class AwsCompact { return this; } + /** + * The garbage collection type used. 
If not specified it defaults to full compaction + * @param gcType the GC type + * @return this builder + */ + public Builder withGCType(GCType gcType) { + this.gcType = gcType; + return this; + } + /** * The compactor type to be used by compaction. If not specified it defaults to * "parallel" compactor @@ -173,6 +186,8 @@ public class AwsCompact { private final long gcLogInterval; + private final GCType gcType; + private final CompactorType compactorType; private final int concurrency; @@ -182,6 +197,7 @@ public class AwsCompact { this.segmentCacheSize = builder.segmentCacheSize; this.strictVersionCheck = !builder.force; this.gcLogInterval = builder.gcLogInterval; + this.gcType = builder.gcType; this.compactorType = builder.compactorType; this.concurrency = builder.concurrency; } @@ -206,7 +222,17 @@ public class AwsCompact { try (FileStore store = newFileStore(persistence, Files.createTempDir(), strictVersionCheck, segmentCacheSize, gcLogInterval, compactorType, concurrency)) { - if (!store.compactFull()) { + boolean success = false; + switch (gcType) { + case FULL: + success = store.compactFull(); + break; + case TAIL: + success = store.compactTail(); + break; + } + + if (!success) { System.out.printf("Compaction cancelled after %s.\n", printableStopwatch(watch)); return 1; } diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java index a8e103c3c8..d4942f63f1 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java @@ -36,6 +36,7 @@ import com.microsoft.azure.storage.blob.ListBlobItem; import org.apache.jackrabbit.oak.segment.SegmentCache; import org.apache.jackrabbit.oak.segment.azure.tool.ToolUtils.SegmentStoreType; +import 
org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType; import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType; import org.apache.jackrabbit.oak.segment.file.FileStore; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager; @@ -79,6 +80,8 @@ public class AzureCompact { private int segmentCacheSize = 2048; + private GCType gcType = GCType.FULL; + private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR; private int concurrency = 1; @@ -159,6 +162,16 @@ public class AzureCompact { return this; } + /** + * The garbage collection type used. If not specified it defaults to full compaction + * @param gcType the GC type + * @return this builder + */ + public Builder withGCType(GCType gcType) { + this.gcType = gcType; + return this; + } + /** * The compactor type to be used by compaction. If not specified it defaults to * "parallel" compactor @@ -225,6 +238,8 @@ public class AzureCompact { private final long gcLogInterval; + private final GCType gcType; + private final CompactorType compactorType; private final int concurrency; @@ -239,6 +254,7 @@ public class AzureCompact { this.segmentCacheSize = builder.segmentCacheSize; this.strictVersionCheck = !builder.force; this.gcLogInterval = builder.gcLogInterval; + this.gcType = builder.gcType; this.compactorType = builder.compactorType; this.concurrency = builder.concurrency; this.persistentCachePath = builder.persistentCachePath; @@ -270,7 +286,17 @@ public class AzureCompact { try (FileStore store = newFileStore(splitPersistence, Files.createTempDir(), strictVersionCheck, segmentCacheSize, gcLogInterval, compactorType, concurrency)) { - if (!store.compactFull()) { + boolean success = false; + switch (gcType) { + case FULL: + success = store.compactFull(); + break; + case TAIL: + success = store.compactTail(); + break; + } + + if (!success) { System.out.printf("Compaction cancelled after %s.\n", printableStopwatch(watch)); return 1; } diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java index d5609a5a9d..e9d63cec05 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java @@ -36,11 +36,13 @@ import java.util.Map; import java.util.Map.Entry; import org.apache.jackrabbit.oak.commons.Buffer; +import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry; import org.apache.jackrabbit.oak.segment.file.GCNodeWriteMonitor; import org.apache.jackrabbit.oak.segment.file.cancel.Canceller; import org.apache.jackrabbit.oak.spi.blob.BlobStore; import org.apache.jackrabbit.oak.spi.gc.GCMonitor; import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; +import org.apache.jackrabbit.oak.spi.state.DefaultNodeStateDiff; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.jetbrains.annotations.NotNull; @@ -117,7 +119,7 @@ public class CheckpointCompactor implements Compactor { ) throws IOException { // Collect a chronologically ordered list of roots for the uncompacted // state. This list consists of all checkpoints followed by the root. - LinkedHashMap<String, NodeState> uncompactedRoots = collectRoots(uncompacted); + LinkedHashMap<String, NodeState> uncompactedRoots = collectRoots(base, uncompacted); // Compact the list of uncompacted roots to a list of compacted roots. LinkedHashMap<String, NodeState> compactedRoots = compact( @@ -183,27 +185,35 @@ public class CheckpointCompactor implements Compactor { * the root. 
*/ @NotNull - private LinkedHashMap<String, NodeState> collectRoots(@Nullable NodeState superRoot) { + private LinkedHashMap<String, NodeState> collectRoots(@NotNull NodeState superRootBefore, @NotNull NodeState superRootAfter) { LinkedHashMap<String, NodeState> roots = newLinkedHashMap(); - if (superRoot != null) { - List<ChildNodeEntry> checkpoints = newArrayList( - superRoot.getChildNode("checkpoints").getChildNodeEntries()); - checkpoints.sort((cne1, cne2) -> { - long c1 = cne1.getNodeState().getLong("created"); - long c2 = cne2.getNodeState().getLong("created"); - return Long.compare(c1, c2); - }); + List<ChildNodeEntry> checkpoints = newArrayList(); + superRootAfter.getChildNode("checkpoints").compareAgainstBaseState( + superRootBefore.getChildNode("checkpoints"), new DefaultNodeStateDiff() { + @Override + public boolean childNodeAdded(String name, NodeState after) { + checkpoints.add(new MemoryChildNodeEntry(name, after)); + return true; + } + } + ); - for (ChildNodeEntry checkpoint : checkpoints) { - String name = checkpoint.getName(); - NodeState node = checkpoint.getNodeState(); - gcListener.info("found checkpoint {} created at {}.", - name, new Date(node.getLong("created"))); - roots.put("checkpoints/" + name + "/root", node.getChildNode("root")); - } - roots.put("root", superRoot.getChildNode("root")); + checkpoints.sort((cne1, cne2) -> { + long c1 = cne1.getNodeState().getLong("created"); + long c2 = cne2.getNodeState().getLong("created"); + return Long.compare(c1, c2); + }); + + for (ChildNodeEntry checkpoint : checkpoints) { + String name = checkpoint.getName(); + NodeState node = checkpoint.getNodeState(); + gcListener.info("found checkpoint {} created at {}.", + name, new Date(node.getLong("created"))); + roots.put("checkpoints/" + name + "/root", node.getChildNode("root")); } + roots.put("root", superRootAfter.getChildNode("root")); + return roots; } diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java index a72360ddb0..62fb9af92f 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java @@ -37,6 +37,7 @@ import java.util.concurrent.TimeUnit; import org.apache.jackrabbit.guava.common.base.Stopwatch; import org.apache.jackrabbit.oak.segment.SegmentCache; +import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType; import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType; import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFile; import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFileWriter; @@ -78,6 +79,8 @@ public class Compact { private int segmentCacheSize = DEFAULT_SEGMENT_CACHE_MB; + private GCType gcType = GCType.FULL; + private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR; private int concurrency = 1; @@ -164,6 +167,16 @@ public class Compact { return this; } + /** + * The garbage collection type used. If not specified it defaults to full compaction + * @param gcType the GC type + * @return this builder + */ + public Builder withGCType(GCType gcType) { + this.gcType = gcType; + return this; + } + /** * The compactor type to be used by compaction. 
If not specified it defaults to * "parallel" compactor @@ -277,6 +290,8 @@ public class Compact { private final long gcLogInterval; + private final GCType gcType; + private final CompactorType compactorType; private final int concurrency; @@ -288,6 +303,7 @@ public class Compact { this.segmentCacheSize = builder.segmentCacheSize; this.strictVersionCheck = !builder.force; this.gcLogInterval = builder.gcLogInterval; + this.gcType = builder.gcType; this.compactorType = builder.compactorType; this.concurrency = builder.concurrency; } @@ -303,7 +319,17 @@ public class Compact { Stopwatch watch = Stopwatch.createStarted(); try (FileStore store = newFileStore()) { - if (!store.compactFull()) { + boolean success = false; + switch (gcType) { + case FULL: + success = store.compactFull(); + break; + case TAIL: + success = store.compactTail(); + break; + } + + if (!success) { System.out.printf("Compaction cancelled after %s.\n", printableStopwatch(watch)); return 1; }