This is an automated email from the ASF dual-hosted git repository.

adulceanu pushed a commit to branch issues/OAK-9949-final
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit a2309992a222b8740b2fc2fd10eea0892db7ca3f
Author: Lucas Weitzendorf <lweitzend...@adobe.com>
AuthorDate: Wed Sep 28 14:59:13 2022 +0200

    OAK-9949 Offline Tail Compaction
---
 .../site/markdown/nodestore/segment/overview.md    |  6 ++-
 .../apache/jackrabbit/oak/run/CompactCommand.java  | 23 +++++++----
 .../oak/segment/aws/tool/AwsCompact.java           | 28 ++++++++++++-
 .../oak/segment/azure/tool/AzureCompact.java       | 28 ++++++++++++-
 .../oak/segment/CheckpointCompactor.java           | 46 +++++++++++++---------
 .../jackrabbit/oak/segment/tool/Compact.java       | 28 ++++++++++++-
 6 files changed, 129 insertions(+), 30 deletions(-)

diff --git a/oak-doc/src/site/markdown/nodestore/segment/overview.md 
b/oak-doc/src/site/markdown/nodestore/segment/overview.md
index 53f10542ab..499bcb3cf2 100644
--- a/oak-doc/src/site/markdown/nodestore/segment/overview.md
+++ b/oak-doc/src/site/markdown/nodestore/segment/overview.md
@@ -823,14 +823,16 @@ This option is optional and is disabled by default.
 ### <a name="compact"/> Compact
 
 ```
-java -jar oak-run.jar compact [--force] [--mmap] [--compactor] [--threads] 
SOURCE [--target-path DESTINATION] [--persistent-cache-path 
PERSISTENT_CACHE_PATH] [--persistent-cache-size-gb <PERSISTENT_CACHE_SIZE_GB>]
+java -jar oak-run.jar compact [--force] [--mmap] [--tail] [--compactor] 
[--threads] SOURCE [--target-path DESTINATION] [--persistent-cache-path 
PERSISTENT_CACHE_PATH] [--persistent-cache-size-gb <PERSISTENT_CACHE_SIZE_GB>]
 ```
 
 The `compact` command performs offline compaction of the local/remote Segment 
Store at `SOURCE`. 
 `SOURCE` must be a valid path/uri to an existing Segment Store. Currently, 
Azure Segment Store and AWS Segment Store the supported remote Segment Stores. 
 Please refer to the [Remote Segment Stores](#remote-segment-stores) section 
for details on how to correctly specify connection URIs.
 
-If the optional `--force [Boolean]` argument is set to `true` the tool ignores 
a non-matching Segment Store version. *CAUTION*: this will upgrade the Segment 
Store to the 
+With the optional `--tail` flag, only tail compaction is performed instead of 
a full compaction of the repository.
+
+If the optional `--force` flag is set, the tool ignores a non-matching Segment 
Store version. *CAUTION*: this will upgrade the Segment Store to the 
 latest version, which is incompatible with older versions. *There is no way to 
downgrade 
 an accidentally upgraded Segment Store*.  
 
diff --git 
a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java 
b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
index 47257982d3..f955278931 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
@@ -25,6 +25,7 @@ import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
 import org.apache.jackrabbit.oak.run.commons.Command;
 import org.apache.jackrabbit.oak.segment.azure.tool.AzureCompact;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions;
 import 
org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.aws.tool.AwsCompact;
 import org.apache.jackrabbit.oak.segment.tool.Compact;
@@ -48,12 +49,11 @@ class CompactCommand implements Command {
                     "is always enforced and this option is ignored.")
                 .withOptionalArg()
                 .ofType(Boolean.class);
-        OptionSpec<Boolean> forceArg = parser.accepts("force",
+        OptionSpec<Void> forceArg = parser.accepts("force",
                 "Force compaction and ignore a non matching segment store 
version. " +
                         "CAUTION: this will upgrade the segment store to the 
latest version, " +
-                        "which is incompatible with older versions of Oak.")
-                .withOptionalArg()
-                .ofType(Boolean.class);
+                        "which is incompatible with older versions of Oak.");
+        OptionSpec<Void> tailArg = parser.accepts("tail", "Use tail compaction 
instead of a full repository rewrite.");
         OptionSpec<String> compactor = parser.accepts("compactor",
                 "Allow the user to control compactor type to be used. Valid 
choices are \"classic\", \"diff\", \"parallel\". " +
                         "While \"classic\" is slower, it might be more stable, 
due to lack of optimisations employed " +
@@ -110,10 +110,13 @@ class CompactCommand implements Command {
                     .withTargetPath(targetPath.value(options))
                     
.withPersistentCachePath(persistentCachePath.value(options))
                     
.withPersistentCacheSizeGb(persistentCacheSizeGb.value(options))
-                    .withForce(isTrue(forceArg.value(options)))
+                    .withForce(options.has(forceArg))
                     .withGCLogInterval(Long.getLong("compaction-progress-log", 
150000))
                     .withConcurrency(nThreads.value(options));
 
+            if (options.has(tailArg)) {
+                azureBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
+            }
             if (options.has(compactor)) {
                 
azureBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options)));
             }
@@ -122,11 +125,14 @@ class CompactCommand implements Command {
         } else if (path.startsWith("aws:")) {
             AwsCompact.Builder awsBuilder = AwsCompact.builder()
                     .withPath(path)
-                    .withForce(isTrue(forceArg.value(options)))
+                    .withForce(options.has(forceArg))
                     .withSegmentCacheSize(Integer.getInteger("cache", 256))
                     .withGCLogInterval(Long.getLong("compaction-progress-log", 
150000))
                     .withConcurrency(nThreads.value(options));
 
+            if (options.has(tailArg)) {
+                awsBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
+            }
             if (options.has(compactor)) {
                 
awsBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options)));
             }
@@ -135,13 +141,16 @@ class CompactCommand implements Command {
         } else {
             Compact.Builder tarBuilder = Compact.builder()
                     .withPath(new File(path))
-                    .withForce(isTrue(forceArg.value(options)))
+                    .withForce(options.has(forceArg))
                     .withMmap(mmapArg.value(options))
                     .withOs(StandardSystemProperty.OS_NAME.value())
                     .withSegmentCacheSize(Integer.getInteger("cache", 256))
                     .withGCLogInterval(Long.getLong("compaction-progress-log", 
150000))
                     .withConcurrency(nThreads.value(options));
 
+            if (options.has(tailArg)) {
+                tarBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
+            }
             if (options.has(compactor)) {
                 
tarBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options)));
             }
diff --git 
a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java
 
b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java
index 1c77b9bab6..6405dcebd1 100644
--- 
a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java
+++ 
b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java
@@ -34,6 +34,7 @@ import org.apache.jackrabbit.guava.common.io.Files;
 
 import org.apache.jackrabbit.oak.segment.SegmentCache;
 import 
org.apache.jackrabbit.oak.segment.aws.tool.AwsToolUtils.SegmentStoreType;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import 
org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.file.FileStore;
 import org.apache.jackrabbit.oak.segment.file.JournalReader;
@@ -73,6 +74,8 @@ public class AwsCompact {
 
         private int segmentCacheSize = DEFAULT_SEGMENT_CACHE_MB;
 
+        private GCType gcType = GCType.FULL;
+
         private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR;
 
         private int concurrency = 1;
@@ -133,6 +136,16 @@ public class AwsCompact {
             return this;
         }
 
+        /**
+         * Sets the garbage collection type to use. If not specified, it defaults to 
{@link GCType#FULL}, i.e. full compaction.
+         * @param gcType the GC type ({@code FULL} runs a full compaction, {@code TAIL} a tail compaction)
+         * @return this builder
+         */
+        public Builder withGCType(GCType gcType) {
+            this.gcType = gcType;
+            return this;
+        }
+
         /**
          * The compactor type to be used by compaction. If not specified it 
defaults to
          * "parallel" compactor
@@ -173,6 +186,8 @@ public class AwsCompact {
 
     private final long gcLogInterval;
 
+    private final GCType gcType;
+
     private final CompactorType compactorType;
 
     private final int concurrency;
@@ -182,6 +197,7 @@ public class AwsCompact {
         this.segmentCacheSize = builder.segmentCacheSize;
         this.strictVersionCheck = !builder.force;
         this.gcLogInterval = builder.gcLogInterval;
+        this.gcType = builder.gcType;
         this.compactorType = builder.compactorType;
         this.concurrency = builder.concurrency;
     }
@@ -206,7 +222,17 @@ public class AwsCompact {
 
         try (FileStore store = newFileStore(persistence, 
Files.createTempDir(), strictVersionCheck, segmentCacheSize,
                 gcLogInterval, compactorType, concurrency)) {
-            if (!store.compactFull()) {
+            boolean success = false;
+            switch (gcType) {
+                case FULL:
+                    success = store.compactFull();
+                    break;
+                case TAIL:
+                    success = store.compactTail();
+                    break;
+            }
+
+            if (!success) {
                 System.out.printf("Compaction cancelled after %s.\n", 
printableStopwatch(watch));
                 return 1;
             }
diff --git 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
 
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
index a8e103c3c8..d4942f63f1 100644
--- 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
+++ 
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
@@ -36,6 +36,7 @@ import com.microsoft.azure.storage.blob.ListBlobItem;
 
 import org.apache.jackrabbit.oak.segment.SegmentCache;
 import org.apache.jackrabbit.oak.segment.azure.tool.ToolUtils.SegmentStoreType;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import 
org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.file.FileStore;
 import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager;
@@ -79,6 +80,8 @@ public class AzureCompact {
 
         private int segmentCacheSize = 2048;
 
+        private GCType gcType = GCType.FULL;
+
         private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR;
 
         private int concurrency = 1;
@@ -159,6 +162,16 @@ public class AzureCompact {
             return this;
         }
 
+        /**
+         * Sets the garbage collection type to use. If not specified, it defaults to 
{@link GCType#FULL}, i.e. full compaction.
+         * @param gcType the GC type ({@code FULL} runs a full compaction, {@code TAIL} a tail compaction)
+         * @return this builder
+         */
+        public Builder withGCType(GCType gcType) {
+            this.gcType = gcType;
+            return this;
+        }
+
         /**
          * The compactor type to be used by compaction. If not specified it 
defaults to
          * "parallel" compactor
@@ -225,6 +238,8 @@ public class AzureCompact {
 
     private final long gcLogInterval;
 
+    private final GCType gcType;
+
     private final CompactorType compactorType;
 
     private final int concurrency;
@@ -239,6 +254,7 @@ public class AzureCompact {
         this.segmentCacheSize = builder.segmentCacheSize;
         this.strictVersionCheck = !builder.force;
         this.gcLogInterval = builder.gcLogInterval;
+        this.gcType = builder.gcType;
         this.compactorType = builder.compactorType;
         this.concurrency = builder.concurrency;
         this.persistentCachePath = builder.persistentCachePath;
@@ -270,7 +286,17 @@ public class AzureCompact {
 
         try (FileStore store = newFileStore(splitPersistence, 
Files.createTempDir(), strictVersionCheck, segmentCacheSize,
                 gcLogInterval, compactorType, concurrency)) {
-            if (!store.compactFull()) {
+            boolean success = false;
+            switch (gcType) {
+                case FULL:
+                    success = store.compactFull();
+                    break;
+                case TAIL:
+                    success = store.compactTail();
+                    break;
+            }
+
+            if (!success) {
                 System.out.printf("Compaction cancelled after %s.\n", 
printableStopwatch(watch));
                 return 1;
             }
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java
index d5609a5a9d..e9d63cec05 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java
@@ -36,11 +36,13 @@ import java.util.Map;
 import java.util.Map.Entry;
 
 import org.apache.jackrabbit.oak.commons.Buffer;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
 import org.apache.jackrabbit.oak.segment.file.GCNodeWriteMonitor;
 import org.apache.jackrabbit.oak.segment.file.cancel.Canceller;
 import org.apache.jackrabbit.oak.spi.blob.BlobStore;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.DefaultNodeStateDiff;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.jetbrains.annotations.NotNull;
@@ -117,7 +119,7 @@ public class CheckpointCompactor implements Compactor {
     ) throws IOException {
         // Collect a chronologically ordered list of roots for the uncompacted
         // state. This list consists of all checkpoints followed by the root.
-        LinkedHashMap<String, NodeState> uncompactedRoots = 
collectRoots(uncompacted);
+        LinkedHashMap<String, NodeState> uncompactedRoots = collectRoots(base, 
uncompacted);
 
         // Compact the list of uncompacted roots to a list of compacted roots.
         LinkedHashMap<String, NodeState> compactedRoots = compact(
@@ -183,27 +185,35 @@ public class CheckpointCompactor implements Compactor {
      * the root.
      */
     @NotNull
-    private LinkedHashMap<String, NodeState> collectRoots(@Nullable NodeState 
superRoot) {
+    private LinkedHashMap<String, NodeState> collectRoots(@NotNull NodeState 
superRootBefore, @NotNull NodeState superRootAfter) {
         LinkedHashMap<String, NodeState> roots = newLinkedHashMap();
-        if (superRoot != null) {
-            List<ChildNodeEntry> checkpoints = newArrayList(
-                    
superRoot.getChildNode("checkpoints").getChildNodeEntries());
 
-            checkpoints.sort((cne1, cne2) -> {
-                long c1 = cne1.getNodeState().getLong("created");
-                long c2 = cne2.getNodeState().getLong("created");
-                return Long.compare(c1, c2);
-            });
+        List<ChildNodeEntry> checkpoints = newArrayList();
+        superRootAfter.getChildNode("checkpoints").compareAgainstBaseState(
+                superRootBefore.getChildNode("checkpoints"), new 
DefaultNodeStateDiff() {
+                    @Override
+                    public boolean childNodeAdded(String name, NodeState 
after) {
+                        checkpoints.add(new MemoryChildNodeEntry(name, after));
+                        return true;
+                    }
+                }
+        );
 
-            for (ChildNodeEntry checkpoint : checkpoints) {
-                String name = checkpoint.getName();
-                NodeState node = checkpoint.getNodeState();
-                gcListener.info("found checkpoint {} created at {}.",
-                    name, new Date(node.getLong("created")));
-                roots.put("checkpoints/" + name + "/root", 
node.getChildNode("root"));
-            }
-            roots.put("root", superRoot.getChildNode("root"));
+        checkpoints.sort((cne1, cne2) -> {
+            long c1 = cne1.getNodeState().getLong("created");
+            long c2 = cne2.getNodeState().getLong("created");
+            return Long.compare(c1, c2);
+        });
+
+        for (ChildNodeEntry checkpoint : checkpoints) {
+            String name = checkpoint.getName();
+            NodeState node = checkpoint.getNodeState();
+            gcListener.info("found checkpoint {} created at {}.",
+                name, new Date(node.getLong("created")));
+            roots.put("checkpoints/" + name + "/root", 
node.getChildNode("root"));
         }
+        roots.put("root", superRootAfter.getChildNode("root"));
+
         return roots;
     }
 
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java
index a72360ddb0..62fb9af92f 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java
@@ -37,6 +37,7 @@ import java.util.concurrent.TimeUnit;
 
 import org.apache.jackrabbit.guava.common.base.Stopwatch;
 import org.apache.jackrabbit.oak.segment.SegmentCache;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import 
org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFile;
 import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFileWriter;
@@ -78,6 +79,8 @@ public class Compact {
 
         private int segmentCacheSize = DEFAULT_SEGMENT_CACHE_MB;
 
+        private GCType gcType = GCType.FULL;
+
         private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR;
 
         private int concurrency = 1;
@@ -164,6 +167,16 @@ public class Compact {
             return this;
         }
 
+        /**
+         * Sets the garbage collection type to use. If not specified, it defaults to 
{@link GCType#FULL}, i.e. full compaction.
+         * @param gcType the GC type ({@code FULL} runs a full compaction, {@code TAIL} a tail compaction)
+         * @return this builder
+         */
+        public Builder withGCType(GCType gcType) {
+            this.gcType = gcType;
+            return this;
+        }
+
         /**
          * The compactor type to be used by compaction. If not specified it 
defaults to
          * "parallel" compactor
@@ -277,6 +290,8 @@ public class Compact {
 
     private final long gcLogInterval;
 
+    private final GCType gcType;
+
     private final CompactorType compactorType;
 
     private final int concurrency;
@@ -288,6 +303,7 @@ public class Compact {
         this.segmentCacheSize = builder.segmentCacheSize;
         this.strictVersionCheck = !builder.force;
         this.gcLogInterval = builder.gcLogInterval;
+        this.gcType = builder.gcType;
         this.compactorType = builder.compactorType;
         this.concurrency = builder.concurrency;
     }
@@ -303,7 +319,17 @@ public class Compact {
         Stopwatch watch = Stopwatch.createStarted();
 
         try (FileStore store = newFileStore()) {
-            if (!store.compactFull()) {
+            boolean success = false;
+            switch (gcType) {
+                case FULL:
+                    success = store.compactFull();
+                    break;
+                case TAIL:
+                    success = store.compactTail();
+                    break;
+            }
+
+            if (!success) {
                 System.out.printf("Compaction cancelled after %s.\n", 
printableStopwatch(watch));
                 return 1;
             }

Reply via email to