stefan-egli commented on code in PR #1805:
URL: https://github.com/apache/jackrabbit-oak/pull/1805#discussion_r1806355826


##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java:
##########
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import joptsimple.OptionSpec;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
+
+/**
+ * GenerateFullGCCommand generates garbage nodes in the repository in order to 
allow for testing fullGC functionality.
+ */
+public class GenerateFullGCCommand implements Command {
+    private static final Logger LOG = 
LoggerFactory.getLogger(GenerateFullGCCommand.class);
+
+    private static final String USAGE = "generateFullGC {<jdbc-uri> | 
<mongodb-uri>} [options]";
+
+    /**
+     * Root node for fullGC garbage generation.
+     * Necessary in order to allow cleanup of all generated garbage nodes by 
simply removing the root node.
+     */
+    public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot";
+
+    /**
+     * Base path for fullGC garbage generation. The timestamp of the run will 
be appended to this path,
+     * which is necessary in order for each garbage generation run to be 
unique and not overwrite previous ones.
+     * If continuous generation is enabled, the index of the run will also be 
appended to this path.
+     */
+    public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_";

Review Comment:
   Once we are under `FULLGC_GEN_ROOT_PATH`, whose name already indicates that
   the content is generated test garbage, do we still need every path element
   to contain `fullGC`?



##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java:
##########
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import joptsimple.OptionSpec;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
+
+/**
+ * GenerateFullGCCommand generates garbage nodes in the repository in order to 
allow for testing fullGC functionality.
+ */
+public class GenerateFullGCCommand implements Command {
+    private static final Logger LOG = 
LoggerFactory.getLogger(GenerateFullGCCommand.class);
+
+    private static final String USAGE = "generateFullGC {<jdbc-uri> | 
<mongodb-uri>} [options]";
+
+    /**
+     * Root node for fullGC garbage generation.
+     * Necessary in order to allow cleanup of all generated garbage nodes by 
simply removing the root node.
+     */
+    public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot";

Review Comment:
   I would recommend not adding content directly under the root node. Use
   something like "/tmp" instead, e.g. "/tmp/fullGcTestGarbage".



##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java:
##########
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import joptsimple.OptionSpec;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
+
+/**
+ * GenerateFullGCCommand generates garbage nodes in the repository in order to 
allow for testing fullGC functionality.
+ */
+public class GenerateFullGCCommand implements Command {
+    private static final Logger LOG = 
LoggerFactory.getLogger(GenerateFullGCCommand.class);
+
+    private static final String USAGE = "generateFullGC {<jdbc-uri> | 
<mongodb-uri>} [options]";
+
+    /**
+     * Root node for fullGC garbage generation.
+     * Necessary in order to allow cleanup of all generated garbage nodes by 
simply removing the root node.
+     */
+    public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot";
+
+    /**
+     * Base path for fullGC garbage generation. The timestamp of the run will 
be appended to this path,
+     * which is necessary in order for each garbage generation run to be 
unique and not overwrite previous ones.
+     * If continuous generation is enabled, the index of the run will also be 
appended to this path.
+     */
+    public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_";
+
+    /**
+     * Prefix for parent nodes under which garbage nodes will be created.
+     * The index of the parent node will be appended to this prefix.
+     */
+    public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_";
+    public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_";
+
+    public static String EMPTY_PROPERTY_NAME = "prop";
+
+    private int continuousRunIndex = 0;
+
+    private DocumentNodeStore documentNodeStore;
+
+    public DocumentNodeStore getDocumentNodeStore() {
+        return documentNodeStore;
+    }
+
+    private static class GenerateFullGCOptions extends Utils.NodeStoreOptions {
+
+        /**
+         * Sub-command for generating garbage.
+         * This is the default sub-command to run if none is specified.
+         */
+        static final String CMD_GENERATE = "generate";
+
+        /**
+         * Sub-command for cleaning up all generated garbage.
+         * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all 
of its children (recursively).
+         */
+        static final String CMD_CLEAN = "clean";
+
+        final OptionSpec<Integer> createGarbageNodesCount;
+        final OptionSpec<Integer> garbageNodesParentCount;
+        final OptionSpec<Integer> garbageType;
+        final OptionSpec<Integer> numberOfRuns;
+        final OptionSpec<Integer> generateIntervalSeconds;
+
+        public GenerateFullGCOptions(String usage) {
+            super(usage);
+            createGarbageNodesCount = parser
+                    .accepts("garbageNodesCount", "the total number of garbage 
nodes to create").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(0);
+            garbageNodesParentCount = parser
+                    .accepts("garbageNodesParentCount", "total number of 
parent nodes under which to create garbage nodes").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            garbageType = parser
+                    .accepts("garbageType", "garbage type to be generated - 
must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            numberOfRuns = parser
+                    .accepts("numberOfRuns", "the number of garbage generation 
runs to do. Only applies if greater than 1, " +
+                            "otherwise a single run will be 
done.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            generateIntervalSeconds = parser
+                    .accepts("generateIntervalSeconds", "the interval at which 
to generate a complete garbage count from createGarbageNotesCount. " +
+                            "Applies only if numberOfRuns is greater than 
1.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(60);
+        }
+
+        public GenerateFullGCOptions parse(String[] args) {
+            super.parse(args);
+            return this;
+        }
+
+        String getSubCmd() {
+            List<String> args = getOtherArgs();
+            if (args.size() > 0) {
+                return args.get(0);
+            }
+            return CMD_GENERATE;
+        }
+
+        public int getCreateGarbageNodesCount() {
+            return createGarbageNodesCount.value(options);
+        }
+
+        public int getGarbageNodesParentCount() {
+            return garbageNodesParentCount.value(options);
+        }
+
+        public int getGarbageType() {
+            return garbageType.value(options);
+        }
+
+        public int getNumberOfRuns() {
+            return numberOfRuns.value(options);
+        }
+
+        public int getGenerateIntervalSeconds() {
+            return generateIntervalSeconds.value(options);
+        }
+    }
+
+    public void execute(String... args) throws Exception {
+        Closer closer = Closer.create();
+        try {
+            execute(closer, args);
+        } catch (Throwable e) {
+            LOG.error("Command failed", e);
+            throw closer.rethrow(e);
+        } finally {
+            closer.close();
+        }
+    }
+
+    /**
+     * Method with passed closer is necessary in order to allow for unit tests 
to check the output of the command.
+     * It is the responsibility of the caller to close the closer.
+     *
+     * Returns the list of generated garbage base paths (under the garbage 
root node).
+     * @param closer
+     * @param args
+     * @throws Exception
+     */
+    public List<String> execute(Closer closer, String... args) throws 
Exception {
+        continuousRunIndex = 0;
+
+        List<String> generateBasePaths = new ArrayList<>();
+
+        GenerateFullGCOptions options = new 
GenerateFullGCOptions(USAGE).parse(args);
+        String subCmd = options.getSubCmd();
+
+        if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) {
+            if (options.getNumberOfRuns() > 1 && 
options.getGenerateIntervalSeconds() > 0) {
+                generateBasePaths.addAll(generateGarbageContinuously(options, 
closer));
+            } else {
+                generateBasePaths.add(generateGarbage(options, closer, 0));
+            }
+        } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) {
+            cleanGarbage(options, closer);
+        } else {
+            System.err.println("unknown revisions command: " + subCmd);
+        }
+
+        return generateBasePaths;
+    }
+
+    private List<String> generateGarbageContinuously(GenerateFullGCOptions 
options, Closer closer) throws IOException, Exception {
+        ScheduledExecutorService executor = 
Executors.newScheduledThreadPool(1);
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long startGenTimestamp = System.currentTimeMillis();
+
+        List<String> generatedGarbageBasePaths = new ArrayList<>();
+
+        int numberOfRuns = options.getNumberOfRuns();
+        int intervalSeconds = options.getGenerateIntervalSeconds();
+        Runnable task = () -> {
+            if (continuousRunIndex < numberOfRuns) {
+                try {
+                    String genBasePath = generateGarbage(options, closer, 
continuousRunIndex, builder, startGenTimestamp);
+                    generatedGarbageBasePaths.add(genBasePath);
+                } catch (Exception e) {
+                    LOG.error("Error generating garbage in run " + 
continuousRunIndex, e);
+                }
+                LOG.info("Task executed. Count: " + (continuousRunIndex + 1));
+                continuousRunIndex++;
+            } else {
+                // Shutdown the executor once the task has run numberOfRuns 
times
+                executor.shutdown();
+                LOG.info("Task completed " + numberOfRuns + " times. Stopping 
execution.");
+            }
+        };
+
+        // Schedule the task to run every intervalSeconds
+        executor.scheduleAtFixedRate(task, 0, intervalSeconds, 
TimeUnit.SECONDS);
+
+        return generatedGarbageBasePaths;
+    }
+
+    /**
+     * Generate garbage nodes in the repository in order to allow for testing 
fullGC functionality.
+     *
+     * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the 
root).
+     * @param options
+     * @param closer
+     * @param runIndex
+     * @return
+     * @throws IOException
+     * @throws Exception
+     */
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex) throws IOException, Exception {
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long generationTimestamp = System.currentTimeMillis();
+
+        return generateGarbage(options, closer, runIndex, builder, 
generationTimestamp);
+    }
+
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex,
+                                  DocumentNodeStoreBuilder<?> builder, long 
timestamp) throws IOException, Exception {
+
+        if (builder == null) {
+            System.err.println("generateFullGC mode only available for 
DocumentNodeStore");
+            System.exit(1);
+        }
+
+        String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + 
runIndex;
+        System.out.println("Generating fullGC on the document: " + 
generationBasePath);
+        documentNodeStore = builder.build();
+
+        VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options);
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) {
+            LOG.error("Invalid garbageType specified. Must be one of the 
following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS");
+            System.exit(1);
+        }
+
+        //1. Create nodes with properties
+        NodeBuilder rootNode = documentNodeStore.getRoot().builder();
+        NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH);
+        
garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", 
"nt:file", NAME);
+
+        int nodesCountUnderParent = options.getCreateGarbageNodesCount() / 
options.getGarbageNodesParentCount();
+        for(int i = 0; i < options.getGarbageNodesParentCount(); i ++) {
+            
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).setProperty("jcr:primaryType", "nt:folder", NAME);
+
+            for(int j = 0; j < nodesCountUnderParent; j ++) {
+                
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).
+                        setProperty("jcr:primaryType", "nt:file", NAME);
+
+                if (fullGCMode == 
VersionGarbageCollector.FullGCMode.EMPTYPROPS || fullGCMode == 
VersionGarbageCollector.FullGCMode.GAP_ORPHANS_EMPTYPROPS) {
+                    
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).
+                            setProperty(EMPTY_PROPERTY_NAME, "bar", STRING);
+                }
+            }
+        }
+        documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, 
CommitInfo.EMPTY);
+        documentNodeStore.runBackgroundOperations();
+
+
+        //2. Generate garbage nodes - EMPTY_PROPERTIES
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.EMPTYPROPS) {
+            for (int i = 0; i < options.getGarbageNodesParentCount(); i++) {
+                for (int j = 0; j < nodesCountUnderParent; j++) {
+                    
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).
+                            removeProperty(EMPTY_PROPERTY_NAME);
+                }
+            }
+        }
+        documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, 
CommitInfo.EMPTY);
+        documentNodeStore.runBackgroundOperations();
+
+        //3.1. Generate garbage nodes - GAP_ORPHANS - remove parent nodes
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.GAP_ORPHANS) {
+            StringBuilder sbNodePath = new StringBuilder();
+            List<String> deleteNodePaths = new ArrayList<>();
+            for (int i = 0; i < options.getGarbageNodesParentCount(); i++) {
+
+                sbNodePath.setLength(0);
+                
sbNodePath.append("3:/").append(FULLGC_GEN_ROOT_PATH).append("/").append(generationBasePath).append("/").
+                        append(FULLGC_GEN_PARENT_NODE_PREFIX).append(i);
+                deleteNodePaths.add(sbNodePath.toString());
+            }
+            // Remove all parent nodes
+            
documentNodeStore.getDocumentStore().remove(org.apache.jackrabbit.oak.plugins.document.Collection.NODES,
 deleteNodePaths);
+            documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, 
CommitInfo.EMPTY);
+            documentNodeStore.runBackgroundOperations();
+        }
+
+        return generationBasePath;
+    }
+
+    /**
+     * Cleans up all generated garbage by removing the node 
FULLGC_GEN_ROOT_PATH and all of
+     * its children (recursively)
+     * @param options
+     * @param closer
+     * @throws IOException
+     * @throws Exception
+     */
+    private void cleanGarbage(GenerateFullGCOptions options, Closer closer) 
throws IOException, Exception {
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+
+        if (builder == null) {
+            System.err.println("generateFullGC mode only available for 
DocumentNodeStore");
+            System.exit(1);
+        }
+
+        System.out.println("Cleaning up all generated garbage:");
+        documentNodeStore = builder.build();
+
+        NodeBuilder rootBuilder = documentNodeStore.getRoot().builder();
+
+        NodeBuilder generatedGarbageRootBuilder = 
rootBuilder.child(FULLGC_GEN_ROOT_PATH);
+
+        String garbageRootNodePath = "1:/"+FULLGC_GEN_ROOT_PATH;
+        List<String> childNodePaths = new ArrayList<>();
+        childNodePaths.add(garbageRootNodePath);
+
+        // get all paths of the tree nodes under the garbage root node
+        getTreeNodePaths(generatedGarbageRootBuilder, garbageRootNodePath, 
childNodePaths, 1);
+
+        
documentNodeStore.getDocumentStore().remove(org.apache.jackrabbit.oak.plugins.document.Collection.NODES,
 childNodePaths);
+        documentNodeStore.merge(rootBuilder, EmptyHook.INSTANCE, 
CommitInfo.EMPTY);
+        documentNodeStore.runBackgroundOperations();
+    }
+
+    /**
+     * Recursively get all paths of the tree nodes under the given root node.
+     * @param rootNode
+     * @param basePath
+     * @param treeNodePaths
+     * @param nodeLevel
+     */
+    private void getTreeNodePaths(NodeBuilder rootNode, String basePath,
+                                  List<String> treeNodePaths, int nodeLevel) {
+        String childBasePath = basePath.replaceFirst(nodeLevel + ":/", 
(nodeLevel + 1) + ":/");
+        for (String childNodeName : rootNode.getChildNodeNames()) {
+            String childPath = childBasePath + "/" + childNodeName;
+            treeNodePaths.add(childPath);
+            getTreeNodePaths(rootNode.child(childNodeName), childPath, 
treeNodePaths, nodeLevel + 1);
+        }
+    }
+
+    /**
+     * Used in unit tests for retrieving a document by path from the document 
store using the documentNodeStore
+     * of this class.
+     * @param collection
+     * @param key
+     * @param maxCacheAge
+     * @return
+     * @param <T>
+     * @throws DocumentStoreException
+     */
+    @Nullable
+    public <T extends Document> T getDocument(Collection<T> collection, String 
key, int maxCacheAge)

Review Comment:
   As this is only used by test code, I'd move it to the test class (and, if
   needed, add an accessor for `documentNodeStore`, for example).



##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/AvailableModes.java:
##########
@@ -75,5 +75,6 @@ public final class AvailableModes {
             .put("segment-copy", new SegmentCopyCommand())
             .put("server", new ServerCommand())
             .put("purge-index-versions", new 
LucenePurgeOldIndexVersionCommand())
+            .put("generateFullGC", new GenerateFullGCCommand())

Review Comment:
   +1 for renaming the command



##########
oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/document/GenerateFullGCCommandTest.java:
##########
@@ -0,0 +1,293 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.document;
+
+import org.apache.jackrabbit.guava.common.collect.ImmutableList;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
+import org.apache.jackrabbit.oak.run.GenerateFullGCCommand;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static 
org.apache.jackrabbit.oak.plugins.document.CommandTestUtils.captureSystemOut;
+import static 
org.apache.jackrabbit.oak.run.GenerateFullGCCommand.EMPTY_PROPERTY_NAME;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeTrue;
+
+public class GenerateFullGCCommandTest {
+
+    final String OPTION_GARBAGE_NODES_COUNT = "--garbageNodesCount";
+    final String OPTION_GARBAGE_NODES_PARENT_COUNT = 
"--garbageNodesParentCount";
+    final String OPTION_GARBAGE_TYPE = "--garbageType";
+    final String OPTION_NUMBER_OF_RUNS = "--numberOfRuns";
+    final String OPTION_GENERATE_INTERVAL_SECONDS = 
"--generateIntervalSeconds";
+
+    @Rule
+    public MongoConnectionFactory connectionFactory = new 
MongoConnectionFactory();
+
+    @Rule
+    public DocumentMKBuilderProvider builderProvider = new 
DocumentMKBuilderProvider();
+
+    private DocumentNodeStore ns;
+
+    private Closer closer;
+
+    @BeforeClass
+    public static void assumeMongoDB() {
+        assumeTrue(MongoUtils.isAvailable());
+    }
+
+    @Before
+    public void before() {
+        ns = createDocumentNodeStore();
+    }
+
+    @After
+    public void after() {
+        try {
+            closer.close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        ns.dispose();
+    }
+
+    private DocumentNodeStore createDocumentNodeStore() {
+        MongoConnection c = connectionFactory.getConnection();
+        assertNotNull(c);
+        MongoUtils.dropCollections(c.getDatabase());
+        return 
builderProvider.newBuilder().setFullGCEnabled(false).setLeaseCheckMode(LeaseCheckMode.DISABLED).setAsyncDelay(0)
+                .setMongoDB(c.getMongoClient(), c.getDBName()).getNodeStore();
+    }
+
+    private List<String> getChildNodeNames(NodeState node) {
+        List<String> childNodeNames = new ArrayList<>();
+        for (String childNodeName : node.getChildNodeNames()) {
+            childNodeNames.add(childNodeName);
+        }
+        return childNodeNames;
+    }
+
+    @Ignore
+    @Test
+    public void generateGarbageEmptyPropsUnderOneParentOnCustomMongoDB() {
+        ns.dispose();
+
+        // this should be a valid mongoDB connection string for the MongoDB on 
which to generate the garbage
+        String mongoDBConnString = "";
+        GenerateFullGCCmd cmd = new GenerateFullGCCmd( mongoDBConnString, 
OPTION_GARBAGE_NODES_COUNT, "10", OPTION_GARBAGE_NODES_PARENT_COUNT, "1",
+                OPTION_GARBAGE_TYPE, "1");
+        String output = captureSystemOut(cmd);
+
+        //sleep thread for 10 seconds to allow the garbage generation to 
complete
+        try {
+            Thread.sleep(10000);
+        } catch (InterruptedException e) {
+            e.printStackTrace();
+        }
+
+        DocumentNodeStore nodeStore = cmd.getCommand().getDocumentNodeStore();
+        NodeState garbageRootNodeState = 
nodeStore.getRoot().getChildNode(GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH);
+        List<String> garbageRootNodeChildNames = 
getChildNodeNames(garbageRootNodeState);
+
+        assertEquals(garbageRootNodeChildNames.size(), 1);
+        List<String> propParentsGarbageNodeNames = 
getChildNodeNames(garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0)));
+        assertEquals(propParentsGarbageNodeNames.size(), 1);
+
+        List<String> propGarbageNodeNames = 
getChildNodeNames(garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0)).getChildNode(propParentsGarbageNodeNames.get(0)));
+        assertEquals(propGarbageNodeNames.size(), 10);
+    }
+
+    @Test
+    public void cleanupAllGarbage() {
+
+        // generate garbage first
+        GenerateFullGCCmd cmdGenerateGarbage = new 
GenerateFullGCCmd(OPTION_GARBAGE_NODES_COUNT, "10", 
OPTION_GARBAGE_NODES_PARENT_COUNT, "1",
+                OPTION_GARBAGE_TYPE, "2");
+        cmdGenerateGarbage.run();
+        Closer cmd1Closer = cmdGenerateGarbage.getCloser();
+
+        // cleanup garbage
+        String mongoDBConnString = 
"mongodb://cm-p107053-e251100-username:3fuep9uhpd9y...@cmp107053e251100r355e01-shard-00-00.wxmqq.mongodb.net:27017,cmp107053e251100r355e01-shard-00-01.wxmqq.mongodb.net:27017,cmp107053e251100r355e01-shard-00-02.wxmqq.mongodb.net:27017/cm-p107053-e251100-database?ssl=true&authSource=admin&replicaSet=atlas-4x602o-shard-0&retryWrites=true&readPreference=primaryPreferred&readConcernLevel=majority&w=majority";
+        GenerateFullGCCmd cmdClean = new GenerateFullGCCmd(mongoDBConnString, 
"clean");
+        cmdClean.run();
+        closer = cmdClean.getCloser();
+
+        NodeDocument garbageRoot = 
cmdClean.getCommand().getDocument(Collection.NODES, "1:/" + 
GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH, 0);
+        // garbage root node should be either deleted or empty (no children)
+        assertTrue(garbageRoot == null || !garbageRoot.hasChildren());
+
+        try {
+            cmd1Closer.close();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    @Test
+    public void generateGarbageEmptyPropsUnderOneParent() {
+        ns.dispose();
+
+        GenerateFullGCCmd cmd = new 
GenerateFullGCCmd(OPTION_GARBAGE_NODES_COUNT, "10", 
OPTION_GARBAGE_NODES_PARENT_COUNT, "1",
+                OPTION_GARBAGE_TYPE, "1");
+        String output = captureSystemOut(cmd);
+        closer = cmd.getCloser();
+
+        List<String> generatedBasePaths = cmd.getGeneratedBasePaths();
+
+        DocumentNodeStore nodeStore = cmd.getCommand().getDocumentNodeStore();
+        NodeState garbageRootNodeState = 
nodeStore.getRoot().getChildNode(GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH);
+        List<String> garbageRootNodeChildNames = 
getChildNodeNames(garbageRootNodeState);
+
+        assertEquals(garbageRootNodeChildNames.size(), 1);
+        for (String generateBasePath : generatedBasePaths) {
+            assertTrue(garbageRootNodeChildNames.contains(generateBasePath));
+        }
+
+        List<String> propParentsGarbageNodeNames = 
getChildNodeNames(garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0)));
+        assertEquals(propParentsGarbageNodeNames.size(), 1);
+
+        NodeState emptyPropsParentNode = 
garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0)).getChildNode(propParentsGarbageNodeNames.get(0));
+        List<String> propGarbageNodeNames = 
getChildNodeNames(emptyPropsParentNode);
+        assertEquals(propGarbageNodeNames.size(), 10);
+
+        for (String propGarbageNodeName : propGarbageNodeNames) {
+            NodeState propGarbageNode = 
emptyPropsParentNode.getChildNode(propGarbageNodeName);
+            assertNull(propGarbageNode.getProperty(EMPTY_PROPERTY_NAME));
+        }
+    }
+
+    @Test
+    public void generateGarbageGapOrphansUnderOneParent() {
+        ns.dispose();
+
+        int garbageNodesCount = 10;
+        int garbageNodesParentCount = 1;
+
+        testGenerateGapOrphans(garbageNodesCount, garbageNodesParentCount);
+    }
+
+    @Test
+    public void generateGarbageGapOrphansUnderMultipleParents() {
+        ns.dispose();
+
+        int garbageNodesCount = 20;
+        int garbageNodesParentCount = 4;
+
+        testGenerateGapOrphans(garbageNodesCount, garbageNodesParentCount);
+    }
+
+    /**
+     * Calls the GenerateFullGCCommand to generate gap orphans and verifies 
the generated gap orphans:
+     * - checks that parents of gap orphans are missing by retrieving the 
nodes from the document store
+     * - checks that the expected number gap orphans are generated by 
retrieving the nodes from the document store
+     * @param garbageNodesCount
+     * @param garbageNodesParentCount
+     */
+    private void testGenerateGapOrphans(int garbageNodesCount, int 
garbageNodesParentCount) {
+        GenerateFullGCCmd cmd = new 
GenerateFullGCCmd(OPTION_GARBAGE_NODES_COUNT, String.valueOf(garbageNodesCount),
+                OPTION_GARBAGE_NODES_PARENT_COUNT, 
String.valueOf(garbageNodesParentCount),
+                OPTION_GARBAGE_TYPE, "2");
+        String output = captureSystemOut(cmd);
+        closer = cmd.getCloser();
+
+        int nodesPerParent = garbageNodesCount / garbageNodesParentCount;
+
+        List<String> generatedBasePaths = cmd.getGeneratedBasePaths();
+
+        DocumentNodeStore nodeStore = cmd.getCommand().getDocumentNodeStore();
+        NodeState garbageRootNodeState = 
nodeStore.getRoot().getChildNode(GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH);
+        List<String> garbageRootNodeChildNames = 
getChildNodeNames(garbageRootNodeState);
+
+        assertEquals(garbageRootNodeChildNames.size(), 1);
+        for (String generateBasePath : generatedBasePaths) {
+            assertTrue(garbageRootNodeChildNames.contains(generateBasePath));
+        }
+
+        String generateBasePath = generatedBasePaths.get(0);
+        for (int i = 0; i < garbageNodesParentCount; i ++) {
+            NodeDocument missingDocument = 
cmd.getCommand().getDocument(Collection.NODES, "3:/"

Review Comment:
   I would suggest using `Utils.getIdFromPath` etc. rather than hard-coding the 
depth - e.g. changing the base path to `/tmp/xy` would currently require every 
hard-coded depth prefix to be adjusted by hand
   (this applies in a few other places as well)



##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java:
##########
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import joptsimple.OptionSpec;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
+
+/**
+ * GenerateFullGCCommand generates garbage nodes in the repository in order to 
allow for testing fullGC functionality.
+ */
+public class GenerateFullGCCommand implements Command {
+    private static final Logger LOG = 
LoggerFactory.getLogger(GenerateFullGCCommand.class);
+
+    private static final String USAGE = "generateFullGC {<jdbc-uri> | 
<mongodb-uri>} [options]";
+
+    /**
+     * Root node for fullGC garbage generation.
+     * Necessary in order to allow cleanup of all generated garbage nodes by 
simply removing the root node.
+     */
+    public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot";
+
+    /**
+     * Base path for fullGC garbage generation. The timestamp of the run will 
be appended to this path,
+     * which is necessary in order for each garbage generation run to be 
unique and not overwrite previous ones.
+     * If continuous generation is enabled, the index of the run will also be 
appended to this path.
+     */
+    public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_";
+
+    /**
+     * Prefix for parent nodes under which garbage nodes will be created.
+     * The index of the parent node will be appended to this prefix.
+     */
+    public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_";
+    public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_";
+
+    public static String EMPTY_PROPERTY_NAME = "prop";
+
+    private int continuousRunIndex = 0;
+
+    private DocumentNodeStore documentNodeStore;
+
+    public DocumentNodeStore getDocumentNodeStore() {
+        return documentNodeStore;
+    }
+
+    private static class GenerateFullGCOptions extends Utils.NodeStoreOptions {
+
+        /**
+         * Sub-command for generating garbage.
+         * This is the default sub-command to run if none is specified.
+         */
+        static final String CMD_GENERATE = "generate";
+
+        /**
+         * Sub-command for cleaning up all generated garbage.
+         * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all 
of its children (recursively).
+         */
+        static final String CMD_CLEAN = "clean";
+
+        final OptionSpec<Integer> createGarbageNodesCount;
+        final OptionSpec<Integer> garbageNodesParentCount;
+        final OptionSpec<Integer> garbageType;
+        final OptionSpec<Integer> numberOfRuns;
+        final OptionSpec<Integer> generateIntervalSeconds;
+
+        public GenerateFullGCOptions(String usage) {
+            super(usage);
+            createGarbageNodesCount = parser
+                    .accepts("garbageNodesCount", "the total number of garbage 
nodes to create").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(0);
+            garbageNodesParentCount = parser
+                    .accepts("garbageNodesParentCount", "total number of 
parent nodes under which to create garbage nodes").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            garbageType = parser
+                    .accepts("garbageType", "garbage type to be generated - 
must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            numberOfRuns = parser
+                    .accepts("numberOfRuns", "the number of garbage generation 
runs to do. Only applies if greater than 1, " +
+                            "otherwise a single run will be 
done.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            generateIntervalSeconds = parser
+                    .accepts("generateIntervalSeconds", "the interval at which 
to generate a complete garbage count from createGarbageNotesCount. " +
+                            "Applies only if numberOfRuns is greater than 
1.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(60);
+        }
+
+        public GenerateFullGCOptions parse(String[] args) {
+            super.parse(args);
+            return this;
+        }
+
+        String getSubCmd() {
+            List<String> args = getOtherArgs();
+            if (args.size() > 0) {
+                return args.get(0);
+            }
+            return CMD_GENERATE;
+        }
+
+        public int getCreateGarbageNodesCount() {
+            return createGarbageNodesCount.value(options);
+        }
+
+        public int getGarbageNodesParentCount() {
+            return garbageNodesParentCount.value(options);
+        }
+
+        public int getGarbageType() {
+            return garbageType.value(options);
+        }
+
+        public int getNumberOfRuns() {
+            return numberOfRuns.value(options);
+        }
+
+        public int getGenerateIntervalSeconds() {
+            return generateIntervalSeconds.value(options);
+        }
+    }
+
+    public void execute(String... args) throws Exception {
+        Closer closer = Closer.create();
+        try {
+            execute(closer, args);
+        } catch (Throwable e) {
+            LOG.error("Command failed", e);
+            throw closer.rethrow(e);
+        } finally {
+            closer.close();
+        }
+    }
+
+    /**
+     * Method with passed closer is necessary in order to allow for unit tests 
to check the output of the command.
+     * It is the responsibility of the caller to close the closer.
+     *
+     * Returns the list of generated garbage base paths (under the garbage 
root node).
+     * @param closer
+     * @param args
+     * @throws Exception
+     */
+    public List<String> execute(Closer closer, String... args) throws 
Exception {
+        continuousRunIndex = 0;
+
+        List<String> generateBasePaths = new ArrayList<>();
+
+        GenerateFullGCOptions options = new 
GenerateFullGCOptions(USAGE).parse(args);
+        String subCmd = options.getSubCmd();
+
+        if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) {
+            if (options.getNumberOfRuns() > 1 && 
options.getGenerateIntervalSeconds() > 0) {
+                generateBasePaths.addAll(generateGarbageContinuously(options, 
closer));
+            } else {
+                generateBasePaths.add(generateGarbage(options, closer, 0));
+            }
+        } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) {
+            cleanGarbage(options, closer);
+        } else {
+            System.err.println("unknown revisions command: " + subCmd);
+        }
+
+        return generateBasePaths;
+    }
+
+    private List<String> generateGarbageContinuously(GenerateFullGCOptions 
options, Closer closer) throws IOException, Exception {
+        ScheduledExecutorService executor = 
Executors.newScheduledThreadPool(1);
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long startGenTimestamp = System.currentTimeMillis();
+
+        List<String> generatedGarbageBasePaths = new ArrayList<>();
+
+        int numberOfRuns = options.getNumberOfRuns();
+        int intervalSeconds = options.getGenerateIntervalSeconds();
+        Runnable task = () -> {
+            if (continuousRunIndex < numberOfRuns) {
+                try {
+                    String genBasePath = generateGarbage(options, closer, 
continuousRunIndex, builder, startGenTimestamp);
+                    generatedGarbageBasePaths.add(genBasePath);
+                } catch (Exception e) {
+                    LOG.error("Error generating garbage in run " + 
continuousRunIndex, e);
+                }
+                LOG.info("Task executed. Count: " + (continuousRunIndex + 1));
+                continuousRunIndex++;
+            } else {
+                // Shutdown the executor once the task has run numberOfRuns 
times
+                executor.shutdown();
+                LOG.info("Task completed " + numberOfRuns + " times. Stopping 
execution.");
+            }
+        };
+
+        // Schedule the task to run every intervalSeconds
+        executor.scheduleAtFixedRate(task, 0, intervalSeconds, 
TimeUnit.SECONDS);
+
+        return generatedGarbageBasePaths;
+    }
+
+    /**
+     * Generate garbage nodes in the repository in order to allow for testing 
fullGC functionality.
+     *
+     * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the 
root).
+     * @param options
+     * @param closer
+     * @param runIndex
+     * @return
+     * @throws IOException
+     * @throws Exception
+     */
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex) throws IOException, Exception {
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long generationTimestamp = System.currentTimeMillis();
+
+        return generateGarbage(options, closer, runIndex, builder, 
generationTimestamp);
+    }
+
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex,
+                                  DocumentNodeStoreBuilder<?> builder, long 
timestamp) throws IOException, Exception {
+
+        if (builder == null) {
+            System.err.println("generateFullGC mode only available for 
DocumentNodeStore");
+            System.exit(1);
+        }
+
+        String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + 
runIndex;
+        System.out.println("Generating fullGC on the document: " + 
generationBasePath);
+        documentNodeStore = builder.build();
+
+        VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options);
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) {
+            LOG.error("Invalid garbageType specified. Must be one of the 
following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS");
+            System.exit(1);
+        }
+
+        //1. Create nodes with properties
+        NodeBuilder rootNode = documentNodeStore.getRoot().builder();
+        NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH);
+        
garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", 
"nt:file", NAME);

Review Comment:
   I would use `org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE` instead.
   
   Also, I would not use `nt:file`, as that requires e.g. a `jcr:content` child 
etc. This wasn't noticed in the test because the merge happens without a commit 
hook - something that's a bit risky. It is probably an acceptable exception, as 
this is primarily a test command, but this aspect isn't ideal and might lead to 
surprises. We could use 
`org.apache.jackrabbit.oak.spi.nodetype.NodeTypeConstants.NT_OAK_UNSTRUCTURED` 
instead: as that type is unorderable and unstructured, it would be a good fix, 
since it doesn't have any special requirements otherwise.



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java:
##########
@@ -168,7 +168,7 @@ public class VersionGarbageCollector {
      * Ultimately the goal is to clean up all possible garbage. After 
hardening these modes
      * might no longer be supported.
      */
-    enum FullGCMode {
+    public enum FullGCMode {

Review Comment:
   Not sure we want this to be public. What about adding a helper under 
`org.apache.jackrabbit.oak.plugins.document` in oak-run instead?
   
   That helper could perhaps also have some `includesEmptyProps` / 
`includesGapOrphans` etc. methods - as a fullGC mode is going to be a combination 
of different modes (and the current check is hard-coded to only modes 1 and 2).



##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java:
##########
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import joptsimple.OptionSpec;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
+
+/**
+ * GenerateFullGCCommand generates garbage nodes in the repository in order to 
allow for testing fullGC functionality.
+ */
+public class GenerateFullGCCommand implements Command {
+    private static final Logger LOG = 
LoggerFactory.getLogger(GenerateFullGCCommand.class);
+
+    private static final String USAGE = "generateFullGC {<jdbc-uri> | 
<mongodb-uri>} [options]";
+
+    /**
+     * Root node for fullGC garbage generation.
+     * Necessary in order to allow cleanup of all generated garbage nodes by 
simply removing the root node.
+     */
+    public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot";
+
+    /**
+     * Base path for fullGC garbage generation. The timestamp of the run will 
be appended to this path,
+     * which is necessary in order for each garbage generation run to be 
unique and not overwrite previous ones.
+     * If continuous generation is enabled, the index of the run will also be 
appended to this path.
+     */
+    public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_";
+
+    /**
+     * Prefix for parent nodes under which garbage nodes will be created.
+     * The index of the parent node will be appended to this prefix.
+     */
+    public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_";
+    public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_";
+
+    public static String EMPTY_PROPERTY_NAME = "prop";
+
+    private int continuousRunIndex = 0;
+
+    private DocumentNodeStore documentNodeStore;
+
+    public DocumentNodeStore getDocumentNodeStore() {
+        return documentNodeStore;
+    }
+
+    private static class GenerateFullGCOptions extends Utils.NodeStoreOptions {
+
+        /**
+         * Sub-command for generating garbage.
+         * This is the default sub-command to run if none is specified.
+         */
+        static final String CMD_GENERATE = "generate";
+
+        /**
+         * Sub-command for cleaning up all generated garbage.
+         * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all 
of its children (recursively).
+         */
+        static final String CMD_CLEAN = "clean";
+
+        final OptionSpec<Integer> createGarbageNodesCount;
+        final OptionSpec<Integer> garbageNodesParentCount;
+        final OptionSpec<Integer> garbageType;
+        final OptionSpec<Integer> numberOfRuns;
+        final OptionSpec<Integer> generateIntervalSeconds;
+
+        public GenerateFullGCOptions(String usage) {
+            super(usage);
+            createGarbageNodesCount = parser
+                    .accepts("garbageNodesCount", "the total number of garbage 
nodes to create").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(0);
+            garbageNodesParentCount = parser
+                    .accepts("garbageNodesParentCount", "total number of 
parent nodes under which to create garbage nodes").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            garbageType = parser
+                    .accepts("garbageType", "garbage type to be generated - 
must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            numberOfRuns = parser
+                    .accepts("numberOfRuns", "the number of garbage generation 
runs to do. Only applies if greater than 1, " +
+                            "otherwise a single run will be 
done.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            generateIntervalSeconds = parser
+                    .accepts("generateIntervalSeconds", "the interval at which 
to generate a complete garbage count from createGarbageNotesCount. " +
+                            "Applies only if numberOfRuns is greater than 
1.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(60);
+        }
+
+        public GenerateFullGCOptions parse(String[] args) {
+            super.parse(args);
+            return this;
+        }
+
+        String getSubCmd() {
+            List<String> args = getOtherArgs();
+            if (args.size() > 0) {
+                return args.get(0);
+            }
+            return CMD_GENERATE;
+        }
+
+        public int getCreateGarbageNodesCount() {
+            return createGarbageNodesCount.value(options);
+        }
+
+        public int getGarbageNodesParentCount() {
+            return garbageNodesParentCount.value(options);
+        }
+
+        public int getGarbageType() {
+            return garbageType.value(options);
+        }
+
+        public int getNumberOfRuns() {
+            return numberOfRuns.value(options);
+        }
+
+        public int getGenerateIntervalSeconds() {
+            return generateIntervalSeconds.value(options);
+        }
+    }
+
+    public void execute(String... args) throws Exception {
+        Closer closer = Closer.create();
+        try {
+            execute(closer, args);
+        } catch (Throwable e) {
+            LOG.error("Command failed", e);
+            throw closer.rethrow(e);
+        } finally {
+            closer.close();
+        }
+    }
+
+    /**
+     * Method with passed closer is necessary in order to allow for unit tests 
to check the output of the command.
+     * It is the responsibility of the caller to close the closer.
+     *
+     * Returns the list of generated garbage base paths (under the garbage 
root node).
+     * @param closer
+     * @param args
+     * @throws Exception
+     */
+    public List<String> execute(Closer closer, String... args) throws 
Exception {
+        continuousRunIndex = 0;
+
+        List<String> generateBasePaths = new ArrayList<>();
+
+        GenerateFullGCOptions options = new 
GenerateFullGCOptions(USAGE).parse(args);
+        String subCmd = options.getSubCmd();
+
+        if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) {
+            if (options.getNumberOfRuns() > 1 && 
options.getGenerateIntervalSeconds() > 0) {
+                generateBasePaths.addAll(generateGarbageContinuously(options, 
closer));
+            } else {
+                generateBasePaths.add(generateGarbage(options, closer, 0));
+            }
+        } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) {
+            cleanGarbage(options, closer);
+        } else {
+            System.err.println("unknown revisions command: " + subCmd);
+        }
+
+        return generateBasePaths;
+    }
+
+    private List<String> generateGarbageContinuously(GenerateFullGCOptions 
options, Closer closer) throws IOException, Exception {
+        ScheduledExecutorService executor = 
Executors.newScheduledThreadPool(1);
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long startGenTimestamp = System.currentTimeMillis();
+
+        List<String> generatedGarbageBasePaths = new ArrayList<>();
+
+        int numberOfRuns = options.getNumberOfRuns();
+        int intervalSeconds = options.getGenerateIntervalSeconds();
+        Runnable task = () -> {
+            if (continuousRunIndex < numberOfRuns) {
+                try {
+                    String genBasePath = generateGarbage(options, closer, 
continuousRunIndex, builder, startGenTimestamp);
+                    generatedGarbageBasePaths.add(genBasePath);
+                } catch (Exception e) {
+                    LOG.error("Error generating garbage in run " + 
continuousRunIndex, e);
+                }
+                LOG.info("Task executed. Count: " + (continuousRunIndex + 1));
+                continuousRunIndex++;
+            } else {
+                // Shutdown the executor once the task has run numberOfRuns 
times
+                executor.shutdown();
+                LOG.info("Task completed " + numberOfRuns + " times. Stopping 
execution.");
+            }
+        };
+
+        // Schedule the task to run every intervalSeconds
+        executor.scheduleAtFixedRate(task, 0, intervalSeconds, 
TimeUnit.SECONDS);
+
+        return generatedGarbageBasePaths;
+    }
+
+    /**
+     * Generate garbage nodes in the repository in order to allow for testing 
fullGC functionality.
+     *
+     * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the 
root).
+     * @param options
+     * @param closer
+     * @param runIndex
+     * @return
+     * @throws IOException
+     * @throws Exception
+     */
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex) throws IOException, Exception {
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long generationTimestamp = System.currentTimeMillis();
+
+        return generateGarbage(options, closer, runIndex, builder, 
generationTimestamp);
+    }
+
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex,
+                                  DocumentNodeStoreBuilder<?> builder, long 
timestamp) throws IOException, Exception {
+
+        if (builder == null) {
+            System.err.println("generateFullGC mode only available for 
DocumentNodeStore");
+            System.exit(1);
+        }
+
+        String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + 
runIndex;
+        System.out.println("Generating fullGC on the document: " + 
generationBasePath);
+        documentNodeStore = builder.build();
+
+        VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options);
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) {
+            LOG.error("Invalid garbageType specified. Must be one of the 
following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS");
+            System.exit(1);
+        }
+
+        //1. Create nodes with properties
+        NodeBuilder rootNode = documentNodeStore.getRoot().builder();
+        NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH);
+        
garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", 
"nt:file", NAME);
+
+        int nodesCountUnderParent = options.getCreateGarbageNodesCount() / 
options.getGarbageNodesParentCount();
+        for(int i = 0; i < options.getGarbageNodesParentCount(); i ++) {
+            
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).setProperty("jcr:primaryType", "nt:folder", NAME);
+
+            for(int j = 0; j < nodesCountUnderParent; j ++) {
+                
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).
+                        setProperty("jcr:primaryType", "nt:file", NAME);
+
+                if (fullGCMode == 
VersionGarbageCollector.FullGCMode.EMPTYPROPS || fullGCMode == 
VersionGarbageCollector.FullGCMode.GAP_ORPHANS_EMPTYPROPS) {
+                    
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).

Review Comment:
   It might be interesting to add different types of "child subtrees". 
IIUC, it will then delete the nodes at level `FULLGC_GEN_PARENT_NODE_PREFIX` 
(which leaves their children as gap orphans). Currently it only creates a 
1-level deep child under that node. It would be interesting to have 
different cases:
   * (later perhaps) 0-level deep but just split docs (we will actually probably 
miss that type of garbage - that's probably going to be a new TODO - but 
this test could trigger it)
   * 1-level deep (which you already have)
   * 2-level deep
   * 3-level deep
   
   The reason is that these might cause different behavior in the fullGC code 
later on - so having slight variations of that garbage could be useful



##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java:
##########
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import joptsimple.OptionSpec;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
+
+/**
+ * GenerateFullGCCommand generates garbage nodes in the repository in order to 
allow for testing fullGC functionality.
+ */
+public class GenerateFullGCCommand implements Command {
+    private static final Logger LOG = 
LoggerFactory.getLogger(GenerateFullGCCommand.class);
+
+    private static final String USAGE = "generateFullGC {<jdbc-uri> | 
<mongodb-uri>} [options]";
+
+    /**
+     * Root node for fullGC garbage generation.
+     * Necessary in order to allow cleanup of all generated garbage nodes by 
simply removing the root node.
+     */
+    public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot";
+
+    /**
+     * Base path for fullGC garbage generation. The timestamp of the run will 
be appended to this path,
+     * which is necessary in order for each garbage generation run to be 
unique and not overwrite previous ones.
+     * If continuous generation is enabled, the index of the run will also be 
appended to this path.
+     */
+    public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_";
+
+    /**
+     * Prefix for parent nodes under which garbage nodes will be created.
+     * The index of the parent node will be appended to this prefix.
+     */
+    public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_";
+    public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_";
+
+    public static String EMPTY_PROPERTY_NAME = "prop";
+
+    private int continuousRunIndex = 0;
+
+    private DocumentNodeStore documentNodeStore;
+
+    public DocumentNodeStore getDocumentNodeStore() {
+        return documentNodeStore;
+    }
+
+    private static class GenerateFullGCOptions extends Utils.NodeStoreOptions {
+
+        /**
+         * Sub-command for generating garbage.
+         * This is the default sub-command to run if none is specified.
+         */
+        static final String CMD_GENERATE = "generate";
+
+        /**
+         * Sub-command for cleaning up all generated garbage.
+         * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all 
of its children (recursively).
+         */
+        static final String CMD_CLEAN = "clean";
+
+        final OptionSpec<Integer> createGarbageNodesCount;
+        final OptionSpec<Integer> garbageNodesParentCount;
+        final OptionSpec<Integer> garbageType;
+        final OptionSpec<Integer> numberOfRuns;
+        final OptionSpec<Integer> generateIntervalSeconds;
+
+        public GenerateFullGCOptions(String usage) {
+            super(usage);
+            createGarbageNodesCount = parser
+                    .accepts("garbageNodesCount", "the total number of garbage 
nodes to create").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(0);
+            garbageNodesParentCount = parser
+                    .accepts("garbageNodesParentCount", "total number of 
parent nodes under which to create garbage nodes").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            garbageType = parser
+                    .accepts("garbageType", "garbage type to be generated - 
must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            numberOfRuns = parser
+                    .accepts("numberOfRuns", "the number of garbage generation 
runs to do. Only applies if greater than 1, " +
+                            "otherwise a single run will be 
done.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            generateIntervalSeconds = parser
+                    .accepts("generateIntervalSeconds", "the interval at which 
to generate a complete garbage count from createGarbageNotesCount. " +
+                            "Applies only if numberOfRuns is greater than 
1.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(60);
+        }
+
+        public GenerateFullGCOptions parse(String[] args) {
+            super.parse(args);
+            return this;
+        }
+
+        String getSubCmd() {
+            List<String> args = getOtherArgs();
+            if (args.size() > 0) {
+                return args.get(0);
+            }
+            return CMD_GENERATE;
+        }
+
+        public int getCreateGarbageNodesCount() {
+            return createGarbageNodesCount.value(options);
+        }
+
+        public int getGarbageNodesParentCount() {
+            return garbageNodesParentCount.value(options);
+        }
+
+        public int getGarbageType() {
+            return garbageType.value(options);
+        }
+
+        public int getNumberOfRuns() {
+            return numberOfRuns.value(options);
+        }
+
+        public int getGenerateIntervalSeconds() {
+            return generateIntervalSeconds.value(options);
+        }
+    }
+
+    public void execute(String... args) throws Exception {
+        Closer closer = Closer.create();
+        try {
+            execute(closer, args);
+        } catch (Throwable e) {
+            LOG.error("Command failed", e);
+            throw closer.rethrow(e);
+        } finally {
+            closer.close();
+        }
+    }
+
+    /**
+     * Method with passed closer is necessary in order to allow for unit tests 
to check the output of the command.
+     * It is the responsibility of the caller to close the closer.
+     *
+     * Returns the list of generated garbage base paths (under the garbage 
root node).
+     * @param closer
+     * @param args
+     * @throws Exception
+     */
+    public List<String> execute(Closer closer, String... args) throws 
Exception {
+        continuousRunIndex = 0;
+
+        List<String> generateBasePaths = new ArrayList<>();
+
+        GenerateFullGCOptions options = new 
GenerateFullGCOptions(USAGE).parse(args);
+        String subCmd = options.getSubCmd();
+
+        if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) {
+            if (options.getNumberOfRuns() > 1 && 
options.getGenerateIntervalSeconds() > 0) {
+                generateBasePaths.addAll(generateGarbageContinuously(options, 
closer));
+            } else {
+                generateBasePaths.add(generateGarbage(options, closer, 0));
+            }
+        } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) {
+            cleanGarbage(options, closer);
+        } else {
+            System.err.println("unknown revisions command: " + subCmd);
+        }
+
+        return generateBasePaths;
+    }
+
+    private List<String> generateGarbageContinuously(GenerateFullGCOptions 
options, Closer closer) throws IOException, Exception {
+        ScheduledExecutorService executor = 
Executors.newScheduledThreadPool(1);
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long startGenTimestamp = System.currentTimeMillis();
+
+        List<String> generatedGarbageBasePaths = new ArrayList<>();
+
+        int numberOfRuns = options.getNumberOfRuns();
+        int intervalSeconds = options.getGenerateIntervalSeconds();
+        Runnable task = () -> {
+            if (continuousRunIndex < numberOfRuns) {
+                try {
+                    String genBasePath = generateGarbage(options, closer, 
continuousRunIndex, builder, startGenTimestamp);
+                    generatedGarbageBasePaths.add(genBasePath);
+                } catch (Exception e) {
+                    LOG.error("Error generating garbage in run " + 
continuousRunIndex, e);
+                }
+                LOG.info("Task executed. Count: " + (continuousRunIndex + 1));
+                continuousRunIndex++;
+            } else {
+                // Shutdown the executor once the task has run numberOfRuns 
times
+                executor.shutdown();
+                LOG.info("Task completed " + numberOfRuns + " times. Stopping 
execution.");
+            }
+        };
+
+        // Schedule the task to run every intervalSeconds
+        executor.scheduleAtFixedRate(task, 0, intervalSeconds, 
TimeUnit.SECONDS);
+
+        return generatedGarbageBasePaths;
+    }
+
+    /**
+     * Generate garbage nodes in the repository in order to allow for testing 
fullGC functionality.
+     *
+     * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the 
root).
+     * @param options
+     * @param closer
+     * @param runIndex
+     * @return
+     * @throws IOException
+     * @throws Exception
+     */
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex) throws IOException, Exception {
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long generationTimestamp = System.currentTimeMillis();
+
+        return generateGarbage(options, closer, runIndex, builder, 
generationTimestamp);
+    }
+
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex,
+                                  DocumentNodeStoreBuilder<?> builder, long 
timestamp) throws IOException, Exception {
+
+        if (builder == null) {
+            System.err.println("generateFullGC mode only available for 
DocumentNodeStore");
+            System.exit(1);
+        }
+
+        String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + 
runIndex;
+        System.out.println("Generating fullGC on the document: " + 
generationBasePath);
+        documentNodeStore = builder.build();
+
+        VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options);
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) {
+            LOG.error("Invalid garbageType specified. Must be one of the 
following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS");

Review Comment:
   I assume this log message will soon be outdated as more fullGC modes become 
ready for rollout, e.g. we will support ALL_ORPHANS at some point. Perhaps just 
mention to use a valid fullGC mode (and perhaps add the class name)?



##########
oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java:
##########
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import joptsimple.OptionSpec;
+import org.apache.jackrabbit.guava.common.io.Closer;
+import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
+import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.jackrabbit.oak.api.Type.NAME;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
+
+/**
+ * GenerateFullGCCommand generates garbage nodes in the repository in order to 
allow for testing fullGC functionality.
+ */
+public class GenerateFullGCCommand implements Command {
+    private static final Logger LOG = 
LoggerFactory.getLogger(GenerateFullGCCommand.class);
+
+    private static final String USAGE = "generateFullGC {<jdbc-uri> | 
<mongodb-uri>} [options]";
+
+    /**
+     * Root node for fullGC garbage generation.
+     * Necessary in order to allow cleanup of all generated garbage nodes by 
simply removing the root node.
+     */
+    public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot";
+
+    /**
+     * Base path for fullGC garbage generation. The timestamp of the run will 
be appended to this path,
+     * which is necessary in order for each garbage generation run to be 
unique and not overwrite previous ones.
+     * If continuous generation is enabled, the index of the run will also be 
appended to this path.
+     */
+    public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_";
+
+    /**
+     * Prefix for parent nodes under which garbage nodes will be created.
+     * The index of the parent node will be appended to this prefix.
+     */
+    public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_";
+    public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_";
+
+    public static String EMPTY_PROPERTY_NAME = "prop";
+
+    private int continuousRunIndex = 0;
+
+    private DocumentNodeStore documentNodeStore;
+
+    public DocumentNodeStore getDocumentNodeStore() {
+        return documentNodeStore;
+    }
+
+    private static class GenerateFullGCOptions extends Utils.NodeStoreOptions {
+
+        /**
+         * Sub-command for generating garbage.
+         * This is the default sub-command to run if none is specified.
+         */
+        static final String CMD_GENERATE = "generate";
+
+        /**
+         * Sub-command for cleaning up all generated garbage.
+         * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all 
of its children (recursively).
+         */
+        static final String CMD_CLEAN = "clean";
+
+        final OptionSpec<Integer> createGarbageNodesCount;
+        final OptionSpec<Integer> garbageNodesParentCount;
+        final OptionSpec<Integer> garbageType;
+        final OptionSpec<Integer> numberOfRuns;
+        final OptionSpec<Integer> generateIntervalSeconds;
+
+        public GenerateFullGCOptions(String usage) {
+            super(usage);
+            createGarbageNodesCount = parser
+                    .accepts("garbageNodesCount", "the total number of garbage 
nodes to create").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(0);
+            garbageNodesParentCount = parser
+                    .accepts("garbageNodesParentCount", "total number of 
parent nodes under which to create garbage nodes").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            garbageType = parser
+                    .accepts("garbageType", "garbage type to be generated - 
must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            numberOfRuns = parser
+                    .accepts("numberOfRuns", "the number of garbage generation 
runs to do. Only applies if greater than 1, " +
+                            "otherwise a single run will be 
done.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(1);
+            generateIntervalSeconds = parser
+                    .accepts("generateIntervalSeconds", "the interval at which 
to generate a complete garbage count from createGarbageNotesCount. " +
+                            "Applies only if numberOfRuns is greater than 
1.").withRequiredArg()
+                    .ofType(Integer.class).defaultsTo(60);
+        }
+
+        public GenerateFullGCOptions parse(String[] args) {
+            super.parse(args);
+            return this;
+        }
+
+        String getSubCmd() {
+            List<String> args = getOtherArgs();
+            if (args.size() > 0) {
+                return args.get(0);
+            }
+            return CMD_GENERATE;
+        }
+
+        public int getCreateGarbageNodesCount() {
+            return createGarbageNodesCount.value(options);
+        }
+
+        public int getGarbageNodesParentCount() {
+            return garbageNodesParentCount.value(options);
+        }
+
+        public int getGarbageType() {
+            return garbageType.value(options);
+        }
+
+        public int getNumberOfRuns() {
+            return numberOfRuns.value(options);
+        }
+
+        public int getGenerateIntervalSeconds() {
+            return generateIntervalSeconds.value(options);
+        }
+    }
+
+    public void execute(String... args) throws Exception {
+        Closer closer = Closer.create();
+        try {
+            execute(closer, args);
+        } catch (Throwable e) {
+            LOG.error("Command failed", e);
+            throw closer.rethrow(e);
+        } finally {
+            closer.close();
+        }
+    }
+
+    /**
+     * Method with passed closer is necessary in order to allow for unit tests 
to check the output of the command.
+     * It is the responsibility of the caller to close the closer.
+     *
+     * Returns the list of generated garbage base paths (under the garbage 
root node).
+     * @param closer
+     * @param args
+     * @throws Exception
+     */
+    public List<String> execute(Closer closer, String... args) throws 
Exception {
+        continuousRunIndex = 0;
+
+        List<String> generateBasePaths = new ArrayList<>();
+
+        GenerateFullGCOptions options = new 
GenerateFullGCOptions(USAGE).parse(args);
+        String subCmd = options.getSubCmd();
+
+        if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) {
+            if (options.getNumberOfRuns() > 1 && 
options.getGenerateIntervalSeconds() > 0) {
+                generateBasePaths.addAll(generateGarbageContinuously(options, 
closer));
+            } else {
+                generateBasePaths.add(generateGarbage(options, closer, 0));
+            }
+        } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) {
+            cleanGarbage(options, closer);
+        } else {
+            System.err.println("unknown revisions command: " + subCmd);
+        }
+
+        return generateBasePaths;
+    }
+
+    private List<String> generateGarbageContinuously(GenerateFullGCOptions 
options, Closer closer) throws IOException, Exception {
+        ScheduledExecutorService executor = 
Executors.newScheduledThreadPool(1);
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long startGenTimestamp = System.currentTimeMillis();
+
+        List<String> generatedGarbageBasePaths = new ArrayList<>();
+
+        int numberOfRuns = options.getNumberOfRuns();
+        int intervalSeconds = options.getGenerateIntervalSeconds();
+        Runnable task = () -> {
+            if (continuousRunIndex < numberOfRuns) {
+                try {
+                    String genBasePath = generateGarbage(options, closer, 
continuousRunIndex, builder, startGenTimestamp);
+                    generatedGarbageBasePaths.add(genBasePath);
+                } catch (Exception e) {
+                    LOG.error("Error generating garbage in run " + 
continuousRunIndex, e);
+                }
+                LOG.info("Task executed. Count: " + (continuousRunIndex + 1));
+                continuousRunIndex++;
+            } else {
+                // Shutdown the executor once the task has run numberOfRuns 
times
+                executor.shutdown();
+                LOG.info("Task completed " + numberOfRuns + " times. Stopping 
execution.");
+            }
+        };
+
+        // Schedule the task to run every intervalSeconds
+        executor.scheduleAtFixedRate(task, 0, intervalSeconds, 
TimeUnit.SECONDS);
+
+        return generatedGarbageBasePaths;
+    }
+
+    /**
+     * Generate garbage nodes in the repository in order to allow for testing 
fullGC functionality.
+     *
+     * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the 
root).
+     * @param options
+     * @param closer
+     * @param runIndex
+     * @return
+     * @throws IOException
+     * @throws Exception
+     */
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex) throws IOException, Exception {
+
+        DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, 
closer);
+        long generationTimestamp = System.currentTimeMillis();
+
+        return generateGarbage(options, closer, runIndex, builder, 
generationTimestamp);
+    }
+
+    private String generateGarbage(GenerateFullGCOptions options, Closer 
closer, int runIndex,
+                                  DocumentNodeStoreBuilder<?> builder, long 
timestamp) throws IOException, Exception {
+
+        if (builder == null) {
+            System.err.println("generateFullGC mode only available for 
DocumentNodeStore");
+            System.exit(1);
+        }
+
+        String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + 
runIndex;
+        System.out.println("Generating fullGC on the document: " + 
generationBasePath);
+        documentNodeStore = builder.build();
+
+        VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options);
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) {
+            LOG.error("Invalid garbageType specified. Must be one of the 
following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS");
+            System.exit(1);
+        }
+
+        //1. Create nodes with properties
+        NodeBuilder rootNode = documentNodeStore.getRoot().builder();
+        NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH);
+        
garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", 
"nt:file", NAME);
+
+        int nodesCountUnderParent = options.getCreateGarbageNodesCount() / 
options.getGarbageNodesParentCount();
+        for(int i = 0; i < options.getGarbageNodesParentCount(); i ++) {
+            
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).setProperty("jcr:primaryType", "nt:folder", NAME);
+
+            for(int j = 0; j < nodesCountUnderParent; j ++) {
+                
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).
+                        setProperty("jcr:primaryType", "nt:file", NAME);
+
+                if (fullGCMode == 
VersionGarbageCollector.FullGCMode.EMPTYPROPS || fullGCMode == 
VersionGarbageCollector.FullGCMode.GAP_ORPHANS_EMPTYPROPS) {
+                    
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).
+                            setProperty(EMPTY_PROPERTY_NAME, "bar", STRING);
+                }
+            }
+        }
+        documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, 
CommitInfo.EMPTY);
+        documentNodeStore.runBackgroundOperations();
+
+
+        //2. Generate garbage nodes - EMPTY_PROPERTIES
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.EMPTYPROPS) {
+            for (int i = 0; i < options.getGarbageNodesParentCount(); i++) {
+                for (int j = 0; j < nodesCountUnderParent; j++) {
+                    
garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + 
i).child(FULLGC_GEN_NODE_PREFIX + j).
+                            removeProperty(EMPTY_PROPERTY_NAME);
+                }
+            }
+        }
+        documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, 
CommitInfo.EMPTY);
+        documentNodeStore.runBackgroundOperations();
+
+        //3.1. Generate garbage nodes - GAP_ORPHANS - remove parent nodes
+        if (fullGCMode == VersionGarbageCollector.FullGCMode.GAP_ORPHANS) {
+            StringBuilder sbNodePath = new StringBuilder();
+            List<String> deleteNodePaths = new ArrayList<>();
+            for (int i = 0; i < options.getGarbageNodesParentCount(); i++) {
+
+                sbNodePath.setLength(0);
+                
sbNodePath.append("3:/").append(FULLGC_GEN_ROOT_PATH).append("/").append(generationBasePath).append("/").

Review Comment:
   (same suggestion here to use Utils for path composition and id conversion)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to