stefan-egli commented on code in PR #1805: URL: https://github.com/apache/jackrabbit-oak/pull/1805#discussion_r1806355826
########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.run; + +import joptsimple.OptionSpec; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; +import org.apache.jackrabbit.oak.run.commons.Command; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import 
java.util.concurrent.TimeUnit; + +import static org.apache.jackrabbit.oak.api.Type.NAME; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder; + +/** + * GenerateFullGCCommand generates garbage nodes in the repository in order to allow for testing fullGC functionality. + */ +public class GenerateFullGCCommand implements Command { + private static final Logger LOG = LoggerFactory.getLogger(GenerateFullGCCommand.class); + + private static final String USAGE = "generateFullGC {<jdbc-uri> | <mongodb-uri>} [options]"; + + /** + * Root node for fullGC garbage generation. + * Necessary in order to allow cleanup of all generated garbage nodes by simply removing the root node. + */ + public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot"; + + /** + * Base path for fullGC garbage generation. The timestamp of the run will be appended to this path, + * which is necessary in order for each garbage generation run to be unique and not overwrite previous ones. + * If continuous generation is enabled, the index of the run will also be appended to this path. + */ + public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_"; Review Comment: Once you are under `FULLGC_GEN_ROOT_PATH`, whose name already indicates that the content is test garbage, do we still need every path element to contain `fullGC`? ########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.run; + +import joptsimple.OptionSpec; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; +import org.apache.jackrabbit.oak.run.commons.Command; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static org.apache.jackrabbit.oak.api.Type.NAME; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder; + +/** + * GenerateFullGCCommand generates garbage nodes in the repository in order to allow for testing fullGC functionality. 
+ */ +public class GenerateFullGCCommand implements Command { + private static final Logger LOG = LoggerFactory.getLogger(GenerateFullGCCommand.class); + + private static final String USAGE = "generateFullGC {<jdbc-uri> | <mongodb-uri>} [options]"; + + /** + * Root node for fullGC garbage generation. + * Necessary in order to allow cleanup of all generated garbage nodes by simply removing the root node. + */ + public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot"; Review Comment: I would recommend not adding content directly under the root node and using something like "/tmp" instead, e.g. "/tmp/fullGcTestGarbage" or something like that. ########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.run; + +import joptsimple.OptionSpec; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; +import org.apache.jackrabbit.oak.run.commons.Command; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static org.apache.jackrabbit.oak.api.Type.NAME; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder; + +/** + * GenerateFullGCCommand generates garbage nodes in the repository in order to allow for testing fullGC functionality. + */ +public class GenerateFullGCCommand implements Command { + private static final Logger LOG = LoggerFactory.getLogger(GenerateFullGCCommand.class); + + private static final String USAGE = "generateFullGC {<jdbc-uri> | <mongodb-uri>} [options]"; + + /** + * Root node for fullGC garbage generation. + * Necessary in order to allow cleanup of all generated garbage nodes by simply removing the root node. + */ + public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot"; + + /** + * Base path for fullGC garbage generation. 
The timestamp of the run will be appended to this path, + * which is necessary in order for each garbage generation run to be unique and not overwrite previous ones. + * If continuous generation is enabled, the index of the run will also be appended to this path. + */ + public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_"; + + /** + * Prefix for parent nodes under which garbage nodes will be created. + * The index of the parent node will be appended to this prefix. + */ + public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_"; + public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_"; + + public static String EMPTY_PROPERTY_NAME = "prop"; + + private int continuousRunIndex = 0; + + private DocumentNodeStore documentNodeStore; + + public DocumentNodeStore getDocumentNodeStore() { + return documentNodeStore; + } + + private static class GenerateFullGCOptions extends Utils.NodeStoreOptions { + + /** + * Sub-command for generating garbage. + * This is the default sub-command to run if none is specified. + */ + static final String CMD_GENERATE = "generate"; + + /** + * Sub-command for cleaning up all generated garbage. + * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all of its children (recursively). 
+ */ + static final String CMD_CLEAN = "clean"; + + final OptionSpec<Integer> createGarbageNodesCount; + final OptionSpec<Integer> garbageNodesParentCount; + final OptionSpec<Integer> garbageType; + final OptionSpec<Integer> numberOfRuns; + final OptionSpec<Integer> generateIntervalSeconds; + + public GenerateFullGCOptions(String usage) { + super(usage); + createGarbageNodesCount = parser + .accepts("garbageNodesCount", "the total number of garbage nodes to create").withRequiredArg() + .ofType(Integer.class).defaultsTo(0); + garbageNodesParentCount = parser + .accepts("garbageNodesParentCount", "total number of parent nodes under which to create garbage nodes").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + garbageType = parser + .accepts("garbageType", "garbage type to be generated - must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + numberOfRuns = parser + .accepts("numberOfRuns", "the number of garbage generation runs to do. Only applies if greater than 1, " + + "otherwise a single run will be done.").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + generateIntervalSeconds = parser + .accepts("generateIntervalSeconds", "the interval at which to generate a complete garbage count from createGarbageNotesCount. 
" + + "Applies only if numberOfRuns is greater than 1.").withRequiredArg() + .ofType(Integer.class).defaultsTo(60); + } + + public GenerateFullGCOptions parse(String[] args) { + super.parse(args); + return this; + } + + String getSubCmd() { + List<String> args = getOtherArgs(); + if (args.size() > 0) { + return args.get(0); + } + return CMD_GENERATE; + } + + public int getCreateGarbageNodesCount() { + return createGarbageNodesCount.value(options); + } + + public int getGarbageNodesParentCount() { + return garbageNodesParentCount.value(options); + } + + public int getGarbageType() { + return garbageType.value(options); + } + + public int getNumberOfRuns() { + return numberOfRuns.value(options); + } + + public int getGenerateIntervalSeconds() { + return generateIntervalSeconds.value(options); + } + } + + public void execute(String... args) throws Exception { + Closer closer = Closer.create(); + try { + execute(closer, args); + } catch (Throwable e) { + LOG.error("Command failed", e); + throw closer.rethrow(e); + } finally { + closer.close(); + } + } + + /** + * Method with passed closer is necessary in order to allow for unit tests to check the output of the command. + * It is the responsibility of the caller to close the closer. + * + * Returns the list of generated garbage base paths (under the garbage root node). + * @param closer + * @param args + * @throws Exception + */ + public List<String> execute(Closer closer, String... 
args) throws Exception { + continuousRunIndex = 0; + + List<String> generateBasePaths = new ArrayList<>(); + + GenerateFullGCOptions options = new GenerateFullGCOptions(USAGE).parse(args); + String subCmd = options.getSubCmd(); + + if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) { + if (options.getNumberOfRuns() > 1 && options.getGenerateIntervalSeconds() > 0) { + generateBasePaths.addAll(generateGarbageContinuously(options, closer)); + } else { + generateBasePaths.add(generateGarbage(options, closer, 0)); + } + } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) { + cleanGarbage(options, closer); + } else { + System.err.println("unknown revisions command: " + subCmd); + } + + return generateBasePaths; + } + + private List<String> generateGarbageContinuously(GenerateFullGCOptions options, Closer closer) throws IOException, Exception { + ScheduledExecutorService executor = Executors.newScheduledThreadPool(1); + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long startGenTimestamp = System.currentTimeMillis(); + + List<String> generatedGarbageBasePaths = new ArrayList<>(); + + int numberOfRuns = options.getNumberOfRuns(); + int intervalSeconds = options.getGenerateIntervalSeconds(); + Runnable task = () -> { + if (continuousRunIndex < numberOfRuns) { + try { + String genBasePath = generateGarbage(options, closer, continuousRunIndex, builder, startGenTimestamp); + generatedGarbageBasePaths.add(genBasePath); + } catch (Exception e) { + LOG.error("Error generating garbage in run " + continuousRunIndex, e); + } + LOG.info("Task executed. Count: " + (continuousRunIndex + 1)); + continuousRunIndex++; + } else { + // Shutdown the executor once the task has run numberOfRuns times + executor.shutdown(); + LOG.info("Task completed " + numberOfRuns + " times. 
Stopping execution."); + } + }; + + // Schedule the task to run every intervalSeconds + executor.scheduleAtFixedRate(task, 0, intervalSeconds, TimeUnit.SECONDS); + + return generatedGarbageBasePaths; + } + + /** + * Generate garbage nodes in the repository in order to allow for testing fullGC functionality. + * + * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the root). + * @param options + * @param closer + * @param runIndex + * @return + * @throws IOException + * @throws Exception + */ + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex) throws IOException, Exception { + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long generationTimestamp = System.currentTimeMillis(); + + return generateGarbage(options, closer, runIndex, builder, generationTimestamp); + } + + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex, + DocumentNodeStoreBuilder<?> builder, long timestamp) throws IOException, Exception { + + if (builder == null) { + System.err.println("generateFullGC mode only available for DocumentNodeStore"); + System.exit(1); + } + + String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + runIndex; + System.out.println("Generating fullGC on the document: " + generationBasePath); + documentNodeStore = builder.build(); + + VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options); + if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) { + LOG.error("Invalid garbageType specified. Must be one of the following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS"); + System.exit(1); + } + + //1. 
Create nodes with properties + NodeBuilder rootNode = documentNodeStore.getRoot().builder(); + NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH); + garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", "nt:file", NAME); + + int nodesCountUnderParent = options.getCreateGarbageNodesCount() / options.getGarbageNodesParentCount(); + for(int i = 0; i < options.getGarbageNodesParentCount(); i ++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).setProperty("jcr:primaryType", "nt:folder", NAME); + + for(int j = 0; j < nodesCountUnderParent; j ++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). + setProperty("jcr:primaryType", "nt:file", NAME); + + if (fullGCMode == VersionGarbageCollector.FullGCMode.EMPTYPROPS || fullGCMode == VersionGarbageCollector.FullGCMode.GAP_ORPHANS_EMPTYPROPS) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). + setProperty(EMPTY_PROPERTY_NAME, "bar", STRING); + } + } + } + documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, CommitInfo.EMPTY); + documentNodeStore.runBackgroundOperations(); + + + //2. Generate garbage nodes - EMPTY_PROPERTIES + if (fullGCMode == VersionGarbageCollector.FullGCMode.EMPTYPROPS) { + for (int i = 0; i < options.getGarbageNodesParentCount(); i++) { + for (int j = 0; j < nodesCountUnderParent; j++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). + removeProperty(EMPTY_PROPERTY_NAME); + } + } + } + documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, CommitInfo.EMPTY); + documentNodeStore.runBackgroundOperations(); + + //3.1. 
Generate garbage nodes - GAP_ORPHANS - remove parent nodes + if (fullGCMode == VersionGarbageCollector.FullGCMode.GAP_ORPHANS) { + StringBuilder sbNodePath = new StringBuilder(); + List<String> deleteNodePaths = new ArrayList<>(); + for (int i = 0; i < options.getGarbageNodesParentCount(); i++) { + + sbNodePath.setLength(0); + sbNodePath.append("3:/").append(FULLGC_GEN_ROOT_PATH).append("/").append(generationBasePath).append("/"). + append(FULLGC_GEN_PARENT_NODE_PREFIX).append(i); + deleteNodePaths.add(sbNodePath.toString()); + } + // Remove all parent nodes + documentNodeStore.getDocumentStore().remove(org.apache.jackrabbit.oak.plugins.document.Collection.NODES, deleteNodePaths); + documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, CommitInfo.EMPTY); + documentNodeStore.runBackgroundOperations(); + } + + return generationBasePath; + } + + /** + * Cleans up all generated garbage by removing the node FULLGC_GEN_ROOT_PATH and all of + * its children (recursively) + * @param options + * @param closer + * @throws IOException + * @throws Exception + */ + private void cleanGarbage(GenerateFullGCOptions options, Closer closer) throws IOException, Exception { + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + + if (builder == null) { + System.err.println("generateFullGC mode only available for DocumentNodeStore"); + System.exit(1); + } + + System.out.println("Cleaning up all generated garbage:"); + documentNodeStore = builder.build(); + + NodeBuilder rootBuilder = documentNodeStore.getRoot().builder(); + + NodeBuilder generatedGarbageRootBuilder = rootBuilder.child(FULLGC_GEN_ROOT_PATH); + + String garbageRootNodePath = "1:/"+FULLGC_GEN_ROOT_PATH; + List<String> childNodePaths = new ArrayList<>(); + childNodePaths.add(garbageRootNodePath); + + // get all paths of the tree nodes under the garbage root node + getTreeNodePaths(generatedGarbageRootBuilder, garbageRootNodePath, childNodePaths, 1); + + 
documentNodeStore.getDocumentStore().remove(org.apache.jackrabbit.oak.plugins.document.Collection.NODES, childNodePaths); + documentNodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + documentNodeStore.runBackgroundOperations(); + } + + /** + * Recursively get all paths of the tree nodes under the given root node. + * @param rootNode + * @param basePath + * @param treeNodePaths + * @param nodeLevel + */ + private void getTreeNodePaths(NodeBuilder rootNode, String basePath, + List<String> treeNodePaths, int nodeLevel) { + String childBasePath = basePath.replaceFirst(nodeLevel + ":/", (nodeLevel + 1) + ":/"); + for (String childNodeName : rootNode.getChildNodeNames()) { + String childPath = childBasePath + "/" + childNodeName; + treeNodePaths.add(childPath); + getTreeNodePaths(rootNode.child(childNodeName), childPath, treeNodePaths, nodeLevel + 1); + } + } + + /** + * Used in unit tests for retrieving a document by path from the document store using the documentNodeStore + * of this class. 
+ * @param collection + * @param key + * @param maxCacheAge + * @return + * @param <T> + * @throws DocumentStoreException + */ + @Nullable + public <T extends Document> T getDocument(Collection<T> collection, String key, int maxCacheAge) Review Comment: As this is only used by test code, I'd move it over there (and, if needed, create an accessor to `documentNodeStore`, for example). ########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/AvailableModes.java: ########## @@ -75,5 +75,6 @@ public final class AvailableModes { .put("segment-copy", new SegmentCopyCommand()) .put("server", new ServerCommand()) .put("purge-index-versions", new LucenePurgeOldIndexVersionCommand()) + .put("generateFullGC", new GenerateFullGCCommand()) Review Comment: +1 for renaming the command ########## oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/document/GenerateFullGCCommandTest.java: ########## @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.document; + +import org.apache.jackrabbit.guava.common.collect.ImmutableList; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection; +import org.apache.jackrabbit.oak.run.GenerateFullGCCommand; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.jackrabbit.oak.plugins.document.CommandTestUtils.captureSystemOut; +import static org.apache.jackrabbit.oak.run.GenerateFullGCCommand.EMPTY_PROPERTY_NAME; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeTrue; + +public class GenerateFullGCCommandTest { + + final String OPTION_GARBAGE_NODES_COUNT = "--garbageNodesCount"; + final String OPTION_GARBAGE_NODES_PARENT_COUNT = "--garbageNodesParentCount"; + final String OPTION_GARBAGE_TYPE = "--garbageType"; + final String OPTION_NUMBER_OF_RUNS = "--numberOfRuns"; + final String OPTION_GENERATE_INTERVAL_SECONDS = "--generateIntervalSeconds"; + + @Rule + public MongoConnectionFactory connectionFactory = new MongoConnectionFactory(); + + @Rule + public DocumentMKBuilderProvider builderProvider = new DocumentMKBuilderProvider(); + + private DocumentNodeStore ns; + + private Closer closer; + + @BeforeClass + public static void assumeMongoDB() { + assumeTrue(MongoUtils.isAvailable()); + } + + @Before + public void before() { + ns = createDocumentNodeStore(); + } + + @After + public void after() { + try { + closer.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + ns.dispose(); + } + + private DocumentNodeStore 
createDocumentNodeStore() { + MongoConnection c = connectionFactory.getConnection(); + assertNotNull(c); + MongoUtils.dropCollections(c.getDatabase()); + return builderProvider.newBuilder().setFullGCEnabled(false).setLeaseCheckMode(LeaseCheckMode.DISABLED).setAsyncDelay(0) + .setMongoDB(c.getMongoClient(), c.getDBName()).getNodeStore(); + } + + private List<String> getChildNodeNames(NodeState node) { + List<String> childNodeNames = new ArrayList<>(); + for (String childNodeName : node.getChildNodeNames()) { + childNodeNames.add(childNodeName); + } + return childNodeNames; + } + + @Ignore + @Test + public void generateGarbageEmptyPropsUnderOneParentOnCustomMongoDB() { + ns.dispose(); + + // this should be a valid mongoDB connection string for the MongoDB on which to generate the garbage + String mongoDBConnString = ""; + GenerateFullGCCmd cmd = new GenerateFullGCCmd( mongoDBConnString, OPTION_GARBAGE_NODES_COUNT, "10", OPTION_GARBAGE_NODES_PARENT_COUNT, "1", + OPTION_GARBAGE_TYPE, "1"); + String output = captureSystemOut(cmd); + + //sleep thread for 10 seconds to allow the garbage generation to complete + try { + Thread.sleep(10000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + + DocumentNodeStore nodeStore = cmd.getCommand().getDocumentNodeStore(); + NodeState garbageRootNodeState = nodeStore.getRoot().getChildNode(GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH); + List<String> garbageRootNodeChildNames = getChildNodeNames(garbageRootNodeState); + + assertEquals(garbageRootNodeChildNames.size(), 1); + List<String> propParentsGarbageNodeNames = getChildNodeNames(garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0))); + assertEquals(propParentsGarbageNodeNames.size(), 1); + + List<String> propGarbageNodeNames = getChildNodeNames(garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0)).getChildNode(propParentsGarbageNodeNames.get(0))); + assertEquals(propGarbageNodeNames.size(), 10); + } + + @Test + public void 
cleanupAllGarbage() { + + // generate garbage first + GenerateFullGCCmd cmdGenerateGarbage = new GenerateFullGCCmd(OPTION_GARBAGE_NODES_COUNT, "10", OPTION_GARBAGE_NODES_PARENT_COUNT, "1", + OPTION_GARBAGE_TYPE, "2"); + cmdGenerateGarbage.run(); + Closer cmd1Closer = cmdGenerateGarbage.getCloser(); + + // cleanup garbage + String mongoDBConnString = "mongodb://cm-p107053-e251100-username:3fuep9uhpd9y...@cmp107053e251100r355e01-shard-00-00.wxmqq.mongodb.net:27017,cmp107053e251100r355e01-shard-00-01.wxmqq.mongodb.net:27017,cmp107053e251100r355e01-shard-00-02.wxmqq.mongodb.net:27017/cm-p107053-e251100-database?ssl=true&authSource=admin&replicaSet=atlas-4x602o-shard-0&retryWrites=true&readPreference=primaryPreferred&readConcernLevel=majority&w=majority"; + GenerateFullGCCmd cmdClean = new GenerateFullGCCmd(mongoDBConnString, "clean"); + cmdClean.run(); + closer = cmdClean.getCloser(); + + NodeDocument garbageRoot = cmdClean.getCommand().getDocument(Collection.NODES, "1:/" + GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH, 0); + // garbage root node should be either deleted or empty (no children) + assertTrue(garbageRoot == null || !garbageRoot.hasChildren()); + + try { + cmd1Closer.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Test + public void generateGarbageEmptyPropsUnderOneParent() { + ns.dispose(); + + GenerateFullGCCmd cmd = new GenerateFullGCCmd(OPTION_GARBAGE_NODES_COUNT, "10", OPTION_GARBAGE_NODES_PARENT_COUNT, "1", + OPTION_GARBAGE_TYPE, "1"); + String output = captureSystemOut(cmd); + closer = cmd.getCloser(); + + List<String> generatedBasePaths = cmd.getGeneratedBasePaths(); + + DocumentNodeStore nodeStore = cmd.getCommand().getDocumentNodeStore(); + NodeState garbageRootNodeState = nodeStore.getRoot().getChildNode(GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH); + List<String> garbageRootNodeChildNames = getChildNodeNames(garbageRootNodeState); + + assertEquals(garbageRootNodeChildNames.size(), 1); + for (String generateBasePath : 
generatedBasePaths) { + assertTrue(garbageRootNodeChildNames.contains(generateBasePath)); + } + + List<String> propParentsGarbageNodeNames = getChildNodeNames(garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0))); + assertEquals(propParentsGarbageNodeNames.size(), 1); + + NodeState emptyPropsParentNode = garbageRootNodeState.getChildNode(garbageRootNodeChildNames.get(0)).getChildNode(propParentsGarbageNodeNames.get(0)); + List<String> propGarbageNodeNames = getChildNodeNames(emptyPropsParentNode); + assertEquals(propGarbageNodeNames.size(), 10); + + for (String propGarbageNodeName : propGarbageNodeNames) { + NodeState propGarbageNode = emptyPropsParentNode.getChildNode(propGarbageNodeName); + assertNull(propGarbageNode.getProperty(EMPTY_PROPERTY_NAME)); + } + } + + @Test + public void generateGarbageGapOrphansUnderOneParent() { + ns.dispose(); + + int garbageNodesCount = 10; + int garbageNodesParentCount = 1; + + testGenerateGapOrphans(garbageNodesCount, garbageNodesParentCount); + } + + @Test + public void generateGarbageGapOrphansUnderMultipleParents() { + ns.dispose(); + + int garbageNodesCount = 20; + int garbageNodesParentCount = 4; + + testGenerateGapOrphans(garbageNodesCount, garbageNodesParentCount); + } + + /** + * Calls the GenerateFullGCCommand to generate gap orphans and verifies the generated gap orphans: + * - checks that parents of gap orphans are missing by retrieving the nodes from the document store + * - checks that the expected number gap orphans are generated by retrieving the nodes from the document store + * @param garbageNodesCount + * @param garbageNodesParentCount + */ + private void testGenerateGapOrphans(int garbageNodesCount, int garbageNodesParentCount) { + GenerateFullGCCmd cmd = new GenerateFullGCCmd(OPTION_GARBAGE_NODES_COUNT, String.valueOf(garbageNodesCount), + OPTION_GARBAGE_NODES_PARENT_COUNT, String.valueOf(garbageNodesParentCount), + OPTION_GARBAGE_TYPE, "2"); + String output = captureSystemOut(cmd); + closer = 
cmd.getCloser(); + + int nodesPerParent = garbageNodesCount / garbageNodesParentCount; + + List<String> generatedBasePaths = cmd.getGeneratedBasePaths(); + + DocumentNodeStore nodeStore = cmd.getCommand().getDocumentNodeStore(); + NodeState garbageRootNodeState = nodeStore.getRoot().getChildNode(GenerateFullGCCommand.FULLGC_GEN_ROOT_PATH); + List<String> garbageRootNodeChildNames = getChildNodeNames(garbageRootNodeState); + + assertEquals(garbageRootNodeChildNames.size(), 1); + for (String generateBasePath : generatedBasePaths) { + assertTrue(garbageRootNodeChildNames.contains(generateBasePath)); + } + + String generateBasePath = generatedBasePaths.get(0); + for (int i = 0; i < garbageNodesParentCount; i ++) { + NodeDocument missingDocument = cmd.getCommand().getDocument(Collection.NODES, "3:/" Review Comment: I would suggest using `Utils.getIdFromPath` etc. rather than hard-coding the depth - e.g. changing the base path to `/tmp/xy` would now require this depth to be adjusted (this applies in several places). ########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.run; + +import joptsimple.OptionSpec; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; +import org.apache.jackrabbit.oak.run.commons.Command; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static org.apache.jackrabbit.oak.api.Type.NAME; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder; + +/** + * GenerateFullGCCommand generates garbage nodes in the repository in order to allow for testing fullGC functionality. + */ +public class GenerateFullGCCommand implements Command { + private static final Logger LOG = LoggerFactory.getLogger(GenerateFullGCCommand.class); + + private static final String USAGE = "generateFullGC {<jdbc-uri> | <mongodb-uri>} [options]"; + + /** + * Root node for fullGC garbage generation. + * Necessary in order to allow cleanup of all generated garbage nodes by simply removing the root node. + */ + public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot"; + + /** + * Base path for fullGC garbage generation. 
The timestamp of the run will be appended to this path, + * which is necessary in order for each garbage generation run to be unique and not overwrite previous ones. + * If continuous generation is enabled, the index of the run will also be appended to this path. + */ + public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_"; + + /** + * Prefix for parent nodes under which garbage nodes will be created. + * The index of the parent node will be appended to this prefix. + */ + public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_"; + public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_"; + + public static String EMPTY_PROPERTY_NAME = "prop"; + + private int continuousRunIndex = 0; + + private DocumentNodeStore documentNodeStore; + + public DocumentNodeStore getDocumentNodeStore() { + return documentNodeStore; + } + + private static class GenerateFullGCOptions extends Utils.NodeStoreOptions { + + /** + * Sub-command for generating garbage. + * This is the default sub-command to run if none is specified. + */ + static final String CMD_GENERATE = "generate"; + + /** + * Sub-command for cleaning up all generated garbage. + * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all of its children (recursively). 
+ */ + static final String CMD_CLEAN = "clean"; + + final OptionSpec<Integer> createGarbageNodesCount; + final OptionSpec<Integer> garbageNodesParentCount; + final OptionSpec<Integer> garbageType; + final OptionSpec<Integer> numberOfRuns; + final OptionSpec<Integer> generateIntervalSeconds; + + public GenerateFullGCOptions(String usage) { + super(usage); + createGarbageNodesCount = parser + .accepts("garbageNodesCount", "the total number of garbage nodes to create").withRequiredArg() + .ofType(Integer.class).defaultsTo(0); + garbageNodesParentCount = parser + .accepts("garbageNodesParentCount", "total number of parent nodes under which to create garbage nodes").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + garbageType = parser + .accepts("garbageType", "garbage type to be generated - must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + numberOfRuns = parser + .accepts("numberOfRuns", "the number of garbage generation runs to do. Only applies if greater than 1, " + + "otherwise a single run will be done.").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + generateIntervalSeconds = parser + .accepts("generateIntervalSeconds", "the interval at which to generate a complete garbage count from createGarbageNotesCount. 
" + + "Applies only if numberOfRuns is greater than 1.").withRequiredArg() + .ofType(Integer.class).defaultsTo(60); + } + + public GenerateFullGCOptions parse(String[] args) { + super.parse(args); + return this; + } + + String getSubCmd() { + List<String> args = getOtherArgs(); + if (args.size() > 0) { + return args.get(0); + } + return CMD_GENERATE; + } + + public int getCreateGarbageNodesCount() { + return createGarbageNodesCount.value(options); + } + + public int getGarbageNodesParentCount() { + return garbageNodesParentCount.value(options); + } + + public int getGarbageType() { + return garbageType.value(options); + } + + public int getNumberOfRuns() { + return numberOfRuns.value(options); + } + + public int getGenerateIntervalSeconds() { + return generateIntervalSeconds.value(options); + } + } + + public void execute(String... args) throws Exception { + Closer closer = Closer.create(); + try { + execute(closer, args); + } catch (Throwable e) { + LOG.error("Command failed", e); + throw closer.rethrow(e); + } finally { + closer.close(); + } + } + + /** + * Method with passed closer is necessary in order to allow for unit tests to check the output of the command. + * It is the responsibility of the caller to close the closer. + * + * Returns the list of generated garbage base paths (under the garbage root node). + * @param closer + * @param args + * @throws Exception + */ + public List<String> execute(Closer closer, String... 
args) throws Exception { + continuousRunIndex = 0; + + List<String> generateBasePaths = new ArrayList<>(); + + GenerateFullGCOptions options = new GenerateFullGCOptions(USAGE).parse(args); + String subCmd = options.getSubCmd(); + + if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) { + if (options.getNumberOfRuns() > 1 && options.getGenerateIntervalSeconds() > 0) { + generateBasePaths.addAll(generateGarbageContinuously(options, closer)); + } else { + generateBasePaths.add(generateGarbage(options, closer, 0)); + } + } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) { + cleanGarbage(options, closer); + } else { + System.err.println("unknown revisions command: " + subCmd); + } + + return generateBasePaths; + } + + private List<String> generateGarbageContinuously(GenerateFullGCOptions options, Closer closer) throws IOException, Exception { + ScheduledExecutorService executor = Executors.newScheduledThreadPool(1); + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long startGenTimestamp = System.currentTimeMillis(); + + List<String> generatedGarbageBasePaths = new ArrayList<>(); + + int numberOfRuns = options.getNumberOfRuns(); + int intervalSeconds = options.getGenerateIntervalSeconds(); + Runnable task = () -> { + if (continuousRunIndex < numberOfRuns) { + try { + String genBasePath = generateGarbage(options, closer, continuousRunIndex, builder, startGenTimestamp); + generatedGarbageBasePaths.add(genBasePath); + } catch (Exception e) { + LOG.error("Error generating garbage in run " + continuousRunIndex, e); + } + LOG.info("Task executed. Count: " + (continuousRunIndex + 1)); + continuousRunIndex++; + } else { + // Shutdown the executor once the task has run numberOfRuns times + executor.shutdown(); + LOG.info("Task completed " + numberOfRuns + " times. 
Stopping execution."); + } + }; + + // Schedule the task to run every intervalSeconds + executor.scheduleAtFixedRate(task, 0, intervalSeconds, TimeUnit.SECONDS); + + return generatedGarbageBasePaths; + } + + /** + * Generate garbage nodes in the repository in order to allow for testing fullGC functionality. + * + * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the root). + * @param options + * @param closer + * @param runIndex + * @return + * @throws IOException + * @throws Exception + */ + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex) throws IOException, Exception { + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long generationTimestamp = System.currentTimeMillis(); + + return generateGarbage(options, closer, runIndex, builder, generationTimestamp); + } + + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex, + DocumentNodeStoreBuilder<?> builder, long timestamp) throws IOException, Exception { + + if (builder == null) { + System.err.println("generateFullGC mode only available for DocumentNodeStore"); + System.exit(1); + } + + String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + runIndex; + System.out.println("Generating fullGC on the document: " + generationBasePath); + documentNodeStore = builder.build(); + + VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options); + if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) { + LOG.error("Invalid garbageType specified. Must be one of the following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS"); + System.exit(1); + } + + //1. 
Create nodes with properties + NodeBuilder rootNode = documentNodeStore.getRoot().builder(); + NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH); + garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", "nt:file", NAME); Review Comment: would use `org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE` instead. Also, would not use `nt:file`, as that requires e.g. a `jcr:content` child etc. (which wasn't noticed in the test as the merge happens without a commit hook - something that's a bit risky, but this is probably an exception as it is primarily a test command - but this aspect isn't ideal and might lead to surprises). We could use `org.apache.jackrabbit.oak.spi.nodetype.NodeTypeConstants.NT_OAK_UNSTRUCTURED` instead (as that is unorderable and unstructured, it would be a good fix, since it doesn't have any special requirements otherwise). ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java: ########## @@ -168,7 +168,7 @@ public class VersionGarbageCollector { * Ultimately the goal is to clean up all possible garbage. After hardening these modes * might no longer be supported. */ - enum FullGCMode { + public enum FullGCMode { Review Comment: not sure if we want this public? What about adding a helper under `org.apache.jackrabbit.oak.plugins.document` in oak-run instead? That helper could perhaps also have some `includesEmptyProps` / `includesGapOrphans` etc. methods - as fullGC modes are going to be a combination of different modes (and the current check is hard coded to only modes 1 and 2) ########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.run; + +import joptsimple.OptionSpec; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; +import org.apache.jackrabbit.oak.run.commons.Command; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static org.apache.jackrabbit.oak.api.Type.NAME; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder; + +/** + * GenerateFullGCCommand generates garbage nodes in the repository in order to allow for testing fullGC 
functionality. + */ +public class GenerateFullGCCommand implements Command { + private static final Logger LOG = LoggerFactory.getLogger(GenerateFullGCCommand.class); + + private static final String USAGE = "generateFullGC {<jdbc-uri> | <mongodb-uri>} [options]"; + + /** + * Root node for fullGC garbage generation. + * Necessary in order to allow cleanup of all generated garbage nodes by simply removing the root node. + */ + public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot"; + + /** + * Base path for fullGC garbage generation. The timestamp of the run will be appended to this path, + * which is necessary in order for each garbage generation run to be unique and not overwrite previous ones. + * If continuous generation is enabled, the index of the run will also be appended to this path. + */ + public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_"; + + /** + * Prefix for parent nodes under which garbage nodes will be created. + * The index of the parent node will be appended to this prefix. + */ + public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_"; + public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_"; + + public static String EMPTY_PROPERTY_NAME = "prop"; + + private int continuousRunIndex = 0; + + private DocumentNodeStore documentNodeStore; + + public DocumentNodeStore getDocumentNodeStore() { + return documentNodeStore; + } + + private static class GenerateFullGCOptions extends Utils.NodeStoreOptions { + + /** + * Sub-command for generating garbage. + * This is the default sub-command to run if none is specified. + */ + static final String CMD_GENERATE = "generate"; + + /** + * Sub-command for cleaning up all generated garbage. + * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all of its children (recursively). 
+ */ + static final String CMD_CLEAN = "clean"; + + final OptionSpec<Integer> createGarbageNodesCount; + final OptionSpec<Integer> garbageNodesParentCount; + final OptionSpec<Integer> garbageType; + final OptionSpec<Integer> numberOfRuns; + final OptionSpec<Integer> generateIntervalSeconds; + + public GenerateFullGCOptions(String usage) { + super(usage); + createGarbageNodesCount = parser + .accepts("garbageNodesCount", "the total number of garbage nodes to create").withRequiredArg() + .ofType(Integer.class).defaultsTo(0); + garbageNodesParentCount = parser + .accepts("garbageNodesParentCount", "total number of parent nodes under which to create garbage nodes").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + garbageType = parser + .accepts("garbageType", "garbage type to be generated - must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + numberOfRuns = parser + .accepts("numberOfRuns", "the number of garbage generation runs to do. Only applies if greater than 1, " + + "otherwise a single run will be done.").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + generateIntervalSeconds = parser + .accepts("generateIntervalSeconds", "the interval at which to generate a complete garbage count from createGarbageNotesCount. 
" + + "Applies only if numberOfRuns is greater than 1.").withRequiredArg() + .ofType(Integer.class).defaultsTo(60); + } + + public GenerateFullGCOptions parse(String[] args) { + super.parse(args); + return this; + } + + String getSubCmd() { + List<String> args = getOtherArgs(); + if (args.size() > 0) { + return args.get(0); + } + return CMD_GENERATE; + } + + public int getCreateGarbageNodesCount() { + return createGarbageNodesCount.value(options); + } + + public int getGarbageNodesParentCount() { + return garbageNodesParentCount.value(options); + } + + public int getGarbageType() { + return garbageType.value(options); + } + + public int getNumberOfRuns() { + return numberOfRuns.value(options); + } + + public int getGenerateIntervalSeconds() { + return generateIntervalSeconds.value(options); + } + } + + public void execute(String... args) throws Exception { + Closer closer = Closer.create(); + try { + execute(closer, args); + } catch (Throwable e) { + LOG.error("Command failed", e); + throw closer.rethrow(e); + } finally { + closer.close(); + } + } + + /** + * Method with passed closer is necessary in order to allow for unit tests to check the output of the command. + * It is the responsibility of the caller to close the closer. + * + * Returns the list of generated garbage base paths (under the garbage root node). + * @param closer + * @param args + * @throws Exception + */ + public List<String> execute(Closer closer, String... 
args) throws Exception { + continuousRunIndex = 0; + + List<String> generateBasePaths = new ArrayList<>(); + + GenerateFullGCOptions options = new GenerateFullGCOptions(USAGE).parse(args); + String subCmd = options.getSubCmd(); + + if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) { + if (options.getNumberOfRuns() > 1 && options.getGenerateIntervalSeconds() > 0) { + generateBasePaths.addAll(generateGarbageContinuously(options, closer)); + } else { + generateBasePaths.add(generateGarbage(options, closer, 0)); + } + } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) { + cleanGarbage(options, closer); + } else { + System.err.println("unknown revisions command: " + subCmd); + } + + return generateBasePaths; + } + + private List<String> generateGarbageContinuously(GenerateFullGCOptions options, Closer closer) throws IOException, Exception { + ScheduledExecutorService executor = Executors.newScheduledThreadPool(1); + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long startGenTimestamp = System.currentTimeMillis(); + + List<String> generatedGarbageBasePaths = new ArrayList<>(); + + int numberOfRuns = options.getNumberOfRuns(); + int intervalSeconds = options.getGenerateIntervalSeconds(); + Runnable task = () -> { + if (continuousRunIndex < numberOfRuns) { + try { + String genBasePath = generateGarbage(options, closer, continuousRunIndex, builder, startGenTimestamp); + generatedGarbageBasePaths.add(genBasePath); + } catch (Exception e) { + LOG.error("Error generating garbage in run " + continuousRunIndex, e); + } + LOG.info("Task executed. Count: " + (continuousRunIndex + 1)); + continuousRunIndex++; + } else { + // Shutdown the executor once the task has run numberOfRuns times + executor.shutdown(); + LOG.info("Task completed " + numberOfRuns + " times. 
Stopping execution."); + } + }; + + // Schedule the task to run every intervalSeconds + executor.scheduleAtFixedRate(task, 0, intervalSeconds, TimeUnit.SECONDS); + + return generatedGarbageBasePaths; + } + + /** + * Generate garbage nodes in the repository in order to allow for testing fullGC functionality. + * + * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the root). + * @param options + * @param closer + * @param runIndex + * @return + * @throws IOException + * @throws Exception + */ + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex) throws IOException, Exception { + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long generationTimestamp = System.currentTimeMillis(); + + return generateGarbage(options, closer, runIndex, builder, generationTimestamp); + } + + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex, + DocumentNodeStoreBuilder<?> builder, long timestamp) throws IOException, Exception { + + if (builder == null) { + System.err.println("generateFullGC mode only available for DocumentNodeStore"); + System.exit(1); + } + + String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + runIndex; + System.out.println("Generating fullGC on the document: " + generationBasePath); + documentNodeStore = builder.build(); + + VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options); + if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) { + LOG.error("Invalid garbageType specified. Must be one of the following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS"); + System.exit(1); + } + + //1. 
Create nodes with properties + NodeBuilder rootNode = documentNodeStore.getRoot().builder(); + NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH); + garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", "nt:file", NAME); + + int nodesCountUnderParent = options.getCreateGarbageNodesCount() / options.getGarbageNodesParentCount(); + for(int i = 0; i < options.getGarbageNodesParentCount(); i ++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).setProperty("jcr:primaryType", "nt:folder", NAME); + + for(int j = 0; j < nodesCountUnderParent; j ++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). + setProperty("jcr:primaryType", "nt:file", NAME); + + if (fullGCMode == VersionGarbageCollector.FullGCMode.EMPTYPROPS || fullGCMode == VersionGarbageCollector.FullGCMode.GAP_ORPHANS_EMPTYPROPS) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). Review Comment: what might be interesting is adding different types of "child subtrees". IIUC, it will then delete the nodes at level `FULLGC_GEN_PARENT_NODE_PREFIX` (which then leaves its children as gap orphans). Currently it only creates a 1-level deep child under that node. 
What would be interesting is to have different cases: * (later perhaps) 0-level deep but just split docs (we will probably miss that type of garbage - that is likely going to be a new TODO - but this test could trigger it); * 1-level deep (which you already have); * 2-level deep; * 3-level deep. The reason is that each case might cause different behavior in the fullGC code later on, so having slight variations of that garbage could be useful. ########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.run; + +import joptsimple.OptionSpec; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; +import org.apache.jackrabbit.oak.run.commons.Command; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static org.apache.jackrabbit.oak.api.Type.NAME; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder; + +/** + * GenerateFullGCCommand generates garbage nodes in the repository in order to allow for testing fullGC functionality. + */ +public class GenerateFullGCCommand implements Command { + private static final Logger LOG = LoggerFactory.getLogger(GenerateFullGCCommand.class); + + private static final String USAGE = "generateFullGC {<jdbc-uri> | <mongodb-uri>} [options]"; + + /** + * Root node for fullGC garbage generation. + * Necessary in order to allow cleanup of all generated garbage nodes by simply removing the root node. + */ + public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot"; + + /** + * Base path for fullGC garbage generation. 
The timestamp of the run will be appended to this path, + * which is necessary in order for each garbage generation run to be unique and not overwrite previous ones. + * If continuous generation is enabled, the index of the run will also be appended to this path. + */ + public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_"; + + /** + * Prefix for parent nodes under which garbage nodes will be created. + * The index of the parent node will be appended to this prefix. + */ + public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_"; + public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_"; + + public static String EMPTY_PROPERTY_NAME = "prop"; + + private int continuousRunIndex = 0; + + private DocumentNodeStore documentNodeStore; + + public DocumentNodeStore getDocumentNodeStore() { + return documentNodeStore; + } + + private static class GenerateFullGCOptions extends Utils.NodeStoreOptions { + + /** + * Sub-command for generating garbage. + * This is the default sub-command to run if none is specified. + */ + static final String CMD_GENERATE = "generate"; + + /** + * Sub-command for cleaning up all generated garbage. + * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all of its children (recursively). 
+ */ + static final String CMD_CLEAN = "clean"; + + final OptionSpec<Integer> createGarbageNodesCount; + final OptionSpec<Integer> garbageNodesParentCount; + final OptionSpec<Integer> garbageType; + final OptionSpec<Integer> numberOfRuns; + final OptionSpec<Integer> generateIntervalSeconds; + + public GenerateFullGCOptions(String usage) { + super(usage); + createGarbageNodesCount = parser + .accepts("garbageNodesCount", "the total number of garbage nodes to create").withRequiredArg() + .ofType(Integer.class).defaultsTo(0); + garbageNodesParentCount = parser + .accepts("garbageNodesParentCount", "total number of parent nodes under which to create garbage nodes").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + garbageType = parser + .accepts("garbageType", "garbage type to be generated - must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + numberOfRuns = parser + .accepts("numberOfRuns", "the number of garbage generation runs to do. Only applies if greater than 1, " + + "otherwise a single run will be done.").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + generateIntervalSeconds = parser + .accepts("generateIntervalSeconds", "the interval at which to generate a complete garbage count from createGarbageNotesCount. 
" + + "Applies only if numberOfRuns is greater than 1.").withRequiredArg() + .ofType(Integer.class).defaultsTo(60); + } + + public GenerateFullGCOptions parse(String[] args) { + super.parse(args); + return this; + } + + String getSubCmd() { + List<String> args = getOtherArgs(); + if (args.size() > 0) { + return args.get(0); + } + return CMD_GENERATE; + } + + public int getCreateGarbageNodesCount() { + return createGarbageNodesCount.value(options); + } + + public int getGarbageNodesParentCount() { + return garbageNodesParentCount.value(options); + } + + public int getGarbageType() { + return garbageType.value(options); + } + + public int getNumberOfRuns() { + return numberOfRuns.value(options); + } + + public int getGenerateIntervalSeconds() { + return generateIntervalSeconds.value(options); + } + } + + public void execute(String... args) throws Exception { + Closer closer = Closer.create(); + try { + execute(closer, args); + } catch (Throwable e) { + LOG.error("Command failed", e); + throw closer.rethrow(e); + } finally { + closer.close(); + } + } + + /** + * Method with passed closer is necessary in order to allow for unit tests to check the output of the command. + * It is the responsibility of the caller to close the closer. + * + * Returns the list of generated garbage base paths (under the garbage root node). + * @param closer + * @param args + * @throws Exception + */ + public List<String> execute(Closer closer, String... 
args) throws Exception { + continuousRunIndex = 0; + + List<String> generateBasePaths = new ArrayList<>(); + + GenerateFullGCOptions options = new GenerateFullGCOptions(USAGE).parse(args); + String subCmd = options.getSubCmd(); + + if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) { + if (options.getNumberOfRuns() > 1 && options.getGenerateIntervalSeconds() > 0) { + generateBasePaths.addAll(generateGarbageContinuously(options, closer)); + } else { + generateBasePaths.add(generateGarbage(options, closer, 0)); + } + } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) { + cleanGarbage(options, closer); + } else { + System.err.println("unknown revisions command: " + subCmd); + } + + return generateBasePaths; + } + + private List<String> generateGarbageContinuously(GenerateFullGCOptions options, Closer closer) throws IOException, Exception { + ScheduledExecutorService executor = Executors.newScheduledThreadPool(1); + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long startGenTimestamp = System.currentTimeMillis(); + + List<String> generatedGarbageBasePaths = new ArrayList<>(); + + int numberOfRuns = options.getNumberOfRuns(); + int intervalSeconds = options.getGenerateIntervalSeconds(); + Runnable task = () -> { + if (continuousRunIndex < numberOfRuns) { + try { + String genBasePath = generateGarbage(options, closer, continuousRunIndex, builder, startGenTimestamp); + generatedGarbageBasePaths.add(genBasePath); + } catch (Exception e) { + LOG.error("Error generating garbage in run " + continuousRunIndex, e); + } + LOG.info("Task executed. Count: " + (continuousRunIndex + 1)); + continuousRunIndex++; + } else { + // Shutdown the executor once the task has run numberOfRuns times + executor.shutdown(); + LOG.info("Task completed " + numberOfRuns + " times. 
Stopping execution."); + } + }; + + // Schedule the task to run every intervalSeconds + executor.scheduleAtFixedRate(task, 0, intervalSeconds, TimeUnit.SECONDS); + + return generatedGarbageBasePaths; + } + + /** + * Generate garbage nodes in the repository in order to allow for testing fullGC functionality. + * + * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the root). + * @param options + * @param closer + * @param runIndex + * @return + * @throws IOException + * @throws Exception + */ + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex) throws IOException, Exception { + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long generationTimestamp = System.currentTimeMillis(); + + return generateGarbage(options, closer, runIndex, builder, generationTimestamp); + } + + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex, + DocumentNodeStoreBuilder<?> builder, long timestamp) throws IOException, Exception { + + if (builder == null) { + System.err.println("generateFullGC mode only available for DocumentNodeStore"); + System.exit(1); + } + + String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + runIndex; + System.out.println("Generating fullGC on the document: " + generationBasePath); + documentNodeStore = builder.build(); + + VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options); + if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) { + LOG.error("Invalid garbageType specified. Must be one of the following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS"); Review Comment: I assume this log message will soon be outdated as more fullGC modes become ready for rollout, e.g. we will support ALL_ORPHANS at some point. Perhaps just say that a valid fullGC mode must be used (and perhaps add the class name to the message)? 
########## oak-run/src/main/java/org/apache/jackrabbit/oak/run/GenerateFullGCCommand.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.run; + +import joptsimple.OptionSpec; +import org.apache.jackrabbit.guava.common.io.Closer; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector; +import org.apache.jackrabbit.oak.run.commons.Command; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import 
java.util.concurrent.TimeUnit; + +import static org.apache.jackrabbit.oak.api.Type.NAME; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder; + +/** + * GenerateFullGCCommand generates garbage nodes in the repository in order to allow for testing fullGC functionality. + */ +public class GenerateFullGCCommand implements Command { + private static final Logger LOG = LoggerFactory.getLogger(GenerateFullGCCommand.class); + + private static final String USAGE = "generateFullGC {<jdbc-uri> | <mongodb-uri>} [options]"; + + /** + * Root node for fullGC garbage generation. + * Necessary in order to allow cleanup of all generated garbage nodes by simply removing the root node. + */ + public static String FULLGC_GEN_ROOT_PATH = "fullGCGenRoot"; + + /** + * Base path for fullGC garbage generation. The timestamp of the run will be appended to this path, + * which is necessary in order for each garbage generation run to be unique and not overwrite previous ones. + * If continuous generation is enabled, the index of the run will also be appended to this path. + */ + public static String FULLGC_GEN_BASE_PATH = "fullGCGenTest_"; + + /** + * Prefix for parent nodes under which garbage nodes will be created. + * The index of the parent node will be appended to this prefix. + */ + public static String FULLGC_GEN_PARENT_NODE_PREFIX = "fullGCParent_"; + public static String FULLGC_GEN_NODE_PREFIX = "fullGCNode_"; + + public static String EMPTY_PROPERTY_NAME = "prop"; + + private int continuousRunIndex = 0; + + private DocumentNodeStore documentNodeStore; + + public DocumentNodeStore getDocumentNodeStore() { + return documentNodeStore; + } + + private static class GenerateFullGCOptions extends Utils.NodeStoreOptions { + + /** + * Sub-command for generating garbage. + * This is the default sub-command to run if none is specified. 
+ */ + static final String CMD_GENERATE = "generate"; + + /** + * Sub-command for cleaning up all generated garbage. + * Using this will remove the root node FULLGC_GEN_ROOT_PATH and all of its children (recursively). + */ + static final String CMD_CLEAN = "clean"; + + final OptionSpec<Integer> createGarbageNodesCount; + final OptionSpec<Integer> garbageNodesParentCount; + final OptionSpec<Integer> garbageType; + final OptionSpec<Integer> numberOfRuns; + final OptionSpec<Integer> generateIntervalSeconds; + + public GenerateFullGCOptions(String usage) { + super(usage); + createGarbageNodesCount = parser + .accepts("garbageNodesCount", "the total number of garbage nodes to create").withRequiredArg() + .ofType(Integer.class).defaultsTo(0); + garbageNodesParentCount = parser + .accepts("garbageNodesParentCount", "total number of parent nodes under which to create garbage nodes").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + garbageType = parser + .accepts("garbageType", "garbage type to be generated - must be a value from VersionGarbageCollector.fullGCMode").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + numberOfRuns = parser + .accepts("numberOfRuns", "the number of garbage generation runs to do. Only applies if greater than 1, " + + "otherwise a single run will be done.").withRequiredArg() + .ofType(Integer.class).defaultsTo(1); + generateIntervalSeconds = parser + .accepts("generateIntervalSeconds", "the interval at which to generate a complete garbage count from createGarbageNotesCount. 
" + + "Applies only if numberOfRuns is greater than 1.").withRequiredArg() + .ofType(Integer.class).defaultsTo(60); + } + + public GenerateFullGCOptions parse(String[] args) { + super.parse(args); + return this; + } + + String getSubCmd() { + List<String> args = getOtherArgs(); + if (args.size() > 0) { + return args.get(0); + } + return CMD_GENERATE; + } + + public int getCreateGarbageNodesCount() { + return createGarbageNodesCount.value(options); + } + + public int getGarbageNodesParentCount() { + return garbageNodesParentCount.value(options); + } + + public int getGarbageType() { + return garbageType.value(options); + } + + public int getNumberOfRuns() { + return numberOfRuns.value(options); + } + + public int getGenerateIntervalSeconds() { + return generateIntervalSeconds.value(options); + } + } + + public void execute(String... args) throws Exception { + Closer closer = Closer.create(); + try { + execute(closer, args); + } catch (Throwable e) { + LOG.error("Command failed", e); + throw closer.rethrow(e); + } finally { + closer.close(); + } + } + + /** + * Method with passed closer is necessary in order to allow for unit tests to check the output of the command. + * It is the responsibility of the caller to close the closer. + * + * Returns the list of generated garbage base paths (under the garbage root node). + * @param closer + * @param args + * @throws Exception + */ + public List<String> execute(Closer closer, String... 
args) throws Exception { + continuousRunIndex = 0; + + List<String> generateBasePaths = new ArrayList<>(); + + GenerateFullGCOptions options = new GenerateFullGCOptions(USAGE).parse(args); + String subCmd = options.getSubCmd(); + + if (GenerateFullGCOptions.CMD_GENERATE.equals(subCmd)) { + if (options.getNumberOfRuns() > 1 && options.getGenerateIntervalSeconds() > 0) { + generateBasePaths.addAll(generateGarbageContinuously(options, closer)); + } else { + generateBasePaths.add(generateGarbage(options, closer, 0)); + } + } else if (GenerateFullGCOptions.CMD_CLEAN.equals(subCmd)) { + cleanGarbage(options, closer); + } else { + System.err.println("unknown revisions command: " + subCmd); + } + + return generateBasePaths; + } + + private List<String> generateGarbageContinuously(GenerateFullGCOptions options, Closer closer) throws IOException, Exception { + ScheduledExecutorService executor = Executors.newScheduledThreadPool(1); + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long startGenTimestamp = System.currentTimeMillis(); + + List<String> generatedGarbageBasePaths = new ArrayList<>(); + + int numberOfRuns = options.getNumberOfRuns(); + int intervalSeconds = options.getGenerateIntervalSeconds(); + Runnable task = () -> { + if (continuousRunIndex < numberOfRuns) { + try { + String genBasePath = generateGarbage(options, closer, continuousRunIndex, builder, startGenTimestamp); + generatedGarbageBasePaths.add(genBasePath); + } catch (Exception e) { + LOG.error("Error generating garbage in run " + continuousRunIndex, e); + } + LOG.info("Task executed. Count: " + (continuousRunIndex + 1)); + continuousRunIndex++; + } else { + // Shutdown the executor once the task has run numberOfRuns times + executor.shutdown(); + LOG.info("Task completed " + numberOfRuns + " times. 
Stopping execution."); + } + }; + + // Schedule the task to run every intervalSeconds + executor.scheduleAtFixedRate(task, 0, intervalSeconds, TimeUnit.SECONDS); + + return generatedGarbageBasePaths; + } + + /** + * Generate garbage nodes in the repository in order to allow for testing fullGC functionality. + * + * Returns the path of the generated FULLGC_GEN_BASE_PATH node (under the root). + * @param options + * @param closer + * @param runIndex + * @return + * @throws IOException + * @throws Exception + */ + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex) throws IOException, Exception { + + DocumentNodeStoreBuilder<?> builder = createDocumentMKBuilder(options, closer); + long generationTimestamp = System.currentTimeMillis(); + + return generateGarbage(options, closer, runIndex, builder, generationTimestamp); + } + + private String generateGarbage(GenerateFullGCOptions options, Closer closer, int runIndex, + DocumentNodeStoreBuilder<?> builder, long timestamp) throws IOException, Exception { + + if (builder == null) { + System.err.println("generateFullGC mode only available for DocumentNodeStore"); + System.exit(1); + } + + String generationBasePath = FULLGC_GEN_BASE_PATH + timestamp + "_" + runIndex; + System.out.println("Generating fullGC on the document: " + generationBasePath); + documentNodeStore = builder.build(); + + VersionGarbageCollector.FullGCMode fullGCMode = getFullGCMode(options); + if (fullGCMode == VersionGarbageCollector.FullGCMode.NONE) { + LOG.error("Invalid garbageType specified. Must be one of the following: 1 - EMPTYPROPS, 2 - GAP_ORPHANS, 3 - GAP_ORPHANS_EMPTYPROPS"); + System.exit(1); + } + + //1. 
Create nodes with properties + NodeBuilder rootNode = documentNodeStore.getRoot().builder(); + NodeBuilder garbageRootNode = rootNode.child(FULLGC_GEN_ROOT_PATH); + garbageRootNode.child(generationBasePath).setProperty("jcr:primaryType", "nt:file", NAME); + + int nodesCountUnderParent = options.getCreateGarbageNodesCount() / options.getGarbageNodesParentCount(); + for(int i = 0; i < options.getGarbageNodesParentCount(); i ++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).setProperty("jcr:primaryType", "nt:folder", NAME); + + for(int j = 0; j < nodesCountUnderParent; j ++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). + setProperty("jcr:primaryType", "nt:file", NAME); + + if (fullGCMode == VersionGarbageCollector.FullGCMode.EMPTYPROPS || fullGCMode == VersionGarbageCollector.FullGCMode.GAP_ORPHANS_EMPTYPROPS) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). + setProperty(EMPTY_PROPERTY_NAME, "bar", STRING); + } + } + } + documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, CommitInfo.EMPTY); + documentNodeStore.runBackgroundOperations(); + + + //2. Generate garbage nodes - EMPTY_PROPERTIES + if (fullGCMode == VersionGarbageCollector.FullGCMode.EMPTYPROPS) { + for (int i = 0; i < options.getGarbageNodesParentCount(); i++) { + for (int j = 0; j < nodesCountUnderParent; j++) { + garbageRootNode.child(generationBasePath).child(FULLGC_GEN_PARENT_NODE_PREFIX + i).child(FULLGC_GEN_NODE_PREFIX + j). + removeProperty(EMPTY_PROPERTY_NAME); + } + } + } + documentNodeStore.merge(rootNode, EmptyHook.INSTANCE, CommitInfo.EMPTY); + documentNodeStore.runBackgroundOperations(); + + //3.1. 
Generate garbage nodes - GAP_ORPHANS - remove parent nodes + if (fullGCMode == VersionGarbageCollector.FullGCMode.GAP_ORPHANS) { + StringBuilder sbNodePath = new StringBuilder(); + List<String> deleteNodePaths = new ArrayList<>(); + for (int i = 0; i < options.getGarbageNodesParentCount(); i++) { + + sbNodePath.setLength(0); + sbNodePath.append("3:/").append(FULLGC_GEN_ROOT_PATH).append("/").append(generationBasePath).append("/"). Review Comment: (same suggestion here to use Utils for path composition and id conversion) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
