keith-turner commented on code in PR #6168: URL: https://github.com/apache/accumulo/pull/6168#discussion_r2884908049
########## test/src/main/java/org/apache/accumulo/test/MultipleManagerIT.java: ########## @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.test; + +import static java.util.stream.Collectors.toSet; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.cli.ServerOpts; +import org.apache.accumulo.core.client.Accumulo; +import org.apache.accumulo.core.client.admin.CompactionConfig; +import org.apache.accumulo.core.clientImpl.ClientContext; +import org.apache.accumulo.core.conf.Property; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.fate.Fate; +import 
org.apache.accumulo.core.fate.FateInstanceType; +import org.apache.accumulo.core.fate.FatePartition; +import org.apache.accumulo.core.fate.FateStore; +import org.apache.accumulo.core.fate.TraceRepo; +import org.apache.accumulo.core.fate.user.UserFateStore; +import org.apache.accumulo.core.lock.ServiceLock; +import org.apache.accumulo.core.lock.ServiceLockPaths; +import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath; +import org.apache.accumulo.core.metadata.SystemTables; +import org.apache.accumulo.core.util.UtilWaitThread; +import org.apache.accumulo.manager.Manager; +import org.apache.accumulo.manager.tableOps.FateEnv; +import org.apache.accumulo.minicluster.ServerType; +import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; +import org.apache.accumulo.server.ServerContext; +import org.apache.accumulo.test.fate.FastFate; +import org.apache.accumulo.test.functional.ConfigurableMacBase; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.junit.jupiter.api.Test; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Sets; +import com.google.common.net.HostAndPort; + +/** + * {@link ComprehensiveMultiManagerIT} runs multiple managers with lots of Accumulo APIs, however + * that does not actually verify that fate operations actually run on multiple managers. This test + * runs a smaller set of Accumulo API operations and does the following. 
+ * + * <ul> + * <li>Starts new manager processes and verifies fate operations start running on them</li> + * <li>Kills assistant/non-primary manager processes and verifies the system recovers</li> + * <li>Kills primary manager process and verifies the system recovers</li> + * <li>Verifies that Accumulo API calls are not impacted by managers starting/stopping</li> + * </ul> + * + */ +public class MultipleManagerIT extends ConfigurableMacBase { + + // A manager that will quickly clean up fate reservations held by dead managers + public static class FastFateCleanupManager extends Manager { + protected FastFateCleanupManager(ServerOpts opts, String[] args) throws IOException { + super(opts, ServerContext::new, args); + } + + @Override + protected Fate<FateEnv> createFateInstance(FateEnv env, FateStore<FateEnv> store, + ServerContext context) { + LoggerFactory.getLogger(FastFateCleanupManager.class) + .info("Creating Fast fate cleanup manager for {}", store.type()); + return new FastFate<>(env, store, true, TraceRepo::toLogString, getConfiguration()); + } + + public static void main(String[] args) throws Exception { + try (FastFateCleanupManager manager = new FastFateCleanupManager(new ServerOpts(), args)) { + manager.runServer(); + } + } + } + + @Override + protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) { + // FOLLOW_ON add a way to start multiple managers to mini + cfg.getClusterServerConfiguration().setNumDefaultCompactors(8); + // Set this lower so that locks timeout faster + cfg.setProperty(Property.INSTANCE_ZK_TIMEOUT, "5s"); + cfg.setServerClass(ServerType.MANAGER, r -> FastFateCleanupManager.class); + super.configure(cfg, hadoopCoreSite); + } + + @Test + public void testFate() throws Exception { + + List<Process> managerWorkers = new ArrayList<>(); + var executor = Executors.newCachedThreadPool(); + + // Start a lot of background threads that should cause fate operations to run. 
+ try (var client = Accumulo.newClient().from(getClientProperties()).build()) { + // Create a table in order to wait for the single manager to become the primary manager + client.tableOperations().create("waitTable"); + + // start more manager processes, should be assigned fate work + managerWorkers.add(exec(FastFateCleanupManager.class)); Review Comment: Yeah, it seems to be working without doing anything, but I was not sure how. I looked into it and found that mini accumulo sets `manager.port.client=0`, and this seems to cause the manager to select a random port. That behavior is not documented on the property. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
