Repository: accumulo Updated Branches: refs/heads/1.7 61bb1b47d -> 7985b917d
ACCUMULO-3747 do not clear a server from the dead server list just because we tried to stop it Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/c3bb4c6d Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/c3bb4c6d Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/c3bb4c6d Branch: refs/heads/1.7 Commit: c3bb4c6da82bbc5ab939a684b110d61aadb8ffd2 Parents: 28be43e Author: Eric C. Newton <eric.new...@gmail.com> Authored: Tue Jun 23 15:32:17 2015 -0400 Committer: Eric C. Newton <eric.new...@gmail.com> Committed: Tue Jun 23 15:32:17 2015 -0400 ---------------------------------------------------------------------- .../java/org/apache/accumulo/master/Master.java | 4 +- .../accumulo/test/DetectDeadTabletServers.java | 96 ++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/c3bb4c6d/server/master/src/main/java/org/apache/accumulo/master/Master.java ---------------------------------------------------------------------- diff --git a/server/master/src/main/java/org/apache/accumulo/master/Master.java b/server/master/src/main/java/org/apache/accumulo/master/Master.java index b3d2ac5..545d93a 100644 --- a/server/master/src/main/java/org/apache/accumulo/master/Master.java +++ b/server/master/src/main/java/org/apache/accumulo/master/Master.java @@ -949,15 +949,15 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt log.warn("attempting to stop " + server); try { TServerConnection connection = tserverSet.getConnection(server); - if (connection != null) + if (connection != null) { connection.halt(masterLock); + } } catch (TTransportException e) { // ignore: it's probably down } catch (Exception e) { log.info("error talking to troublesome tablet server ", e); } badServers.remove(server); - tserverSet.remove(server); } } } 
http://git-wip-us.apache.org/repos/asf/accumulo/blob/c3bb4c6d/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java ---------------------------------------------------------------------- diff --git a/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java b/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java new file mode 100644 index 0000000..15afd25 --- /dev/null +++ b/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.accumulo.test;
+
+import static org.apache.accumulo.minicluster.ServerType.TABLET_SERVER;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.impl.MasterClient;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.master.thrift.MasterClientService;
+import org.apache.accumulo.core.master.thrift.MasterMonitorInfo;
+import org.apache.accumulo.core.metadata.MetadataTable;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.Credentials;
+import org.apache.accumulo.fate.util.UtilWaitThread;
+import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
+import org.apache.accumulo.test.functional.ConfigurableMacIT;
+import org.apache.accumulo.trace.instrument.Tracer;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Iterators;
+/** Test that kills one tablet server process of the cluster and polls the master stats until that server shows up in deadTabletServers. */
+public class DetectDeadTabletServers extends ConfigurableMacIT {
+  /** Sets INSTANCE_ZK_TIMEOUT to 3s on the cluster config; presumably so the loss of the killed server is noticed quickly (TODO confirm). */
+  @Override
+  protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
+    cfg.setProperty(Property.INSTANCE_ZK_TIMEOUT, "3s");
+  }
+  /** Checks the healthy baseline, kills one tablet server, then waits for the master to report it first as missing and finally as dead. */
+  @Test
+  public void test() throws Exception {
+    Connector c = getConnector();
+    log.info("verifying that everything is up");
+    Iterators.size(c.createScanner(MetadataTable.NAME, Authorizations.EMPTY).iterator()); // walks the whole metadata scan; the resulting count is discarded
+    // Baseline reported by the master: two live tablet servers, none bad, none dead.
+    MasterMonitorInfo stats = getStats(c);
+    assertEquals(2, stats.tServerInfo.size());
+    assertEquals(0, stats.badTServers.size());
+    assertEquals(0, stats.deadTabletServers.size());
+    log.info("Killing a tablet server");
+    getCluster().killProcess(TABLET_SERVER, getCluster().getProcesses().get(TABLET_SERVER).iterator().next()); // kills whichever tablet server process the iterator returns first
+    // Poll until the master no longer lists two live servers. NOTE(review): this loop has no timeout of its own; presumably the test harness bounds it, confirm.
+    while (true) {
+      stats = getStats(c);
+      if (2 != stats.tServerInfo.size()) {
+        break;
+      }
+      UtilWaitThread.sleep(500); // pause between polls (presumably milliseconds, verify)
+    }
+    assertEquals(1, stats.tServerInfo.size());
+    assertEquals(1, stats.badTServers.size() + stats.deadTabletServers.size()); // the bad and dead lists together must hold one entry at this point
+    while (true) { // second poll: wait until the dead list is non-empty (again without a local timeout)
+      stats = getStats(c);
+      if (0 != stats.deadTabletServers.size()) {
+        break;
+      }
+      UtilWaitThread.sleep(500);
+    }
+    assertEquals(1, stats.tServerInfo.size()); // expected final state: one live, none bad, one dead
+    assertEquals(0, stats.badTServers.size());
+    assertEquals(1, stats.deadTabletServers.size());
+  }
+  /** Fetches MasterMonitorInfo through a master client obtained with getConnectionWithRetry, using root credentials built from ROOT_PASSWORD; the client is closed in the finally block. */
+  private MasterMonitorInfo getStats(Connector c) throws Exception {
+    Credentials creds = new Credentials("root", new PasswordToken(ROOT_PASSWORD));
+    MasterClientService.Iface client = null;
+    try {
+      client = MasterClient.getConnectionWithRetry(c.getInstance());
+      log.info("Fetching master stats");
+      return client.getMasterStats(Tracer.traceInfo(), creds.toThrift(c.getInstance()));
+    } finally {
+      if (client != null) {
+        MasterClient.close(client);
+      }
+    }
+  }
+
+
+}