Repository: accumulo
Updated Branches:
  refs/heads/1.7 61bb1b47d -> 7985b917d


ACCUMULO-3747 do not clear a server from the dead server list just because we tried to stop it


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/c3bb4c6d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/c3bb4c6d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/c3bb4c6d

Branch: refs/heads/1.7
Commit: c3bb4c6da82bbc5ab939a684b110d61aadb8ffd2
Parents: 28be43e
Author: Eric C. Newton <eric.new...@gmail.com>
Authored: Tue Jun 23 15:32:17 2015 -0400
Committer: Eric C. Newton <eric.new...@gmail.com>
Committed: Tue Jun 23 15:32:17 2015 -0400

----------------------------------------------------------------------
 .../java/org/apache/accumulo/master/Master.java |  4 +-
 .../accumulo/test/DetectDeadTabletServers.java  | 96 ++++++++++++++++++++
 2 files changed, 98 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/c3bb4c6d/server/master/src/main/java/org/apache/accumulo/master/Master.java
----------------------------------------------------------------------
diff --git a/server/master/src/main/java/org/apache/accumulo/master/Master.java b/server/master/src/main/java/org/apache/accumulo/master/Master.java
index b3d2ac5..545d93a 100644
--- a/server/master/src/main/java/org/apache/accumulo/master/Master.java
+++ b/server/master/src/main/java/org/apache/accumulo/master/Master.java
@@ -949,15 +949,15 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
           log.warn("attempting to stop " + server);
           try {
             TServerConnection connection = tserverSet.getConnection(server);
-            if (connection != null)
+            if (connection != null) {
               connection.halt(masterLock);
+            }
           } catch (TTransportException e) {
             // ignore: it's probably down
           } catch (Exception e) {
             log.info("error talking to troublesome tablet server ", e);
           }
           badServers.remove(server);
-          tserverSet.remove(server);
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/c3bb4c6d/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java b/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java
new file mode 100644
index 0000000..15afd25
--- /dev/null
+++ b/test/src/main/java/org/apache/accumulo/test/DetectDeadTabletServers.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.test;
+
+import static org.apache.accumulo.minicluster.ServerType.TABLET_SERVER;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.impl.MasterClient;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.master.thrift.MasterClientService;
+import org.apache.accumulo.core.master.thrift.MasterMonitorInfo;
+import org.apache.accumulo.core.metadata.MetadataTable;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.Credentials;
+import org.apache.accumulo.fate.util.UtilWaitThread;
+import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
+import org.apache.accumulo.test.functional.ConfigurableMacIT;
+import org.apache.accumulo.trace.instrument.Tracer;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.google.common.collect.Iterators;
+
+public class DetectDeadTabletServers extends ConfigurableMacIT {
+  // Integration test: kills one tablet server and polls the master stats until that server is reported dead.
+  @Override
+  protected void configure(MiniAccumuloConfigImpl cfg, Configuration 
hadoopCoreSite) {
+    cfg.setProperty(Property.INSTANCE_ZK_TIMEOUT, "3s"); // NOTE(review): presumably shortened so the lost server is noticed quickly - confirm
+  }
+  // Expects two live tablet servers, kills one, then waits for the master to list it as dead.
+  @Test
+  public void test() throws Exception {
+    Connector c = getConnector();
+    log.info("verifying that everything is up");
+    Iterators.size(c.createScanner(MetadataTable.NAME, 
Authorizations.EMPTY).iterator()); // iterates the whole metadata table scan; the resulting count is discarded
+    // Baseline: two tablet servers reporting, none bad and none dead.
+    MasterMonitorInfo stats = getStats(c);
+    assertEquals(2, stats.tServerInfo.size());
+    assertEquals(0, stats.badTServers.size());
+    assertEquals(0, stats.deadTabletServers.size());
+    log.info("Killing a tablet server");
+    getCluster().killProcess(TABLET_SERVER, 
getCluster().getProcesses().get(TABLET_SERVER).iterator().next()); // kills the first tablet server process returned by the cluster
+    // Poll every 500 ms until the master stops reporting two tablet servers; this loop has no timeout of its own.
+    while (true) {
+      stats = getStats(c);
+      if (2 != stats.tServerInfo.size()) {
+        break;
+      }
+      UtilWaitThread.sleep(500);
+    }
+    assertEquals(1, stats.tServerInfo.size());
+    assertEquals(1, stats.badTServers.size() + stats.deadTabletServers.size()); // at this point the killed server may be counted as either bad or dead
+    while (true) { // poll every 500 ms until the dead-server list is non-empty; again no timeout in this loop
+      stats = getStats(c);
+      if (0 != stats.deadTabletServers.size()) {
+        break;
+      }
+      UtilWaitThread.sleep(500);
+    }
+    assertEquals(1, stats.tServerInfo.size()); // final state: one live server, no bad servers, exactly one dead server
+    assertEquals(0, stats.badTServers.size());
+    assertEquals(1, stats.deadTabletServers.size());
+  }
+  // Opens a master client, fetches the current MasterMonitorInfo with root credentials, and closes the client in all cases.
+  private MasterMonitorInfo getStats(Connector c) throws Exception {
+    Credentials creds = new Credentials("root", new 
PasswordToken(ROOT_PASSWORD));
+    MasterClientService.Iface client = null;
+    try {
+      client = MasterClient.getConnectionWithRetry(c.getInstance());
+      log.info("Fetching master stats");
+      return client.getMasterStats(Tracer.traceInfo(), 
creds.toThrift(c.getInstance()));
+    } finally {
+      if (client != null) { // client stays null if obtaining the connection threw
+        MasterClient.close(client);
+      }
+    }
+  }
+
+
+}

Reply via email to