[ https://issues.apache.org/jira/browse/HBASE-23872?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Stack resolved HBASE-23872. ----------------------------------- Resolution: Fixed Pushed to branch-2 and master. This test has passed the last few runs done locally. Attached what I pushed. Small potatoes change: {code} kalashnikov:hbase.apache.git stack$ more 0001-HBASE-23872-Flakey-Test-TestGenerateDelegationToken-.patch >From 435b79335e81c0c6cc79dc8b1a1c6f495ed76d74 Mon Sep 17 00:00:00 2001 From: stack <st...@apache.org> Date: Wed, 19 Feb 2020 13:41:18 -0800 Subject: [PATCH] HBASE-23872 [Flakey Test] TestGenerateDelegationToken; Master not initialized after 200000ms --- .../java/org/apache/hadoop/hbase/master/ServerManager.java | 4 +++- .../apache/hadoop/hbase/security/token/SecureTestCluster.java | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index a61f96ad9c..681d507ef0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -132,6 +132,8 @@ public class ServerManager { public static final int FLUSHEDSEQUENCEID_FLUSHER_INTERVAL_DEFAULT = 3 * 60 * 60 * 1000; // 3 hours + public static final String MAX_CLOCK_SKEW_MS = "hbase.master.maxclockskew"; + private static final Logger LOG = LoggerFactory.getLogger(ServerManager.class); // Set if we are to shutdown the cluster. 
@@ -179,7 +181,7 @@ public class ServerManager { public ServerManager(final MasterServices master) { this.master = master; Configuration c = master.getConfiguration(); - maxSkew = c.getLong("hbase.master.maxclockskew", 30000); + maxSkew = c.getLong(MAX_CLOCK_SKEW_MS, 30000); warningSkew = c.getLong("hbase.master.warningclockskew", 10000); persistFlushedSequenceId = c.getBoolean(PERSIST_FLUSHEDSEQUENCEID, PERSIST_FLUSHEDSEQUENCEID_DEFAULT); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/token/SecureTestCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/token/SecureTestCluster.java index 964ef140f2..3900431936 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/token/SecureTestCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/token/SecureTestCluster.java @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.LocalHBaseCluster; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.security.HBaseKerberosUtils; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.minikdc.MiniKdc; @@ -56,6 +57,9 @@ public class SecureTestCluster { */ @BeforeClass public static void setUp() throws Exception { + // Can take a long time for the mini kdc to come up on loaded test cluster. Tolerate this in + // test by upping the skew time allowed from 30s to 90s. 
+ TEST_UTIL.getConfiguration().setLong(ServerManager.MAX_CLOCK_SKEW_MS, 90000); KDC = TEST_UTIL.setupMiniKdc(KEYTAB_FILE); USERNAME = UserGroupInformation.getLoginUser().getShortUserName(); PRINCIPAL = USERNAME + "/" + HOST; {code} > [Flakey Test] TestGenerateDelegationToken; Master not initialized after > 200000ms > -------------------------------------------------------------------------------- > > Key: HBASE-23872 > URL: https://issues.apache.org/jira/browse/HBASE-23872 > Project: HBase > Issue Type: Task > Components: flakies > Reporter: Michael Stack > Assignee: Michael Stack > Priority: Major > Fix For: 3.0.0, 2.3.0 > > Attachments: > 0001-HBASE-23872-Flakey-Test-TestGenerateDelegationToken-.patch > > > This one was a complaint about Master not initializing. Happened twice in a row > on a local run. Looking in the .txt and -output.txt was no help but when I > looked in the xml, I found this complaint: > {code} > org.apache.hadoop.hbase.ClockOutOfSyncException: Server > localhost,53153,1582096705773 has been rejected; Reported time is too far out > of sync with master. Time difference of 44964ms > max allowed of 30000ms > {code} > Indeed, RS is initialized then does its Kerberos stuff and there is a big > pause of almost 45 seconds. > For now, let me up the tolerance for all tests derived from SecureTestCluster -- This message was sent by Atlassian Jira (v8.3.4#803005)