[ https://issues.apache.org/jira/browse/HBASE-19868?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Duo Zhang resolved HBASE-19868. ------------------------------- Resolution: Fixed Hadoop Flags: Reviewed Resolve. > TestCoprocessorWhitelistMasterObserver is flakey > ------------------------------------------------ > > Key: HBASE-19868 > URL: https://issues.apache.org/jira/browse/HBASE-19868 > Project: HBase > Issue Type: Sub-task > Components: flakey, test > Affects Versions: 2.0.0-beta-1 > Reporter: Peter Somogyi > Assignee: Peter Somogyi > Priority: Major > Fix For: 2.0.0-beta-2 > > Attachments: HBASE-19868.branch-2.001.patch, > HBASE-19868.master.002.patch > > > TestCoprocessorWhitelistMasterObserver is failing 33% of the time. In the > logs, it looks like the failure is related to Master initialization. > The following log is from > [https://builds.apache.org/job/HBase%20Nightly/job/branch-2/203] > {noformat} > 2018-01-26 02:36:36,686 WARN [M:0;1f0c4777c1ba:35049] > master.TableNamespaceManager(307): Caught exception in initializing namespace > table manager > org.apache.hadoop.hbase.DoNotRetryIOException: hconnection-0x18cd2ac8 closed > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:722) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:714) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:684) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.getRegionLocation(ConnectionImplementation.java:562) > at > 
org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.getRegionLocation(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.HRegionLocator.getRegionLocation(HRegionLocator.java:73) > at > org.apache.hadoop.hbase.client.RegionServerCallable.prepare(RegionServerCallable.java:223) > at > org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:105) > at org.apache.hadoop.hbase.client.HTable.get(HTable.java:388) > at org.apache.hadoop.hbase.client.HTable.get(HTable.java:362) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:141) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.isTableAvailableAndInitialized(TableNamespaceManager.java:281) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.start(TableNamespaceManager.java:103) > at > org.apache.hadoop.hbase.master.ClusterSchemaServiceImpl.doStart(ClusterSchemaServiceImpl.java:62) > at > org.apache.hbase.thirdparty.com.google.common.util.concurrent.AbstractService.startAsync(AbstractService.java:226) > at > org.apache.hadoop.hbase.master.HMaster.initClusterSchemaService(HMaster.java:1059) > at > org.apache.hadoop.hbase.master.HMaster.finishActiveMasterInitialization(HMaster.java:921) > at > org.apache.hadoop.hbase.master.HMaster.startActiveMasterManager(HMaster.java:2034) > at org.apache.hadoop.hbase.master.HMaster.run(HMaster.java:553) > at java.lang.Thread.run(Thread.java:748) > 2018-01-26 02:36:36,691 ERROR [M:0;1f0c4777c1ba:35049] > helpers.MarkerIgnoringBase(159): Failed to become active master > java.lang.IllegalStateException: Expected the service > ClusterSchemaServiceImpl [FAILED] to be RUNNING, but the service has FAILED > at > org.apache.hbase.thirdparty.com.google.common.util.concurrent.AbstractService.checkCurrentState(AbstractService.java:345) > at > org.apache.hbase.thirdparty.com.google.common.util.concurrent.AbstractService.awaitRunning(AbstractService.java:291) > at > 
org.apache.hadoop.hbase.master.HMaster.initClusterSchemaService(HMaster.java:1061) > at > org.apache.hadoop.hbase.master.HMaster.finishActiveMasterInitialization(HMaster.java:921) > at > org.apache.hadoop.hbase.master.HMaster.startActiveMasterManager(HMaster.java:2034) > at org.apache.hadoop.hbase.master.HMaster.run(HMaster.java:553) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hadoop.hbase.DoNotRetryIOException: > hconnection-0x18cd2ac8 closed > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:722) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:714) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:684) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.getRegionLocation(ConnectionImplementation.java:562) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.getRegionLocation(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.HRegionLocator.getRegionLocation(HRegionLocator.java:73) > at > org.apache.hadoop.hbase.client.RegionServerCallable.prepare(RegionServerCallable.java:223) > at > org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:105) > at org.apache.hadoop.hbase.client.HTable.get(HTable.java:388) > at org.apache.hadoop.hbase.client.HTable.get(HTable.java:362) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:141) > at > 
org.apache.hadoop.hbase.master.TableNamespaceManager.isTableAvailableAndInitialized(TableNamespaceManager.java:281) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.start(TableNamespaceManager.java:103) > at > org.apache.hadoop.hbase.master.ClusterSchemaServiceImpl.doStart(ClusterSchemaServiceImpl.java:62) > at > org.apache.hbase.thirdparty.com.google.common.util.concurrent.AbstractService.startAsync(AbstractService.java:226) > at > org.apache.hadoop.hbase.master.HMaster.initClusterSchemaService(HMaster.java:1059) > ... 4 more > 2018-01-26 02:36:36,691 ERROR [M:0;1f0c4777c1ba:35049] > helpers.MarkerIgnoringBase(143): Master server abort: loaded coprocessors > are: > [org.apache.hadoop.hbase.security.access.TestCoprocessorWhitelistMasterObserver$TestRegionObserver, > org.apache.hadoop.hbase.security.access.CoprocessorWhitelistMasterObserver, > org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint] > 2018-01-26 02:36:36,691 ERROR [M:0;1f0c4777c1ba:35049] > helpers.MarkerIgnoringBase(159): ***** ABORTING master > 1f0c4777c1ba,35049,1516934184742: Unhandled exception. Starting shutdown. 
> ***** > java.lang.IllegalStateException: Expected the service > ClusterSchemaServiceImpl [FAILED] to be RUNNING, but the service has FAILED > at > org.apache.hbase.thirdparty.com.google.common.util.concurrent.AbstractService.checkCurrentState(AbstractService.java:345) > at > org.apache.hbase.thirdparty.com.google.common.util.concurrent.AbstractService.awaitRunning(AbstractService.java:291) > at > org.apache.hadoop.hbase.master.HMaster.initClusterSchemaService(HMaster.java:1061) > at > org.apache.hadoop.hbase.master.HMaster.finishActiveMasterInitialization(HMaster.java:921) > at > org.apache.hadoop.hbase.master.HMaster.startActiveMasterManager(HMaster.java:2034) > at org.apache.hadoop.hbase.master.HMaster.run(HMaster.java:553) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hadoop.hbase.DoNotRetryIOException: > hconnection-0x18cd2ac8 closed > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:722) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:714) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.locateRegion(ConnectionImplementation.java:684) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.locateRegion(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.ConnectionImplementation.getRegionLocation(ConnectionImplementation.java:562) > at > org.apache.hadoop.hbase.client.ConnectionUtils$ShortCircuitingClusterConnection.getRegionLocation(ConnectionUtils.java:131) > at > org.apache.hadoop.hbase.client.HRegionLocator.getRegionLocation(HRegionLocator.java:73) > at > 
org.apache.hadoop.hbase.client.RegionServerCallable.prepare(RegionServerCallable.java:223) > at > org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:105) > at org.apache.hadoop.hbase.client.HTable.get(HTable.java:388) > at org.apache.hadoop.hbase.client.HTable.get(HTable.java:362) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:141) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.isTableAvailableAndInitialized(TableNamespaceManager.java:281) > at > org.apache.hadoop.hbase.master.TableNamespaceManager.start(TableNamespaceManager.java:103) > at > org.apache.hadoop.hbase.master.ClusterSchemaServiceImpl.doStart(ClusterSchemaServiceImpl.java:62) > at > org.apache.hbase.thirdparty.com.google.common.util.concurrent.AbstractService.startAsync(AbstractService.java:226) > at > org.apache.hadoop.hbase.master.HMaster.initClusterSchemaService(HMaster.java:1059) > ... 4 more > 2018-01-26 02:36:36,692 DEBUG [M:0;1f0c4777c1ba:35049] > coprocessor.CoprocessorHost(289): Stop coprocessor > org.apache.hadoop.hbase.security.access.CoprocessorWhitelistMasterObserver > 2018-01-26 02:36:36,692 WARN [M:0;1f0c4777c1ba:35049] > coprocessor.BaseEnvironment(99): Not stopping coprocessor > org.apache.hadoop.hbase.security.access.CoprocessorWhitelistMasterObserver > because not active (state=STOPPED) > 2018-01-26 02:36:36,692 INFO [M:0;1f0c4777c1ba:35049] > regionserver.HRegionServer(2142): ***** STOPPING region server > '1f0c4777c1ba,35049,1516934184742' *****{noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)