[ https://issues.apache.org/jira/browse/HBASE-21919?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16771199#comment-16771199 ]
Xiang Li edited comment on HBASE-21919 at 2/18/19 4:29 PM: ----------------------------------------------------------- The root cause seems to be as follows: {code:title=HMaster#createTable()|borderStyle=solid} return MasterProcedureUtil .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) { @Override protected void run() throws IOException { getMaster().getMasterCoprocessorHost().preCreateTable(desc, newRegions); LOG.info(getClientIdAuditPrefix() + " create " + desc); // TODO: We can handle/merge duplicate requests, and differentiate the case of // TableExistsException by saying if the schema is the same or not. // // We need to wait for the procedure to potentially fail due to "prepare" sanity // checks. This will block only the beginning of the procedure. See HBASE-19953. ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch(); submitProcedure( new CreateTableProcedure(procedureExecutor.getEnvironment(), desc, newRegions, latch)); latch.await(); getMaster().getMasterCoprocessorHost().postCreateTable(desc, newRegions); <-- postCreateTable is executed as soon as the latch is released } {code} {code:title=CreateTableProcedure#executeFromState()|borderStyle=solid} case CREATE_TABLE_PRE_OPERATION: // Verify if we can create the table boolean exists = !prepareCreate(env); releaseSyncLatch(); if (exists) { assert isFailed() : "the delete should have an exception here"; return Flow.NO_MORE_STATE; } preCreate(env); setNextState(CreateTableState.CREATE_TABLE_WRITE_FS_LAYOUT); break; {code} releaseSyncLatch() is called in CREATE_TABLE_PRE_OPERATION, which is too early: the latch is released before the table state has been written to hbase:meta, so postCreateTable (and therefore RSGroupAdminEndpoint#postCreateTable) can run before the table state exists in meta. Should releaseSyncLatch() be called in CREATE_TABLE_ADD_TO_META instead? Does that make sense to you? 
[~xucang] I am testing my patch with unit tests (UT). was (Author: water): The root cause seems here {code:title=HMaster#createTable()|borderStyle=solid} return MasterProcedureUtil .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) { @Override protected void run() throws IOException { getMaster().getMasterCoprocessorHost().preCreateTable(desc, newRegions); LOG.info(getClientIdAuditPrefix() + " create " + desc); // TODO: We can handle/merge duplicate requests, and differentiate the case of // TableExistsException by saying if the schema is the same or not. // // We need to wait for the procedure to potentially fail due to "prepare" sanity // checks. This will block only the beginning of the procedure. See HBASE-19953. ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch(); submitProcedure( new CreateTableProcedure(procedureExecutor.getEnvironment(), desc, newRegions, latch)); latch.await(); getMaster().getMasterCoprocessorHost().postCreateTable(desc, newRegions); <-- postCreateTable is executed after latch is released } {code} {code:title=CreateTableProcedure#executeFromState()|borderStyle=solid} case CREATE_TABLE_PRE_OPERATION: // Verify if we can create the table boolean exists = !prepareCreate(env); releaseSyncLatch(); if (exists) { assert isFailed() : "the delete should have an exception here"; return Flow.NO_MORE_STATE; } preCreate(env); setNextState(CreateTableState.CREATE_TABLE_WRITE_FS_LAYOUT); break; {code} releaseSyncLatch() is called in CREATE_TABLE_PRE_OPERATION, which is a little earlier. Supposed to be called in CREATE_TABLE_ADD_TO_META? 
Test my patch using UT > RSGroupAdminEndpoint#postCreateTable tries to get table state before it is in > meta > ---------------------------------------------------------------------------------- > > Key: HBASE-21919 > URL: https://issues.apache.org/jira/browse/HBASE-21919 > Project: HBase > Issue Type: Bug > Components: proc-v2, rsgroup > Reporter: Xiang Li > Assignee: Xiang Li > Priority: Major > > Using the latest master branch. When creating a table, the following error > could be seen in the master's log and it only happens when rsgroup is enabled. > {code} > 2019-02-17 04:08:41,853 INFO > [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] master.HMaster: > Client=lixiang//10.23.10.9 create 't1', {NAME => 'cf1', VERSIONS => '1', > EVICT_BLOCKS_ON_CLOSE => 'false', NEW_VERSION_BEHAVIOR => 'false', > KEEP_DELETED_CELLS => 'FALSE', CACHE_DATA_ON_WRITE => 'false', > DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', > REPLICATION_SCOPE => '0', BLOOMFILTER => 'ROW', CACHE_INDEX_ON_WRITE => > 'false', IN_MEMORY => 'false', CACHE_BLOOMS_ON_WRITE => 'false', > PREFETCH_BLOCKS_ON_OPEN => 'false', COMPRESSION => 'NONE', BLOCKCACHE => > 'true', BLOCKSIZE => '65536'} > 2019-02-17 04:08:41,958 INFO > [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] > rsgroup.RSGroupAdminServer: Moving table t1 to RSGroup default > 2019-02-17 04:08:41,962 INFO [RegionOpenAndInitThread-t1-1] > regionserver.HRegion: creating HRegion t1 HTD == 't1', {NAME => 'cf1', > VERSIONS => '1', EVICT_BLOCKS_ON_CLOSE => 'false', NEW_VERSION_BEHAVIOR => > 'false', KEEP_DELETED_CELLS => 'FALSE', CACHE_DATA_ON_WRITE => 'false', > DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', > REPLICATION_SCOPE => '0', BLOOMFILTER => 'ROW', CACHE_INDEX_ON_WRITE => > 'false', IN_MEMORY => 'false', CACHE_BLOOMS_ON_WRITE => 'false', > PREFETCH_BLOCKS_ON_OPEN => 'false', COMPRESSION => 'NONE', BLOCKCACHE => > 'true', BLOCKSIZE => '65536'} RootDir = > 
file:/home/lixiang/standalonehbase/hbase/.tmp Table name == t1 > 2019-02-17 04:08:41,964 INFO [RegionOpenAndInitThread-t1-1] > regionserver.HRegion: Closed > t1,,1550376521847.1954e4b74647fb1f85342bdff188bdf4. > 2019-02-17 04:08:41,967 ERROR > [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] > master.TableStateManager: Unable to get table t1 state > org.apache.hadoop.hbase.master.TableStateManager$TableStateNotFoundException: > t1 > at > org.apache.hadoop.hbase.master.TableStateManager.getTableState(TableStateManager.java:215) > at > org.apache.hadoop.hbase.master.TableStateManager.isTableState(TableStateManager.java:147) > at > org.apache.hadoop.hbase.master.assignment.AssignmentManager.isTableDisabled(AssignmentManager.java:354) > at > org.apache.hadoop.hbase.rsgroup.RSGroupAdminServer.moveTables(RSGroupAdminServer.java:411) > at > org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint.assignTableToGroup(RSGroupAdminEndpoint.java:471) > at > org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint.postCreateTable(RSGroupAdminEndpoint.java:494) > at > org.apache.hadoop.hbase.master.MasterCoprocessorHost$13.call(MasterCoprocessorHost.java:350) > at > org.apache.hadoop.hbase.master.MasterCoprocessorHost$13.call(MasterCoprocessorHost.java:347) > at > org.apache.hadoop.hbase.coprocessor.CoprocessorHost$ObserverOperationWithoutResult.callObserver(CoprocessorHost.java:551) > at > org.apache.hadoop.hbase.coprocessor.CoprocessorHost.execOperation(CoprocessorHost.java:625) > at > org.apache.hadoop.hbase.master.MasterCoprocessorHost.postCreateTable(MasterCoprocessorHost.java:347) > at org.apache.hadoop.hbase.master.HMaster$4.run(HMaster.java:2083) > at > org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.submitProcedure(MasterProcedureUtil.java:134) > at > org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:2066) > at > org.apache.hadoop.hbase.master.MasterRpcServices.createTable(MasterRpcServices.java:644) > at > 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos$MasterService$2.callBlockingMethod(MasterProtos.java) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:413) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:130) > at > org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:324) > at > org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:304) > 2019-02-17 04:08:41,969 INFO [PEWorker-14] hbase.MetaTableAccessor: Added 1 > regions to meta. > 2019-02-17 04:08:41,972 INFO [PEWorker-14] hbase.MetaTableAccessor: Updated > tableName=t1, state=ENABLING in hbase:meta > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)