[ https://issues.apache.org/jira/browse/HBASE-22486?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Guanghao Zhang updated HBASE-22486: ----------------------------------- Description: When the LockProcedure was scheduled. It will called TimeoutExecutorThread#add method. {code:java} public void add(Procedure<TEnvironment> procedure) { assert procedure.getState() == ProcedureState.WAITING_TIMEOUT; LOG.info("ADDED {}; timeout={}, timestamp={}", procedure, procedure.getTimeout(), procedure.getTimeoutTimestamp()); queue.add(new DelayedProcedure<>(procedure)); } {code} But LockProcedure's unlock method may called before it was scheduled. And it will change the state to RUNNABLE. This will make the assert error. {code:java} // Can be called before procedure gets scheduled, in which case, the execute() will finish // immediately and release the underlying locks. public void unlock(final MasterProcedureEnv env) { unlock.set(true); locked.set(false); // Maybe timeout already awakened the event and the procedure has finished. synchronized (event) { if (!event.isReady()) { LOG.info("Update procedure {} state to RUNNABLE", this); setState(ProcedureProtos.ProcedureState.RUNNABLE); event.wake(env.getProcedureScheduler()); } } } {code} I added log in these methods. {color:#FF0000}2019-05-28 15:34:16,179 INFO [Time-limited test] locking.LockProcedure(223): Update procedure pid=21, state=WAITING_TIMEOUT, locked=true; org.apache.hadoop.hbase.master.locking.LockProcedure, tableName=namespace:table, type=EXCLUSIVE state to RUNNABLE{color} 2019-05-28 15:34:16,179 INFO [PEWorker-1] procedure2.TimeoutExecutorThread(81): Procedure pid=21, state=WAITING_TIMEOUT, locked=true; org.apache.hadoop.hbase.master.locking.LockProcedure, tableName=namespace:table, type=EXCLUSIVE state is WAITING_TIMEOUT 2019-05-28 15:34:16,179 DEBUG [Time-limited test] procedure.MasterProcedureScheduler(352): Add TableQueue(namespace:table, xlock=true (21) sharedLock=0 size=1) to run queue because: pid=21, state=RUNNABLE, locked=true; org.apache.hadoop.hbase.master.locking.LockProcedure, tableName=namespace:table, type=EXCLUSIVE has lock 2019-05-28 15:34:16,179 INFO [PEWorker-1] procedure2.ProcedureExecutor$WorkerThread(1967): ASSERT pid=21 java.lang.AssertionError at org.apache.hadoop.hbase.procedure2.TimeoutExecutorThread.add(TimeoutExecutorThread.java:82) at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1690) at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1395) at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$1100(ProcedureExecutor.java:78) at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1965) was: When the LockProcedure was scheduled. It will called TimeoutExecutorThread#add method. {code:java} public void add(Procedure<TEnvironment> procedure) { assert procedure.getState() == ProcedureState.WAITING_TIMEOUT; LOG.info("ADDED {}; timeout={}, timestamp={}", procedure, procedure.getTimeout(), procedure.getTimeoutTimestamp()); queue.add(new DelayedProcedure<>(procedure)); } {code} But LockProcedure's unlock method may called before it was scheduled. And it will change the state to RUNNABLE. This will make the assert error. {code:java} // Can be called before procedure gets scheduled, in which case, the execute() will finish // immediately and release the underlying locks. public void unlock(final MasterProcedureEnv env) { unlock.set(true); locked.set(false); // Maybe timeout already awakened the event and the procedure has finished. synchronized (event) { if (!event.isReady()) { LOG.info("Update procedure {} state to RUNNABLE", this); setState(ProcedureProtos.ProcedureState.RUNNABLE); event.wake(env.getProcedureScheduler()); } } } {code} > Fix flaky test TestLockManager > ------------------------------ > > Key: HBASE-22486 > URL: https://issues.apache.org/jira/browse/HBASE-22486 > Project: HBase > Issue Type: Bug > Reporter: Guanghao Zhang > Assignee: Guanghao Zhang > Priority: Major > > When the LockProcedure was scheduled. It will called > TimeoutExecutorThread#add method. > {code:java} > public void add(Procedure<TEnvironment> procedure) { > assert procedure.getState() == ProcedureState.WAITING_TIMEOUT; > LOG.info("ADDED {}; timeout={}, timestamp={}", procedure, > procedure.getTimeout(), > procedure.getTimeoutTimestamp()); > queue.add(new DelayedProcedure<>(procedure)); > } > {code} > But LockProcedure's unlock method may called before it was scheduled. And it > will change the state to RUNNABLE. This will make the assert error. > {code:java} > // Can be called before procedure gets scheduled, in which case, the > execute() will finish > // immediately and release the underlying locks. > public void unlock(final MasterProcedureEnv env) { > unlock.set(true); > locked.set(false); > // Maybe timeout already awakened the event and the procedure has finished. > synchronized (event) { > if (!event.isReady()) { > LOG.info("Update procedure {} state to RUNNABLE", this); > setState(ProcedureProtos.ProcedureState.RUNNABLE); > event.wake(env.getProcedureScheduler()); > } > } > } > {code} > > I added log in these methods. > {color:#FF0000}2019-05-28 15:34:16,179 INFO [Time-limited test] > locking.LockProcedure(223): Update procedure pid=21, state=WAITING_TIMEOUT, > locked=true; org.apache.hadoop.hbase.master.locking.LockProcedure, > tableName=namespace:table, type=EXCLUSIVE state to RUNNABLE{color} > 2019-05-28 15:34:16,179 INFO [PEWorker-1] > procedure2.TimeoutExecutorThread(81): Procedure pid=21, > state=WAITING_TIMEOUT, locked=true; > org.apache.hadoop.hbase.master.locking.LockProcedure, > tableName=namespace:table, type=EXCLUSIVE state is WAITING_TIMEOUT > 2019-05-28 15:34:16,179 DEBUG [Time-limited test] > procedure.MasterProcedureScheduler(352): Add TableQueue(namespace:table, > xlock=true (21) sharedLock=0 size=1) to run queue because: pid=21, > state=RUNNABLE, locked=true; > org.apache.hadoop.hbase.master.locking.LockProcedure, > tableName=namespace:table, type=EXCLUSIVE has lock > 2019-05-28 15:34:16,179 INFO [PEWorker-1] > procedure2.ProcedureExecutor$WorkerThread(1967): ASSERT pid=21 > java.lang.AssertionError > at > org.apache.hadoop.hbase.procedure2.TimeoutExecutorThread.add(TimeoutExecutorThread.java:82) > at > org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1690) > at > org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1395) > at > org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$1100(ProcedureExecutor.java:78) > > at > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1965) -- This message was sent by Atlassian JIRA (v7.6.3#76005)