http://git-wip-us.apache.org/repos/asf/hbase-site/blob/bd675fa3/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.KeepAliveWorkerThread.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.KeepAliveWorkerThread.html b/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.KeepAliveWorkerThread.html index 3bc66bb..97aa79c 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.KeepAliveWorkerThread.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.KeepAliveWorkerThread.html @@ -1435,459 +1435,460 @@ <span class="sourceLineNo">1427</span> */<a name="line.1427"></a> <span class="sourceLineNo">1428</span> private void execProcedure(final RootProcedureState procStack,<a name="line.1428"></a> <span class="sourceLineNo">1429</span> final Procedure<TEnvironment> procedure) {<a name="line.1429"></a> -<span class="sourceLineNo">1430</span> Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE);<a name="line.1430"></a> -<span class="sourceLineNo">1431</span><a name="line.1431"></a> -<span class="sourceLineNo">1432</span> // Procedures can suspend themselves. They skip out by throwing a ProcedureSuspendedException.<a name="line.1432"></a> -<span class="sourceLineNo">1433</span> // The exception is caught below and then we hurry to the exit without disturbing state. The<a name="line.1433"></a> -<span class="sourceLineNo">1434</span> // idea is that the processing of this procedure will be unsuspended later by an external event<a name="line.1434"></a> -<span class="sourceLineNo">1435</span> // such the report of a region open. 
TODO: Currently, its possible for two worker threads<a name="line.1435"></a> -<span class="sourceLineNo">1436</span> // to be working on the same procedure concurrently (locking in procedures is NOT about<a name="line.1436"></a> -<span class="sourceLineNo">1437</span> // concurrency but about tying an entity to a procedure; i.e. a region to a particular<a name="line.1437"></a> -<span class="sourceLineNo">1438</span> // procedure instance). This can make for issues if both threads are changing state.<a name="line.1438"></a> -<span class="sourceLineNo">1439</span> // See env.getProcedureScheduler().wakeEvent(regionNode.getProcedureEvent());<a name="line.1439"></a> -<span class="sourceLineNo">1440</span> // in RegionTransitionProcedure#reportTransition for example of Procedure putting<a name="line.1440"></a> -<span class="sourceLineNo">1441</span> // itself back on the scheduler making it possible for two threads running against<a name="line.1441"></a> -<span class="sourceLineNo">1442</span> // the one Procedure. 
Might be ok if they are both doing different, idempotent sections.<a name="line.1442"></a> -<span class="sourceLineNo">1443</span> boolean suspended = false;<a name="line.1443"></a> -<span class="sourceLineNo">1444</span><a name="line.1444"></a> -<span class="sourceLineNo">1445</span> // Whether to 're-' -execute; run through the loop again.<a name="line.1445"></a> -<span class="sourceLineNo">1446</span> boolean reExecute = false;<a name="line.1446"></a> -<span class="sourceLineNo">1447</span><a name="line.1447"></a> -<span class="sourceLineNo">1448</span> Procedure<TEnvironment>[] subprocs = null;<a name="line.1448"></a> -<span class="sourceLineNo">1449</span> do {<a name="line.1449"></a> -<span class="sourceLineNo">1450</span> reExecute = false;<a name="line.1450"></a> -<span class="sourceLineNo">1451</span> try {<a name="line.1451"></a> -<span class="sourceLineNo">1452</span> subprocs = procedure.doExecute(getEnvironment());<a name="line.1452"></a> -<span class="sourceLineNo">1453</span> if (subprocs != null && subprocs.length == 0) {<a name="line.1453"></a> -<span class="sourceLineNo">1454</span> subprocs = null;<a name="line.1454"></a> -<span class="sourceLineNo">1455</span> }<a name="line.1455"></a> -<span class="sourceLineNo">1456</span> } catch (ProcedureSuspendedException e) {<a name="line.1456"></a> -<span class="sourceLineNo">1457</span> if (LOG.isTraceEnabled()) {<a name="line.1457"></a> -<span class="sourceLineNo">1458</span> LOG.trace("Suspend " + procedure);<a name="line.1458"></a> -<span class="sourceLineNo">1459</span> }<a name="line.1459"></a> -<span class="sourceLineNo">1460</span> suspended = true;<a name="line.1460"></a> -<span class="sourceLineNo">1461</span> } catch (ProcedureYieldException e) {<a name="line.1461"></a> -<span class="sourceLineNo">1462</span> if (LOG.isTraceEnabled()) {<a name="line.1462"></a> -<span class="sourceLineNo">1463</span> LOG.trace("Yield " + procedure + ": " + e.getMessage(), e);<a name="line.1463"></a> -<span 
class="sourceLineNo">1464</span> }<a name="line.1464"></a> -<span class="sourceLineNo">1465</span> scheduler.yield(procedure);<a name="line.1465"></a> -<span class="sourceLineNo">1466</span> return;<a name="line.1466"></a> -<span class="sourceLineNo">1467</span> } catch (InterruptedException e) {<a name="line.1467"></a> -<span class="sourceLineNo">1468</span> if (LOG.isTraceEnabled()) {<a name="line.1468"></a> -<span class="sourceLineNo">1469</span> LOG.trace("Yield interrupt " + procedure + ": " + e.getMessage(), e);<a name="line.1469"></a> -<span class="sourceLineNo">1470</span> }<a name="line.1470"></a> -<span class="sourceLineNo">1471</span> handleInterruptedException(procedure, e);<a name="line.1471"></a> -<span class="sourceLineNo">1472</span> scheduler.yield(procedure);<a name="line.1472"></a> -<span class="sourceLineNo">1473</span> return;<a name="line.1473"></a> -<span class="sourceLineNo">1474</span> } catch (Throwable e) {<a name="line.1474"></a> -<span class="sourceLineNo">1475</span> // Catch NullPointerExceptions or similar errors...<a name="line.1475"></a> -<span class="sourceLineNo">1476</span> String msg = "CODE-BUG: Uncaught runtime exception: " + procedure;<a name="line.1476"></a> -<span class="sourceLineNo">1477</span> LOG.error(msg, e);<a name="line.1477"></a> -<span class="sourceLineNo">1478</span> procedure.setFailure(new RemoteProcedureException(msg, e));<a name="line.1478"></a> -<span class="sourceLineNo">1479</span> }<a name="line.1479"></a> -<span class="sourceLineNo">1480</span><a name="line.1480"></a> -<span class="sourceLineNo">1481</span> if (!procedure.isFailed()) {<a name="line.1481"></a> -<span class="sourceLineNo">1482</span> if (subprocs != null) {<a name="line.1482"></a> -<span class="sourceLineNo">1483</span> if (subprocs.length == 1 && subprocs[0] == procedure) {<a name="line.1483"></a> -<span class="sourceLineNo">1484</span> // Procedure returned itself. 
Quick-shortcut for a state machine-like procedure;<a name="line.1484"></a> -<span class="sourceLineNo">1485</span> // i.e. we go around this loop again rather than go back out on the scheduler queue.<a name="line.1485"></a> -<span class="sourceLineNo">1486</span> subprocs = null;<a name="line.1486"></a> -<span class="sourceLineNo">1487</span> reExecute = true;<a name="line.1487"></a> -<span class="sourceLineNo">1488</span> if (LOG.isTraceEnabled()) {<a name="line.1488"></a> -<span class="sourceLineNo">1489</span> LOG.trace("Short-circuit to next step on pid=" + procedure.getProcId());<a name="line.1489"></a> -<span class="sourceLineNo">1490</span> }<a name="line.1490"></a> -<span class="sourceLineNo">1491</span> } else {<a name="line.1491"></a> -<span class="sourceLineNo">1492</span> // Yield the current procedure, and make the subprocedure runnable<a name="line.1492"></a> -<span class="sourceLineNo">1493</span> // subprocs may come back 'null'.<a name="line.1493"></a> -<span class="sourceLineNo">1494</span> subprocs = initializeChildren(procStack, procedure, subprocs);<a name="line.1494"></a> -<span class="sourceLineNo">1495</span> LOG.info("Initialized subprocedures=" +<a name="line.1495"></a> -<span class="sourceLineNo">1496</span> (subprocs == null? 
null:<a name="line.1496"></a> -<span class="sourceLineNo">1497</span> Stream.of(subprocs).map(e -> "{" + e.toString() + "}").<a name="line.1497"></a> -<span class="sourceLineNo">1498</span> collect(Collectors.toList()).toString()));<a name="line.1498"></a> -<span class="sourceLineNo">1499</span> }<a name="line.1499"></a> -<span class="sourceLineNo">1500</span> } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {<a name="line.1500"></a> -<span class="sourceLineNo">1501</span> if (LOG.isTraceEnabled()) {<a name="line.1501"></a> -<span class="sourceLineNo">1502</span> LOG.trace("Added to timeoutExecutor " + procedure);<a name="line.1502"></a> -<span class="sourceLineNo">1503</span> }<a name="line.1503"></a> -<span class="sourceLineNo">1504</span> timeoutExecutor.add(procedure);<a name="line.1504"></a> -<span class="sourceLineNo">1505</span> } else if (!suspended) {<a name="line.1505"></a> -<span class="sourceLineNo">1506</span> // No subtask, so we are done<a name="line.1506"></a> -<span class="sourceLineNo">1507</span> procedure.setState(ProcedureState.SUCCESS);<a name="line.1507"></a> -<span class="sourceLineNo">1508</span> }<a name="line.1508"></a> -<span class="sourceLineNo">1509</span> }<a name="line.1509"></a> -<span class="sourceLineNo">1510</span><a name="line.1510"></a> -<span class="sourceLineNo">1511</span> // Add the procedure to the stack<a name="line.1511"></a> -<span class="sourceLineNo">1512</span> procStack.addRollbackStep(procedure);<a name="line.1512"></a> -<span class="sourceLineNo">1513</span><a name="line.1513"></a> -<span class="sourceLineNo">1514</span> // allows to kill the executor before something is stored to the wal.<a name="line.1514"></a> -<span class="sourceLineNo">1515</span> // useful to test the procedure recovery.<a name="line.1515"></a> -<span class="sourceLineNo">1516</span> if (testing != null && testing.shouldKillBeforeStoreUpdate(suspended)) {<a name="line.1516"></a> -<span class="sourceLineNo">1517</span> 
LOG.debug("TESTING: Kill before store update: " + procedure);<a name="line.1517"></a> -<span class="sourceLineNo">1518</span> stop();<a name="line.1518"></a> -<span class="sourceLineNo">1519</span> return;<a name="line.1519"></a> -<span class="sourceLineNo">1520</span> }<a name="line.1520"></a> -<span class="sourceLineNo">1521</span><a name="line.1521"></a> -<span class="sourceLineNo">1522</span> // TODO: The code here doesn't check if store is running before persisting to the store as<a name="line.1522"></a> -<span class="sourceLineNo">1523</span> // it relies on the method call below to throw RuntimeException to wind up the stack and<a name="line.1523"></a> -<span class="sourceLineNo">1524</span> // executor thread to stop. The statement following the method call below seems to check if<a name="line.1524"></a> -<span class="sourceLineNo">1525</span> // store is not running, to prevent scheduling children procedures, re-execution or yield<a name="line.1525"></a> -<span class="sourceLineNo">1526</span> // of this procedure. This may need more scrutiny and subsequent cleanup in future<a name="line.1526"></a> -<span class="sourceLineNo">1527</span> //<a name="line.1527"></a> -<span class="sourceLineNo">1528</span> // Commit the transaction even if a suspend (state may have changed). 
Note this append<a name="line.1528"></a> -<span class="sourceLineNo">1529</span> // can take a bunch of time to complete.<a name="line.1529"></a> -<span class="sourceLineNo">1530</span> updateStoreOnExec(procStack, procedure, subprocs);<a name="line.1530"></a> -<span class="sourceLineNo">1531</span><a name="line.1531"></a> -<span class="sourceLineNo">1532</span> // if the store is not running we are aborting<a name="line.1532"></a> -<span class="sourceLineNo">1533</span> if (!store.isRunning()) return;<a name="line.1533"></a> -<span class="sourceLineNo">1534</span> // if the procedure is kind enough to pass the slot to someone else, yield<a name="line.1534"></a> -<span class="sourceLineNo">1535</span> if (procedure.isRunnable() && !suspended &&<a name="line.1535"></a> -<span class="sourceLineNo">1536</span> procedure.isYieldAfterExecutionStep(getEnvironment())) {<a name="line.1536"></a> -<span class="sourceLineNo">1537</span> scheduler.yield(procedure);<a name="line.1537"></a> -<span class="sourceLineNo">1538</span> return;<a name="line.1538"></a> -<span class="sourceLineNo">1539</span> }<a name="line.1539"></a> -<span class="sourceLineNo">1540</span><a name="line.1540"></a> -<span class="sourceLineNo">1541</span> assert (reExecute && subprocs == null) || !reExecute;<a name="line.1541"></a> -<span class="sourceLineNo">1542</span> } while (reExecute);<a name="line.1542"></a> -<span class="sourceLineNo">1543</span> // Submit the new subprocedures<a name="line.1543"></a> -<span class="sourceLineNo">1544</span> if (subprocs != null && !procedure.isFailed()) {<a name="line.1544"></a> -<span class="sourceLineNo">1545</span> submitChildrenProcedures(subprocs);<a name="line.1545"></a> -<span class="sourceLineNo">1546</span> }<a name="line.1546"></a> -<span class="sourceLineNo">1547</span><a name="line.1547"></a> -<span class="sourceLineNo">1548</span> // if the procedure is complete and has a parent, count down the children latch.<a name="line.1548"></a> -<span 
class="sourceLineNo">1549</span> // If 'suspended', do nothing to change state -- let other threads handle unsuspend event.<a name="line.1549"></a> -<span class="sourceLineNo">1550</span> if (!suspended && procedure.isFinished() && procedure.hasParent()) {<a name="line.1550"></a> -<span class="sourceLineNo">1551</span> countDownChildren(procStack, procedure);<a name="line.1551"></a> -<span class="sourceLineNo">1552</span> }<a name="line.1552"></a> -<span class="sourceLineNo">1553</span> }<a name="line.1553"></a> -<span class="sourceLineNo">1554</span><a name="line.1554"></a> -<span class="sourceLineNo">1555</span> private Procedure[] initializeChildren(final RootProcedureState procStack,<a name="line.1555"></a> -<span class="sourceLineNo">1556</span> final Procedure procedure, final Procedure[] subprocs) {<a name="line.1556"></a> -<span class="sourceLineNo">1557</span> assert subprocs != null : "expected subprocedures";<a name="line.1557"></a> -<span class="sourceLineNo">1558</span> final long rootProcId = getRootProcedureId(procedure);<a name="line.1558"></a> -<span class="sourceLineNo">1559</span> for (int i = 0; i < subprocs.length; ++i) {<a name="line.1559"></a> -<span class="sourceLineNo">1560</span> final Procedure subproc = subprocs[i];<a name="line.1560"></a> -<span class="sourceLineNo">1561</span> if (subproc == null) {<a name="line.1561"></a> -<span class="sourceLineNo">1562</span> String msg = "subproc[" + i + "] is null, aborting the procedure";<a name="line.1562"></a> -<span class="sourceLineNo">1563</span> procedure.setFailure(new RemoteProcedureException(msg,<a name="line.1563"></a> -<span class="sourceLineNo">1564</span> new IllegalArgumentIOException(msg)));<a name="line.1564"></a> -<span class="sourceLineNo">1565</span> return null;<a name="line.1565"></a> -<span class="sourceLineNo">1566</span> }<a name="line.1566"></a> -<span class="sourceLineNo">1567</span><a name="line.1567"></a> -<span class="sourceLineNo">1568</span> assert 
subproc.getState() == ProcedureState.INITIALIZING : subproc;<a name="line.1568"></a> -<span class="sourceLineNo">1569</span> subproc.setParentProcId(procedure.getProcId());<a name="line.1569"></a> -<span class="sourceLineNo">1570</span> subproc.setRootProcId(rootProcId);<a name="line.1570"></a> -<span class="sourceLineNo">1571</span> subproc.setProcId(nextProcId());<a name="line.1571"></a> -<span class="sourceLineNo">1572</span> procStack.addSubProcedure(subproc);<a name="line.1572"></a> -<span class="sourceLineNo">1573</span> }<a name="line.1573"></a> -<span class="sourceLineNo">1574</span><a name="line.1574"></a> -<span class="sourceLineNo">1575</span> if (!procedure.isFailed()) {<a name="line.1575"></a> -<span class="sourceLineNo">1576</span> procedure.setChildrenLatch(subprocs.length);<a name="line.1576"></a> -<span class="sourceLineNo">1577</span> switch (procedure.getState()) {<a name="line.1577"></a> -<span class="sourceLineNo">1578</span> case RUNNABLE:<a name="line.1578"></a> -<span class="sourceLineNo">1579</span> procedure.setState(ProcedureState.WAITING);<a name="line.1579"></a> -<span class="sourceLineNo">1580</span> break;<a name="line.1580"></a> -<span class="sourceLineNo">1581</span> case WAITING_TIMEOUT:<a name="line.1581"></a> -<span class="sourceLineNo">1582</span> timeoutExecutor.add(procedure);<a name="line.1582"></a> -<span class="sourceLineNo">1583</span> break;<a name="line.1583"></a> -<span class="sourceLineNo">1584</span> default:<a name="line.1584"></a> -<span class="sourceLineNo">1585</span> break;<a name="line.1585"></a> -<span class="sourceLineNo">1586</span> }<a name="line.1586"></a> -<span class="sourceLineNo">1587</span> }<a name="line.1587"></a> -<span class="sourceLineNo">1588</span> return subprocs;<a name="line.1588"></a> -<span class="sourceLineNo">1589</span> }<a name="line.1589"></a> -<span class="sourceLineNo">1590</span><a name="line.1590"></a> -<span class="sourceLineNo">1591</span> private void 
submitChildrenProcedures(final Procedure[] subprocs) {<a name="line.1591"></a> -<span class="sourceLineNo">1592</span> for (int i = 0; i < subprocs.length; ++i) {<a name="line.1592"></a> -<span class="sourceLineNo">1593</span> final Procedure subproc = subprocs[i];<a name="line.1593"></a> -<span class="sourceLineNo">1594</span> subproc.updateMetricsOnSubmit(getEnvironment());<a name="line.1594"></a> -<span class="sourceLineNo">1595</span> assert !procedures.containsKey(subproc.getProcId());<a name="line.1595"></a> -<span class="sourceLineNo">1596</span> procedures.put(subproc.getProcId(), subproc);<a name="line.1596"></a> -<span class="sourceLineNo">1597</span> scheduler.addFront(subproc);<a name="line.1597"></a> -<span class="sourceLineNo">1598</span> }<a name="line.1598"></a> -<span class="sourceLineNo">1599</span> }<a name="line.1599"></a> -<span class="sourceLineNo">1600</span><a name="line.1600"></a> -<span class="sourceLineNo">1601</span> private void countDownChildren(final RootProcedureState procStack, final Procedure procedure) {<a name="line.1601"></a> -<span class="sourceLineNo">1602</span> final Procedure parent = procedures.get(procedure.getParentProcId());<a name="line.1602"></a> -<span class="sourceLineNo">1603</span> if (parent == null) {<a name="line.1603"></a> -<span class="sourceLineNo">1604</span> assert procStack.isRollingback();<a name="line.1604"></a> -<span class="sourceLineNo">1605</span> return;<a name="line.1605"></a> -<span class="sourceLineNo">1606</span> }<a name="line.1606"></a> -<span class="sourceLineNo">1607</span><a name="line.1607"></a> -<span class="sourceLineNo">1608</span> // If this procedure is the last child awake the parent procedure<a name="line.1608"></a> -<span class="sourceLineNo">1609</span> if (parent.tryRunnable()) {<a name="line.1609"></a> -<span class="sourceLineNo">1610</span> // If we succeeded in making the parent runnable -- i.e. 
all of its<a name="line.1610"></a> -<span class="sourceLineNo">1611</span> // children have completed, move parent to front of the queue.<a name="line.1611"></a> -<span class="sourceLineNo">1612</span> store.update(parent);<a name="line.1612"></a> -<span class="sourceLineNo">1613</span> scheduler.addFront(parent);<a name="line.1613"></a> -<span class="sourceLineNo">1614</span> LOG.info("Finished subprocedure(s) of " + parent + "; resume parent processing.");<a name="line.1614"></a> -<span class="sourceLineNo">1615</span> return;<a name="line.1615"></a> -<span class="sourceLineNo">1616</span> }<a name="line.1616"></a> -<span class="sourceLineNo">1617</span> }<a name="line.1617"></a> -<span class="sourceLineNo">1618</span><a name="line.1618"></a> -<span class="sourceLineNo">1619</span> private void updateStoreOnExec(final RootProcedureState procStack,<a name="line.1619"></a> -<span class="sourceLineNo">1620</span> final Procedure procedure, final Procedure[] subprocs) {<a name="line.1620"></a> -<span class="sourceLineNo">1621</span> if (subprocs != null && !procedure.isFailed()) {<a name="line.1621"></a> -<span class="sourceLineNo">1622</span> if (LOG.isTraceEnabled()) {<a name="line.1622"></a> -<span class="sourceLineNo">1623</span> LOG.trace("Stored " + procedure + ", children " + Arrays.toString(subprocs));<a name="line.1623"></a> -<span class="sourceLineNo">1624</span> }<a name="line.1624"></a> -<span class="sourceLineNo">1625</span> store.insert(procedure, subprocs);<a name="line.1625"></a> -<span class="sourceLineNo">1626</span> } else {<a name="line.1626"></a> -<span class="sourceLineNo">1627</span> if (LOG.isTraceEnabled()) {<a name="line.1627"></a> -<span class="sourceLineNo">1628</span> LOG.trace("Store update " + procedure);<a name="line.1628"></a> -<span class="sourceLineNo">1629</span> }<a name="line.1629"></a> -<span class="sourceLineNo">1630</span> if (procedure.isFinished() && !procedure.hasParent()) {<a name="line.1630"></a> -<span 
class="sourceLineNo">1631</span> // remove child procedures<a name="line.1631"></a> -<span class="sourceLineNo">1632</span> final long[] childProcIds = procStack.getSubprocedureIds();<a name="line.1632"></a> -<span class="sourceLineNo">1633</span> if (childProcIds != null) {<a name="line.1633"></a> -<span class="sourceLineNo">1634</span> store.delete(procedure, childProcIds);<a name="line.1634"></a> -<span class="sourceLineNo">1635</span> for (int i = 0; i < childProcIds.length; ++i) {<a name="line.1635"></a> -<span class="sourceLineNo">1636</span> procedures.remove(childProcIds[i]);<a name="line.1636"></a> -<span class="sourceLineNo">1637</span> }<a name="line.1637"></a> -<span class="sourceLineNo">1638</span> } else {<a name="line.1638"></a> -<span class="sourceLineNo">1639</span> store.update(procedure);<a name="line.1639"></a> -<span class="sourceLineNo">1640</span> }<a name="line.1640"></a> -<span class="sourceLineNo">1641</span> } else {<a name="line.1641"></a> -<span class="sourceLineNo">1642</span> store.update(procedure);<a name="line.1642"></a> -<span class="sourceLineNo">1643</span> }<a name="line.1643"></a> -<span class="sourceLineNo">1644</span> }<a name="line.1644"></a> -<span class="sourceLineNo">1645</span> }<a name="line.1645"></a> -<span class="sourceLineNo">1646</span><a name="line.1646"></a> -<span class="sourceLineNo">1647</span> private void handleInterruptedException(final Procedure proc, final InterruptedException e) {<a name="line.1647"></a> -<span class="sourceLineNo">1648</span> if (LOG.isTraceEnabled()) {<a name="line.1648"></a> -<span class="sourceLineNo">1649</span> LOG.trace("Interrupt during " + proc + ". 
suspend and retry it later.", e);<a name="line.1649"></a> -<span class="sourceLineNo">1650</span> }<a name="line.1650"></a> -<span class="sourceLineNo">1651</span><a name="line.1651"></a> -<span class="sourceLineNo">1652</span> // NOTE: We don't call Thread.currentThread().interrupt()<a name="line.1652"></a> -<span class="sourceLineNo">1653</span> // because otherwise all the subsequent calls e.g. Thread.sleep() will throw<a name="line.1653"></a> -<span class="sourceLineNo">1654</span> // the InterruptedException. If the master is going down, we will be notified<a name="line.1654"></a> -<span class="sourceLineNo">1655</span> // and the executor/store will be stopped.<a name="line.1655"></a> -<span class="sourceLineNo">1656</span> // (The interrupted procedure will be retried on the next run)<a name="line.1656"></a> -<span class="sourceLineNo">1657</span> }<a name="line.1657"></a> -<span class="sourceLineNo">1658</span><a name="line.1658"></a> -<span class="sourceLineNo">1659</span> private void execCompletionCleanup(final Procedure proc) {<a name="line.1659"></a> -<span class="sourceLineNo">1660</span> final TEnvironment env = getEnvironment();<a name="line.1660"></a> -<span class="sourceLineNo">1661</span> if (proc.holdLock(env) && proc.hasLock(env)) {<a name="line.1661"></a> -<span class="sourceLineNo">1662</span> releaseLock(proc, true);<a name="line.1662"></a> -<span class="sourceLineNo">1663</span> }<a name="line.1663"></a> -<span class="sourceLineNo">1664</span> try {<a name="line.1664"></a> -<span class="sourceLineNo">1665</span> proc.completionCleanup(env);<a name="line.1665"></a> -<span class="sourceLineNo">1666</span> } catch (Throwable e) {<a name="line.1666"></a> -<span class="sourceLineNo">1667</span> // Catch NullPointerExceptions or similar errors...<a name="line.1667"></a> -<span class="sourceLineNo">1668</span> LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, e);<a name="line.1668"></a> -<span 
class="sourceLineNo">1669</span> }<a name="line.1669"></a> -<span class="sourceLineNo">1670</span> }<a name="line.1670"></a> -<span class="sourceLineNo">1671</span><a name="line.1671"></a> -<span class="sourceLineNo">1672</span> private void procedureFinished(final Procedure proc) {<a name="line.1672"></a> -<span class="sourceLineNo">1673</span> // call the procedure completion cleanup handler<a name="line.1673"></a> -<span class="sourceLineNo">1674</span> execCompletionCleanup(proc);<a name="line.1674"></a> -<span class="sourceLineNo">1675</span><a name="line.1675"></a> -<span class="sourceLineNo">1676</span> CompletedProcedureRetainer retainer = new CompletedProcedureRetainer(proc);<a name="line.1676"></a> -<span class="sourceLineNo">1677</span><a name="line.1677"></a> -<span class="sourceLineNo">1678</span> // update the executor internal state maps<a name="line.1678"></a> -<span class="sourceLineNo">1679</span> if (!proc.shouldWaitClientAck(getEnvironment())) {<a name="line.1679"></a> -<span class="sourceLineNo">1680</span> retainer.setClientAckTime(0);<a name="line.1680"></a> -<span class="sourceLineNo">1681</span> }<a name="line.1681"></a> -<span class="sourceLineNo">1682</span><a name="line.1682"></a> -<span class="sourceLineNo">1683</span> completed.put(proc.getProcId(), retainer);<a name="line.1683"></a> -<span class="sourceLineNo">1684</span> rollbackStack.remove(proc.getProcId());<a name="line.1684"></a> -<span class="sourceLineNo">1685</span> procedures.remove(proc.getProcId());<a name="line.1685"></a> -<span class="sourceLineNo">1686</span><a name="line.1686"></a> -<span class="sourceLineNo">1687</span> // call the runnableSet completion cleanup handler<a name="line.1687"></a> -<span class="sourceLineNo">1688</span> try {<a name="line.1688"></a> -<span class="sourceLineNo">1689</span> scheduler.completionCleanup(proc);<a name="line.1689"></a> -<span class="sourceLineNo">1690</span> } catch (Throwable e) {<a name="line.1690"></a> -<span 
class="sourceLineNo">1691</span> // Catch NullPointerExceptions or similar errors...<a name="line.1691"></a> -<span class="sourceLineNo">1692</span> LOG.error("CODE-BUG: uncatched runtime exception for completion cleanup: " + proc, e);<a name="line.1692"></a> -<span class="sourceLineNo">1693</span> }<a name="line.1693"></a> -<span class="sourceLineNo">1694</span><a name="line.1694"></a> -<span class="sourceLineNo">1695</span> // Notify the listeners<a name="line.1695"></a> -<span class="sourceLineNo">1696</span> sendProcedureFinishedNotification(proc.getProcId());<a name="line.1696"></a> -<span class="sourceLineNo">1697</span> }<a name="line.1697"></a> -<span class="sourceLineNo">1698</span><a name="line.1698"></a> -<span class="sourceLineNo">1699</span> RootProcedureState getProcStack(long rootProcId) {<a name="line.1699"></a> -<span class="sourceLineNo">1700</span> return rollbackStack.get(rootProcId);<a name="line.1700"></a> -<span class="sourceLineNo">1701</span> }<a name="line.1701"></a> -<span class="sourceLineNo">1702</span><a name="line.1702"></a> -<span class="sourceLineNo">1703</span> // ==========================================================================<a name="line.1703"></a> -<span class="sourceLineNo">1704</span> // Worker Thread<a name="line.1704"></a> -<span class="sourceLineNo">1705</span> // ==========================================================================<a name="line.1705"></a> -<span class="sourceLineNo">1706</span> private class WorkerThread extends StoppableThread {<a name="line.1706"></a> -<span class="sourceLineNo">1707</span> private final AtomicLong executionStartTime = new AtomicLong(Long.MAX_VALUE);<a name="line.1707"></a> -<span class="sourceLineNo">1708</span> private volatile Procedure<?> activeProcedure;<a name="line.1708"></a> -<span class="sourceLineNo">1709</span><a name="line.1709"></a> -<span class="sourceLineNo">1710</span> public WorkerThread(ThreadGroup group) {<a name="line.1710"></a> -<span 
class="sourceLineNo">1711</span> this(group, "PEWorker-");<a name="line.1711"></a> -<span class="sourceLineNo">1712</span> }<a name="line.1712"></a> -<span class="sourceLineNo">1713</span><a name="line.1713"></a> -<span class="sourceLineNo">1714</span> protected WorkerThread(ThreadGroup group, String prefix) {<a name="line.1714"></a> -<span class="sourceLineNo">1715</span> super(group, prefix + workerId.incrementAndGet());<a name="line.1715"></a> -<span class="sourceLineNo">1716</span> setDaemon(true);<a name="line.1716"></a> -<span class="sourceLineNo">1717</span> }<a name="line.1717"></a> -<span class="sourceLineNo">1718</span><a name="line.1718"></a> -<span class="sourceLineNo">1719</span> @Override<a name="line.1719"></a> -<span class="sourceLineNo">1720</span> public void sendStopSignal() {<a name="line.1720"></a> -<span class="sourceLineNo">1721</span> scheduler.signalAll();<a name="line.1721"></a> -<span class="sourceLineNo">1722</span> }<a name="line.1722"></a> -<span class="sourceLineNo">1723</span><a name="line.1723"></a> -<span class="sourceLineNo">1724</span> @Override<a name="line.1724"></a> -<span class="sourceLineNo">1725</span> public void run() {<a name="line.1725"></a> -<span class="sourceLineNo">1726</span> long lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1726"></a> -<span class="sourceLineNo">1727</span> try {<a name="line.1727"></a> -<span class="sourceLineNo">1728</span> while (isRunning() && keepAlive(lastUpdate)) {<a name="line.1728"></a> -<span class="sourceLineNo">1729</span> Procedure<?> proc = scheduler.poll(keepAliveTime, TimeUnit.MILLISECONDS);<a name="line.1729"></a> -<span class="sourceLineNo">1730</span> if (proc == null) {<a name="line.1730"></a> -<span class="sourceLineNo">1731</span> continue;<a name="line.1731"></a> -<span class="sourceLineNo">1732</span> }<a name="line.1732"></a> -<span class="sourceLineNo">1733</span> this.activeProcedure = proc;<a name="line.1733"></a> -<span 
class="sourceLineNo">1734</span> int activeCount = activeExecutorCount.incrementAndGet();<a name="line.1734"></a> -<span class="sourceLineNo">1735</span> int runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1735"></a> -<span class="sourceLineNo">1736</span> LOG.trace("Execute pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1736"></a> -<span class="sourceLineNo">1737</span> runningCount, activeCount);<a name="line.1737"></a> -<span class="sourceLineNo">1738</span> executionStartTime.set(EnvironmentEdgeManager.currentTime());<a name="line.1738"></a> -<span class="sourceLineNo">1739</span> try {<a name="line.1739"></a> -<span class="sourceLineNo">1740</span> executeProcedure(proc);<a name="line.1740"></a> -<span class="sourceLineNo">1741</span> } catch (AssertionError e) {<a name="line.1741"></a> -<span class="sourceLineNo">1742</span> LOG.info("ASSERT pid=" + proc.getProcId(), e);<a name="line.1742"></a> -<span class="sourceLineNo">1743</span> throw e;<a name="line.1743"></a> -<span class="sourceLineNo">1744</span> } finally {<a name="line.1744"></a> -<span class="sourceLineNo">1745</span> activeCount = activeExecutorCount.decrementAndGet();<a name="line.1745"></a> -<span class="sourceLineNo">1746</span> runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1746"></a> -<span class="sourceLineNo">1747</span> LOG.trace("Halt pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1747"></a> -<span class="sourceLineNo">1748</span> runningCount, activeCount);<a name="line.1748"></a> -<span class="sourceLineNo">1749</span> this.activeProcedure = null;<a name="line.1749"></a> -<span class="sourceLineNo">1750</span> lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1750"></a> -<span class="sourceLineNo">1751</span> executionStartTime.set(Long.MAX_VALUE);<a name="line.1751"></a> -<span class="sourceLineNo">1752</span> }<a name="line.1752"></a> -<span 
class="sourceLineNo">1753</span> }<a name="line.1753"></a> -<span class="sourceLineNo">1754</span> } catch (Throwable t) {<a name="line.1754"></a> -<span class="sourceLineNo">1755</span> LOG.warn("Worker terminating UNNATURALLY {}", this.activeProcedure, t);<a name="line.1755"></a> -<span class="sourceLineNo">1756</span> } finally {<a name="line.1756"></a> -<span class="sourceLineNo">1757</span> LOG.trace("Worker terminated.");<a name="line.1757"></a> -<span class="sourceLineNo">1758</span> }<a name="line.1758"></a> -<span class="sourceLineNo">1759</span> workerThreads.remove(this);<a name="line.1759"></a> -<span class="sourceLineNo">1760</span> }<a name="line.1760"></a> -<span class="sourceLineNo">1761</span><a name="line.1761"></a> -<span class="sourceLineNo">1762</span> @Override<a name="line.1762"></a> -<span class="sourceLineNo">1763</span> public String toString() {<a name="line.1763"></a> -<span class="sourceLineNo">1764</span> Procedure<?> p = this.activeProcedure;<a name="line.1764"></a> -<span class="sourceLineNo">1765</span> return getName() + "(pid=" + (p == null? 
Procedure.NO_PROC_ID: p.getProcId() + ")");<a name="line.1765"></a> -<span class="sourceLineNo">1766</span> }<a name="line.1766"></a> -<span class="sourceLineNo">1767</span><a name="line.1767"></a> -<span class="sourceLineNo">1768</span> /**<a name="line.1768"></a> -<span class="sourceLineNo">1769</span> * @return the time since the current procedure is running<a name="line.1769"></a> -<span class="sourceLineNo">1770</span> */<a name="line.1770"></a> -<span class="sourceLineNo">1771</span> public long getCurrentRunTime() {<a name="line.1771"></a> -<span class="sourceLineNo">1772</span> return EnvironmentEdgeManager.currentTime() - executionStartTime.get();<a name="line.1772"></a> -<span class="sourceLineNo">1773</span> }<a name="line.1773"></a> -<span class="sourceLineNo">1774</span><a name="line.1774"></a> -<span class="sourceLineNo">1775</span> // core worker never timeout<a name="line.1775"></a> -<span class="sourceLineNo">1776</span> protected boolean keepAlive(long lastUpdate) {<a name="line.1776"></a> -<span class="sourceLineNo">1777</span> return true;<a name="line.1777"></a> -<span class="sourceLineNo">1778</span> }<a name="line.1778"></a> -<span class="sourceLineNo">1779</span> }<a name="line.1779"></a> -<span class="sourceLineNo">1780</span><a name="line.1780"></a> -<span class="sourceLineNo">1781</span> // A worker thread which can be added when core workers are stuck. 
Will timeout after<a name="line.1781"></a> -<span class="sourceLineNo">1782</span> // keepAliveTime if there is no procedure to run.<a name="line.1782"></a> -<span class="sourceLineNo">1783</span> private final class KeepAliveWorkerThread extends WorkerThread {<a name="line.1783"></a> -<span class="sourceLineNo">1784</span><a name="line.1784"></a> -<span class="sourceLineNo">1785</span> public KeepAliveWorkerThread(ThreadGroup group) {<a name="line.1785"></a> -<span class="sourceLineNo">1786</span> super(group, "KeepAlivePEWorker-");<a name="line.1786"></a> -<span class="sourceLineNo">1787</span> }<a name="line.1787"></a> -<span class="sourceLineNo">1788</span><a name="line.1788"></a> -<span class="sourceLineNo">1789</span> @Override<a name="line.1789"></a> -<span class="sourceLineNo">1790</span> protected boolean keepAlive(long lastUpdate) {<a name="line.1790"></a> -<span class="sourceLineNo">1791</span> return EnvironmentEdgeManager.currentTime() - lastUpdate < keepAliveTime;<a name="line.1791"></a> -<span class="sourceLineNo">1792</span> }<a name="line.1792"></a> -<span class="sourceLineNo">1793</span> }<a name="line.1793"></a> -<span class="sourceLineNo">1794</span><a name="line.1794"></a> -<span class="sourceLineNo">1795</span> // ----------------------------------------------------------------------------<a name="line.1795"></a> -<span class="sourceLineNo">1796</span> // TODO-MAYBE: Should we provide a InlineChore to notify the store with the<a name="line.1796"></a> -<span class="sourceLineNo">1797</span> // full set of procedures pending and completed to write a compacted<a name="line.1797"></a> -<span class="sourceLineNo">1798</span> // version of the log (in case is a log)?<a name="line.1798"></a> -<span class="sourceLineNo">1799</span> // In theory no, procedures are have a short life, so at some point the store<a name="line.1799"></a> -<span class="sourceLineNo">1800</span> // will have the tracker saying everything is in the last log.<a 
name="line.1800"></a> -<span class="sourceLineNo">1801</span> // ----------------------------------------------------------------------------<a name="line.1801"></a> -<span class="sourceLineNo">1802</span><a name="line.1802"></a> -<span class="sourceLineNo">1803</span> private final class WorkerMonitor extends InlineChore {<a name="line.1803"></a> -<span class="sourceLineNo">1804</span> public static final String WORKER_MONITOR_INTERVAL_CONF_KEY =<a name="line.1804"></a> -<span class="sourceLineNo">1805</span> "hbase.procedure.worker.monitor.interval.msec";<a name="line.1805"></a> -<span class="sourceLineNo">1806</span> private static final int DEFAULT_WORKER_MONITOR_INTERVAL = 5000; // 5sec<a name="line.1806"></a> -<span class="sourceLineNo">1807</span><a name="line.1807"></a> -<span class="sourceLineNo">1808</span> public static final String WORKER_STUCK_THRESHOLD_CONF_KEY =<a name="line.1808"></a> -<span class="sourceLineNo">1809</span> "hbase.procedure.worker.stuck.threshold.msec";<a name="line.1809"></a> -<span class="sourceLineNo">1810</span> private static final int DEFAULT_WORKER_STUCK_THRESHOLD = 10000; // 10sec<a name="line.1810"></a> -<span class="sourceLineNo">1811</span><a name="line.1811"></a> -<span class="sourceLineNo">1812</span> public static final String WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY =<a name="line.1812"></a> -<span class="sourceLineNo">1813</span> "hbase.procedure.worker.add.stuck.percentage";<a name="line.1813"></a> -<span class="sourceLineNo">1814</span> private static final float DEFAULT_WORKER_ADD_STUCK_PERCENTAGE = 0.5f; // 50% stuck<a name="line.1814"></a> -<span class="sourceLineNo">1815</span><a name="line.1815"></a> -<span class="sourceLineNo">1816</span> private float addWorkerStuckPercentage = DEFAULT_WORKER_ADD_STUCK_PERCENTAGE;<a name="line.1816"></a> -<span class="sourceLineNo">1817</span> private int timeoutInterval = DEFAULT_WORKER_MONITOR_INTERVAL;<a name="line.1817"></a> -<span class="sourceLineNo">1818</span> private 
int stuckThreshold = DEFAULT_WORKER_STUCK_THRESHOLD;<a name="line.1818"></a> -<span class="sourceLineNo">1819</span><a name="line.1819"></a> -<span class="sourceLineNo">1820</span> public WorkerMonitor() {<a name="line.1820"></a> -<span class="sourceLineNo">1821</span> refreshConfig();<a name="line.1821"></a> -<span class="sourceLineNo">1822</span> }<a name="line.1822"></a> -<span class="sourceLineNo">1823</span><a name="line.1823"></a> -<span class="sourceLineNo">1824</span> @Override<a name="line.1824"></a> -<span class="sourceLineNo">1825</span> public void run() {<a name="line.1825"></a> -<span class="sourceLineNo">1826</span> final int stuckCount = checkForStuckWorkers();<a name="line.1826"></a> -<span class="sourceLineNo">1827</span> checkThreadCount(stuckCount);<a name="line.1827"></a> -<span class="sourceLineNo">1828</span><a name="line.1828"></a> -<span class="sourceLineNo">1829</span> // refresh interval (poor man dynamic conf update)<a name="line.1829"></a> -<span class="sourceLineNo">1830</span> refreshConfig();<a name="line.1830"></a> -<span class="sourceLineNo">1831</span> }<a name="line.1831"></a> -<span class="sourceLineNo">1832</span><a name="line.1832"></a> -<span class="sourceLineNo">1833</span> private int checkForStuckWorkers() {<a name="line.1833"></a> -<span class="sourceLineNo">1834</span> // check if any of the worker is stuck<a name="line.1834"></a> -<span class="sourceLineNo">1835</span> int stuckCount = 0;<a name="line.1835"></a> -<span class="sourceLineNo">1836</span> for (WorkerThread worker : workerThreads) {<a name="line.1836"></a> -<span class="sourceLineNo">1837</span> if (worker.getCurrentRunTime() < stuckThreshold) {<a name="line.1837"></a> -<span class="sourceLineNo">1838</span> continue;<a name="line.1838"></a> -<span class="sourceLineNo">1839</span> }<a name="line.1839"></a> -<span class="sourceLineNo">1840</span><a name="line.1840"></a> -<span class="sourceLineNo">1841</span> // WARN the worker is stuck<a 
name="line.1841"></a> -<span class="sourceLineNo">1842</span> stuckCount++;<a name="line.1842"></a> -<span class="sourceLineNo">1843</span> LOG.warn("Worker stuck {} run time {}", worker,<a name="line.1843"></a> -<span class="sourceLineNo">1844</span> StringUtils.humanTimeDiff(worker.getCurrentRunTime()));<a name="line.1844"></a> -<span class="sourceLineNo">1845</span> }<a name="line.1845"></a> -<span class="sourceLineNo">1846</span> return stuckCount;<a name="line.1846"></a> -<span class="sourceLineNo">1847</span> }<a name="line.1847"></a> -<span class="sourceLineNo">1848</span><a name="line.1848"></a> -<span class="sourceLineNo">1849</span> private void checkThreadCount(final int stuckCount) {<a name="line.1849"></a> -<span class="sourceLineNo">1850</span> // nothing to do if there are no runnable tasks<a name="line.1850"></a> -<span class="sourceLineNo">1851</span> if (stuckCount < 1 || !scheduler.hasRunnables()) {<a name="line.1851"></a> -<span class="sourceLineNo">1852</span> return;<a name="line.1852"></a> -<span class="sourceLineNo">1853</span> }<a name="line.1853"></a> -<span class="sourceLineNo">1854</span><a name="line.1854"></a> -<span class="sourceLineNo">1855</span> // add a new thread if the worker stuck percentage exceed the threshold limit<a name="line.1855"></a> -<span class="sourceLineNo">1856</span> // and every handler is active.<a name="line.1856"></a> -<span class="sourceLineNo">1857</span> final float stuckPerc = ((float) stuckCount) / workerThreads.size();<a name="line.1857"></a> -<span class="sourceLineNo">1858</span> // let's add new worker thread more aggressively, as they will timeout finally if there is no<a name="line.1858"></a> -<span class="sourceLineNo">1859</span> // work to do.<a name="line.1859"></a> -<span class="sourceLineNo">1860</span> if (stuckPerc >= addWorkerStuckPercentage && workerThreads.size() < maxPoolSize) {<a name="line.1860"></a> -<span class="sourceLineNo">1861</span> final KeepAliveWorkerThread worker = new 
KeepAliveWorkerThread(threadGroup);<a name="line.1861"></a> -<span class="sourceLineNo">1862</span> workerThreads.add(worker);<a name="line.1862"></a> -<span class="sourceLineNo">1863</span> worker.start();<a name="line.1863"></a> -<span class="sourceLineNo">1864</span> LOG.debug("Added new worker thread {}", worker);<a name="line.1864"></a> -<span class="sourceLineNo">1865</span> }<a name="line.1865"></a> -<span class="sourceLineNo">1866</span> }<a name="line.1866"></a> -<span class="sourceLineNo">1867</span><a name="line.1867"></a> -<span class="sourceLineNo">1868</span> private void refreshConfig() {<a name="line.1868"></a> -<span class="sourceLineNo">1869</span> addWorkerStuckPercentage = conf.getFloat(WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY,<a name="line.1869"></a> -<span class="sourceLineNo">1870</span> DEFAULT_WORKER_ADD_STUCK_PERCENTAGE);<a name="line.1870"></a> -<span class="sourceLineNo">1871</span> timeoutInterval = conf.getInt(WORKER_MONITOR_INTERVAL_CONF_KEY,<a name="line.1871"></a> -<span class="sourceLineNo">1872</span> DEFAULT_WORKER_MONITOR_INTERVAL);<a name="line.1872"></a> -<span class="sourceLineNo">1873</span> stuckThreshold = conf.getInt(WORKER_STUCK_THRESHOLD_CONF_KEY,<a name="line.1873"></a> -<span class="sourceLineNo">1874</span> DEFAULT_WORKER_STUCK_THRESHOLD);<a name="line.1874"></a> -<span class="sourceLineNo">1875</span> }<a name="line.1875"></a> -<span class="sourceLineNo">1876</span><a name="line.1876"></a> -<span class="sourceLineNo">1877</span> @Override<a name="line.1877"></a> -<span class="sourceLineNo">1878</span> public int getTimeoutInterval() {<a name="line.1878"></a> -<span class="sourceLineNo">1879</span> return timeoutInterval;<a name="line.1879"></a> -<span class="sourceLineNo">1880</span> }<a name="line.1880"></a> -<span class="sourceLineNo">1881</span> }<a name="line.1881"></a> -<span class="sourceLineNo">1882</span>}<a name="line.1882"></a> +<span class="sourceLineNo">1430</span> 
Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE,<a name="line.1430"></a> +<span class="sourceLineNo">1431</span> procedure.toString());<a name="line.1431"></a> +<span class="sourceLineNo">1432</span><a name="line.1432"></a> +<span class="sourceLineNo">1433</span> // Procedures can suspend themselves. They skip out by throwing a ProcedureSuspendedException.<a name="line.1433"></a> +<span class="sourceLineNo">1434</span> // The exception is caught below and then we hurry to the exit without disturbing state. The<a name="line.1434"></a> +<span class="sourceLineNo">1435</span> // idea is that the processing of this procedure will be unsuspended later by an external event<a name="line.1435"></a> +<span class="sourceLineNo">1436</span> // such the report of a region open. TODO: Currently, its possible for two worker threads<a name="line.1436"></a> +<span class="sourceLineNo">1437</span> // to be working on the same procedure concurrently (locking in procedures is NOT about<a name="line.1437"></a> +<span class="sourceLineNo">1438</span> // concurrency but about tying an entity to a procedure; i.e. a region to a particular<a name="line.1438"></a> +<span class="sourceLineNo">1439</span> // procedure instance). This can make for issues if both threads are changing state.<a name="line.1439"></a> +<span class="sourceLineNo">1440</span> // See env.getProcedureScheduler().wakeEvent(regionNode.getProcedureEvent());<a name="line.1440"></a> +<span class="sourceLineNo">1441</span> // in RegionTransitionProcedure#reportTransition for example of Procedure putting<a name="line.1441"></a> +<span class="sourceLineNo">1442</span> // itself back on the scheduler making it possible for two threads running against<a name="line.1442"></a> +<span class="sourceLineNo">1443</span> // the one Procedure. 
Might be ok if they are both doing different, idempotent sections.<a name="line.1443"></a> +<span class="sourceLineNo">1444</span> boolean suspended = false;<a name="line.1444"></a> +<span class="sourceLineNo">1445</span><a name="line.1445"></a> +<span class="sourceLineNo">1446</span> // Whether to 're-' -execute; run through the loop again.<a name="line.1446"></a> +<span class="sourceLineNo">1447</span> boolean reExecute = false;<a name="line.1447"></a> +<span class="sourceLineNo">1448</span><a name="line.1448"></a> +<span class="sourceLineNo">1449</span> Procedure<TEnvironment>[] subprocs = null;<a name="line.1449"></a> +<span class="sourceLineNo">1450</span> do {<a name="line.1450"></a> +<span class="sourceLineNo">1451</span> reExecute = false;<a name="line.1451"></a> +<span class="sourceLineNo">1452</span> try {<a name="line.1452"></a> +<span class="sourceLineNo">1453</span> subprocs = procedure.doExecute(getEnvironment());<a name="line.1453"></a> +<span class="sourceLineNo">1454</span> if (subprocs != null && subprocs.length == 0) {<a name="line.1454"></a> +<span class="sourceLineNo">1455</span> subprocs = null;<a name="line.1455"></a> +<span class="sourceLineNo">1456</span> }<a name="line.1456"></a> +<span class="sourceLineNo">1457</span> } catch (ProcedureSuspendedException e) {<a name="line.1457"></a> +<span class="sourceLineNo">1458</span> if (LOG.isTraceEnabled()) {<a name="line.1458"></a> +<span class="sourceLineNo">1459</span> LOG.trace("Suspend " + procedure);<a name="line.1459"></a> +<span class="sourceLineNo">1460</span> }<a name="line.1460"></a> +<span class="sourceLineNo">1461</span> suspended = true;<a name="line.1461"></a> +<span class="sourceLineNo">1462</span> } catch (ProcedureYieldException e) {<a name="line.1462"></a> +<span class="sourceLineNo">1463</span> if (LOG.isTraceEnabled()) {<a name="line.1463"></a> +<span class="sourceLineNo">1464</span> LOG.trace("Yield " + procedure + ": " + e.getMessage(), e);<a name="line.1464"></a> +<span 
class="sourceLineNo">1465</span> }<a name="line.1465"></a> +<span class="sourceLineNo">1466</span> scheduler.yield(procedure);<a name="line.1466"></a> +<span class="sourceLineNo">1467</span> return;<a name="line.1467"></a> +<span class="sourceLineNo">1468</span> } catch (InterruptedException e) {<a name="line.1468"></a> +<span class="sourceLineNo">1469</span> if (LOG.isTraceEnabled()) {<a name="line.1469"></a> +<span class="sourceLineNo">1470</span> LOG.trace("Yield interrupt " + procedure + ": " + e.getMessage(), e);<a name="line.1470"></a> +<span class="sourceLineNo">1471</span> }<a name="line.1471"></a> +<span class="sourceLineNo">1472</span> handleInterruptedException(procedure, e);<a name="line.1472"></a> +<span class="sourceLineNo">1473</span> scheduler.yield(procedure);<a name="line.1473"></a> +<span class="sourceLineNo">1474</span> return;<a name="line.1474"></a> +<span class="sourceLineNo">1475</span> } catch (Throwable e) {<a name="line.1475"></a> +<span class="sourceLineNo">1476</span> // Catch NullPointerExceptions or similar errors...<a name="line.1476"></a> +<span class="sourceLineNo">1477</span> String msg = "CODE-BUG: Uncaught runtime exception: " + procedure;<a name="line.1477"></a> +<span class="sourceLineNo">1478</span> LOG.error(msg, e);<a name="line.1478"></a> +<span class="sourceLineNo">1479</span> procedure.setFailure(new RemoteProcedureException(msg, e));<a name="line.1479"></a> +<span class="sourceLineNo">1480</span> }<a name="line.1480"></a> +<span class="sourceLineNo">1481</span><a name="line.1481"></a> +<span class="sourceLineNo">1482</span> if (!procedure.isFailed()) {<a name="line.1482"></a> +<span class="sourceLineNo">1483</span> if (subprocs != null) {<a name="line.1483"></a> +<span class="sourceLineNo">1484</span> if (subprocs.length == 1 && subprocs[0] == procedure) {<a name="line.1484"></a> +<span class="sourceLineNo">1485</span> // Procedure returned itself. 
Quick-shortcut for a state machine-like procedure;<a name="line.1485"></a> +<span class="sourceLineNo">1486</span> // i.e. we go around this loop again rather than go back out on the scheduler queue.<a name="line.1486"></a> +<span class="sourceLineNo">1487</span> subprocs = null;<a name="line.1487"></a> +<span class="sourceLineNo">1488</span> reExecute = true;<a name="line.1488"></a> +<span class="sourceLineNo">1489</span> if (LOG.isTraceEnabled()) {<a name="line.1489"></a> +<span class="sourceLineNo">1490</span> LOG.trace("Short-circuit to next step on pid=" + procedure.getProcId());<a name="line.1490"></a> +<span class="sourceLineNo">1491</span> }<a name="line.1491"></a> +<span class="sourceLineNo">1492</span> } else {<a name="line.1492"></a> +<span class="sourceLineNo">1493</span> // Yield the current procedure, and make the subprocedure runnable<a name="line.1493"></a> +<span class="sourceLineNo">1494</span> // subprocs may come back 'null'.<a name="line.1494"></a> +<span class="sourceLineNo">1495</span> subprocs = initializeChildren(procStack, procedure, subprocs);<a name="line.1495"></a> +<span class="sourceLineNo">1496</span> LOG.info("Initialized subprocedures=" +<a name="line.1496"></a> +<span class="sourceLineNo">1497</span> (subprocs == null? 
null:<a name="line.1497"></a> +<span class="sourceLineNo">1498</span> Stream.of(subprocs).map(e -> "{" + e.toString() + "}").<a name="line.1498"></a> +<span class="sourceLineNo">1499</span> collect(Collectors.toList()).toString()));<a name="line.1499"></a> +<span class="sourceLineNo">1500</span> }<a name="line.1500"></a> +<span class="sourceLineNo">1501</span> } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {<a name="line.1501"></a> +<span class="sourceLineNo">1502</span> if (LOG.isTraceEnabled()) {<a name="line.1502"></a> +<span class="sourceLineNo">1503</span> LOG.trace("Added to timeoutExecutor " + procedure);<a name="line.1503"></a> +<span class="sourceLineNo">1504</span> }<a name="line.1504"></a> +<span class="sourceLineNo">1505</span> timeoutExecutor.add(procedure);<a name="line.1505"></a> +<span class="sourceLineNo">1506</span> } else if (!suspended) {<a name="line.1506"></a> +<span class="sourceLineNo">1507</span> // No subtask, so we are done<a name="line.1507"></a> +<span class="sourceLineNo">1508</span> procedure.setState(ProcedureState.SUCCESS);<a name="line.1508"></a> +<span class="sourceLineNo">1509</span> }<a name="line.1509"></a> +<span class="sourceLineNo">1510</span> }<a name="line.1510"></a> +<span class="sourceLineNo">1511</span><a name="line.1511"></a> +<span class="sourceLineNo">1512</span> // Add the procedure to the stack<a name="line.1512"></a> +<span class="sourceLineNo">1513</span> procStack.addRollbackStep(procedure);<a name="line.1513"></a> +<span class="sourceLineNo">1514</span><a name="line.1514"></a> +<span class="sourceLineNo">1515</span> // allows to kill the executor before something is stored to the wal.<a name="line.1515"></a> +<span class="sourceLineNo">1516</span> // useful to test the procedure recovery.<a name="line.1516"></a> +<span class="sourceLineNo">1517</span> if (testing != null && testing.shouldKillBeforeStoreUpdate(suspended)) {<a name="line.1517"></a> +<span class="sourceLineNo">1518</span> 
LOG.debug("TESTING: Kill before store update: " + procedure);<a name="line.1518"></a> +<span class="sourceLineNo">1519</span> stop();<a name="line.1519"></a> +<span class="sourceLineNo">1520</span> return;<a name="line.1520"></a> +<span class="sourceLineNo">1521</span> }<a name="line.1521"></a> +<span class="sourceLineNo">1522</span><a name="line.1522"></a> +<span class="sourceLineNo">1523</span> // TODO: The code here doesn't check if store is running before persisting to the store as<a name="line.1523"></a> +<span class="sourceLineNo">1524</span> // it relies on the method call below to throw RuntimeException to wind up the stack and<a name="line.1524"></a> +<span class="sourceLineNo">1525</span> // executor thread to stop. The statement following the method call below seems to check if<a name="line.1525"></a> +<span class="sourceLineNo">1526</span> // store is not running, to prevent scheduling children procedures, re-execution or yield<a name="line.1526"></a> +<span class="sourceLineNo">1527</span> // of this procedure. This may need more scrutiny and subsequent cleanup in future<a name="line.1527"></a> +<span class="sourceLineNo">1528</span> //<a name="line.1528"></a> +<span class="sourceLineNo">1529</span> // Commit the transaction even if a suspend (state may have changed). 
Note this append<a name="line.1529"></a> +<span class="sourceLineNo">1530</span> // can take a bunch of time to complete.<a name="line.1530"></a> +<span class="sourceLineNo">1531</span> updateStoreOnExec(procStack, procedure, subprocs);<a name="line.1531"></a> +<span class="sourceLineNo">1532</span><a name="line.1532"></a> +<span class="sourceLineNo">1533</span> // if the store is not running we are aborting<a name="line.1533"></a> +<span class="sourceLineNo">1534</span> if (!store.isRunning()) return;<a name="line.1534"></a> +<span class="sourceLineNo">1535</span> // if the procedure is kind enough to pass the slot to someone else, yield<a name="line.1535"></a> +<span class="sourceLineNo">1536</span> if (procedure.isRunnable() && !suspended &&<a name="line.1536"></a> +<span class="sourceLineNo">1537</span> procedure.isYieldAfterExecutionStep(getEnvironment())) {<a name="line.1537"></a> +<span class="sourceLineNo">1538</span> scheduler.yield(procedure);<a name="line.1538"></a> +<span class="sourceLineNo">1539</span> return;<a name="line.1539"></a> +<span class="sourceLineNo">1540</span> }<a name="line.1540"></a> +<span class="sourceLineNo">1541</span><a name="line.1541"></a> +<span class="sourceLineNo">1542</span> assert (reExecute && subprocs == null) || !reExecute;<a name="line.1542"></a> +<span class="sourceLineNo">1543</span> } while (reExecute);<a name="line.1543"></a> +<span class="sourceLineNo">1544</span> // Submit the new subprocedures<a name="line.1544"></a> +<span class="sourceLineNo">1545</span> if (subprocs != null && !procedure.isFailed()) {<a name="line.1545"></a> +<span class="sourceLineNo">1546</span> submitChildrenProcedures(subprocs);<a name="line.1546"></a> +<span class="sourceLineNo">1547</span> }<a name="line.1547"></a> +<span class="sourceLineNo">1548</span><a name="line.1548"></a> +<span class="sourceLineNo">1549</span> // if the procedure is complete and has a parent, count down the children latch.<a name="line.1549"></a> +<span 
class="sourceLineNo">1550</span> // If 'suspended', do nothing to change state -- let other threads handle unsuspend event.<a name="line.1550"></a> +<span class="sourceLineNo">1551</span> if (!suspended && procedure.isFinished() && procedure.hasParent()) {<a name="line.1551"></a> +<span class="sourceLineNo">1552</span> countDownChildren(procStack, procedure);<a name="line.1552"></a> +<span class="sourceLineNo">1553</span> }<a name="line.1553"></a> +<span class="sourceLineNo">1554</span> }<a name="line.1554"></a> +<span class="sourceLineNo">1555</span><a name="line.1555"></a> +<span class="sourceLineNo">1556</span> private Procedure[] initializeChildren(final RootProcedureState procStack,<a name="line.1556"></a> +<span class="sourceLineNo">1557</span> final Procedure procedure, final Procedure[] subprocs) {<a name="line.1557"></a> +<span class="sourceLineNo">1558</span> assert subprocs != null : "expected subprocedures";<a name="line.1558"></a> +<span class="sourceLineNo">1559</span> final long rootProcId = getRootProcedureId(procedure);<a name="line.1559"></a> +<span class="sourceLineNo">1560</span> for (int i = 0; i < subprocs.length; ++i) {<a name="line.1560"></a> +<span class="sourceLineNo">1561</span> final Procedure subproc = subprocs[i];<a name="line.1561"></a> +<span class="sourceLineNo">1562</span> if (subproc == null) {<a name="line.1562"></a> +<span class="sourceLineNo">1563</span> String msg = "subproc[" + i + "] is null, aborting the procedure";<a name="line.1563"></a> +<span class="sourceLineNo">1564</span> procedure.setFailure(new RemoteProcedureException(msg,<a name="line.1564"></a> +<span class="sourceLineNo">1565</span> new IllegalArgumentIOException(msg)));<a name="line.1565"></a> +<span class="sourceLineNo">1566</span> return null;<a name="line.1566"></a> +<span class="sourceLineNo">1567</span> }<a name="line.1567"></a> +<span class="sourceLineNo">1568</span><a name="line.1568"></a> +<span class="sourceLineNo">1569</span> assert 
subproc.getState() == ProcedureState.INITIALIZING : subproc;<a name="line.1569"></a> +<span class="sourceLineNo">1570</span> subproc.setParentProcId(procedure.getProcId());<a name="line.1570"></a> +<span class="sourceLineNo">1571</span> subproc.setRootProcId(rootProcId);<a name="line.1571"></a> +<span class="sourceLineNo">1572</span> subproc.setProcId(nextProcId());<a name="line.1572"></a> +<span class="sourceLineNo">1573</span> procStack.addSubProcedure(subproc);<a name="line.1573"></a> +<span class="sourceLineNo">1574</span> }<a name="line.1574"></a> +<span class="sourceLineNo">1575</span><a name="line.1575"></a> +<span class="sourceLineNo">1576</span> if (!procedure.isFailed()) {<a name="line.1576"></a> +<span class="sourceLineNo">1577</span> procedure.setChildrenLatch(subprocs.length);<a name="line.1577"></a> +<span class="sourceLineNo">1578</span> switch (procedure.getState()) {<a name="line.1578"></a> +<span class="sourceLineNo">1579</span> case RUNNABLE:<a name="line.1579"></a> +<span class="sourceLineNo">1580</span> procedure.setState(ProcedureState.WAITING);<a name="line.1580"></a> +<span class="sourceLineNo">1581</span> break;<a name="line.1581"></a> +<span class="sourceLineNo">1582</span> case WAITING_TIMEOUT:<a name="line.1582"></a> +<span class="sourceLineNo">1583</span> timeoutExecutor.add(procedure);<a name="line.1583"></a> +<span class="sourceLineNo">1584</span> break;<a name="line.1584"></a> +<span class="sourceLineNo">1585</span> default:<a name="line.1585"></a> +<span class="sourceLineNo">1586</span> break;<a name="line.1586"></a> +<span class="sourceLineNo">1587</span> }<a name="line.1587"></a> +<span class="sourceLineNo">1588</span> }<a name="line.1588"></a> +<span class="sourceLineNo">1589</span> return subprocs;<a name="line.1589"></a> +<span class="sourceLineNo">1590</span> }<a name="line.1590"></a> +<span class="sourceLineNo">1591</span><a name="line.1591"></a> +<span class="sourceLineNo">1592</span> private void 
submitChildrenProcedures(final Procedure[] subprocs) {<a name="line.1592"></a> +<span class="sourceLineNo">1593</span> for (int i = 0; i < subprocs.length; ++i) {<a name="line.1593"></a> +<span class="sourceLineNo">1594</span> final Procedure subproc = subprocs[i];<a name="line.1594"></a> +<span class="sourceLineNo">1595</span> subproc.updateMetricsOnSubmit(getEnvironment());<a name="line.1595"></a> +<span class="sourceLineNo">1596</span> assert !procedures.containsKey(subproc.getProcId());<a name="line.1596"></a> +<span class="sourceLineNo">1597</span> procedures.put(subproc.getProcId(), subproc);<a name="line.1597"></a> +<span class="sourceLineNo">1598</span> scheduler.addFront(subproc);<a name="line.1598"></a> +<span class="sourceLineNo">1599</span> }<a name="line.1599"></a> +<span class="sourceLineNo">1600</span> }<a name="line.1600"></a> +<span class="sourceLineNo">1601</span><a name="line.1601"></a> +<span class="sourceLineNo">1602</span> private void countDownChildren(final RootProcedureState procStack, final Procedure procedure) {<a name="line.1602"></a> +<span class="sourceLineNo">1603</span> final Procedure parent = procedures.get(procedure.getParentProcId());<a name="line.1603"></a> +<span class="sourceLineNo">1604</span> if (parent == null) {<a name="line.1604"></a> +<span class="sourceLineNo">1605</span> assert procStack.isRollingback();<a name="line.1605"></a> +<span class="sourceLineNo">1606</span> return;<a name="line.1606"></a> +<span class="sourceLineNo">1607</span> }<a name="line.1607"></a> +<span class="sourceLineNo">1608</span><a name="line.1608"></a> +<span class="sourceLineNo">1609</span> // If this procedure is the last child awake the parent procedure<a name="line.1609"></a> +<span class="sourceLineNo">1610</span> if (parent.tryRunnable()) {<a name="line.1610"></a> +<span class="sourceLineNo">1611</span> // If we succeeded in making the parent runnable -- i.e. 
all of its<a name="line.1611"></a> +<span class="sourceLineNo">1612</span> // children have completed, move parent to front of the queue.<a name="line.1612"></a> +<span class="sourceLineNo">1613</span> store.update(parent);<a name="line.1613"></a> +<span class="sourceLineNo">1614</span> scheduler.addFront(parent);<a name="line.1614"></a> +<span class="sourceLineNo">1615</span> LOG.info("Finished subprocedure(s) of " + parent + "; resume parent processing.");<a name="line.1615"></a> +<span class="sourceLineNo">1616</span> return;<a name="line.1616"></a> +<span class="sourceLineNo">1617</span> }<a name="line.1617"></a> +<span class="sourceLineNo">1618</span> }<a name="line.1618"></a> +<span class="sourceLineNo">1619</span><a name="line.1619"></a> +<span class="sourceLineNo">1620</span> private void updateStoreOnExec(final RootProcedureState procStack,<a name="line.1620"></a> +<span class="sourceLineNo">1621</span> final Procedure procedure, final Procedure[] subprocs) {<a name="line.1621"></a> +<span class="sourceLineNo">1622</span> if (subprocs != null && !procedure.isFailed()) {<a name="line.1622"></a> +<span class="sourceLineNo">1623</span> if (LOG.isTraceEnabled()) {<a name="line.1623"></a> +<span class="sourceLineNo">1624</span> LOG.trace("Stored " + procedure + ", children " + Arrays.toString(subprocs));<a name="line.1624"></a> +<span class="sourceLineNo">1625</span> }<a name="line.1625"></a> +<span class="sourceLineNo">1626</span> store.insert(procedure, subprocs);<a name="line.1626"></a> +<span class="sourceLineNo">1627</span> } else {<a name="line.1627"></a> +<span class="sourceLineNo">1628</span> if (LOG.isTraceEnabled()) {<a name="line.1628"></a> +<span class="sourceLineNo">1629</span> LOG.trace("Store update " + procedure);<a name="line.1629"></a> +<span class="sourceLineNo">1630</span> }<a name="line.1630"></a> +<span class="sourceLineNo">1631</span> if (procedure.isFinished() && !procedure.hasParent()) {<a name="line.1631"></a> +<span 
class="sourceLineNo">1632</span> // remove child procedures<a name="line.1632"></a> +<span class="sourceLineNo">1633</span> final long[] childProcIds = procStack.getSubprocedureIds();<a name="line.1633"></a> +<span class="sourceLineNo">1634</span> if (childProcIds != null) {<a name="line.1634"></a> +<span class="sourceLineNo">1635</span> store.delete(procedure, childProcIds);<a name="line.1635"></a> +<span class="sourceLineNo">1636</span> for (int i = 0; i < childProcIds.length; ++i) {<a name="line.1636"></a> +<span class="sourceLineNo">1637</span> procedures.remove(childProcIds[i]);<a name="line.1637"></a> +<span class="sourceLineNo">1638</span> }<a name="line.1638"></a> +<span class="sourceLineNo">1639</span> } else {<a name="line.1639"></a> +<span class="sourceLineNo">1640</span> store.update(procedure);<a name="line.1640"></a> +<span class="sourceLineNo">1641</span> }<a name="line.1641"></a> +<span class="sourceLineNo">1642</span> } else {<a name="line.1642"></a> +<span class="sourceLineNo">1643</span> store.update(procedure);<a name="line.1643"></a> +<span class="sourceLineNo">1644</span> }<a name="line.1644"></a> +<span class="sourceLineNo">1645</span> }<a name="line.1645"></a> +<span class="sourceLineNo">1646</span> }<a name="line.1646"></a> +<span class="sourceLineNo">1647</span><a name="line.1647"></a> +<span class="sourceLineNo">1648</span> private void handleInterruptedException(final Procedure proc, final InterruptedException e) {<a name="line.1648"></a> +<span class="sourceLineNo">1649</span> if (LOG.isTraceEnabled()) {<a name="line.1649"></a> +<span class="sourceLineNo">1650</span> LOG.trace("Interrupt during " + proc + ". 
suspend and retry it later.", e);<a name="line.1650"></a> +<span class="sourceLineNo">1651</span> }<a name="line.1651"></a> +<span class="sourceLineNo">1652</span><a name="line.1652"></a> +<span class="sourceLineNo">1653</span> // NOTE: We don't call Thread.currentThread().interrupt()<a name="line.1653"></a> +<span class="sourceLineNo">1654</span> // because otherwise all the subsequent calls e.g. Thread.sleep() will throw<a name="line.1654"></a> +<span class="sourceLineNo">1655</span> // the InterruptedException. If the master is going down, we will be notified<a name="line.1655"></a> +<span class="sourceLineNo">1656</span> // and the executor/store will be stopped.<a name="line.1656"></a> +<span class="sourceLineNo">1657</span> // (The interrupted procedure will be retried on the next run)<a name="line.1657"></a> +<span class="sourceLineNo">1658</span> }<a name="line.1658"></a> +<span class="sourceLineNo">1659</span><a name="line.1659"></a> +<span class="sourceLineNo">1660</span> private void execCompletionCleanup(final Procedure proc) {<a name="line.1660"></a> +<span class="sourceLineNo">1661</span> final TEnvironment env = getEnvironment();<a name="line.1661"></a> +<span class="sourceLineNo">1662</span> if (proc.holdLock(env) && proc.hasLock(env)) {<a name="line.1662"></a> +<span class="sourceLineNo">1663</span> releaseLock(proc, true);<a name="line.1663"></a> +<span class="sourceLineNo">1664</span> }<a name="line.1664"></a> +<span class="sourceLineNo">1665</span> try {<a name="line.1665"></a> +<span class="sourceLineNo">1666</span> proc.completionCleanup(env);<a name="line.1666"></a> +<span class="sourceLineNo">1667</span> } catch (Throwable e) {<a name="line.1667"></a> +<span class="sourceLineNo">1668</span> // Catch NullPointerExceptions or similar errors...<a name="line.1668"></a> +<span class="sourceLineNo">1669</span> LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, e);<a name="line.1669"></a> +<span 
class="sourceLineNo">1670</span> }<a name="line.1670"></a> +<span class="sourceLineNo">1671</span> }<a name="line.1671"></a> +<span class="sourceLineNo">1672</span><a name="line.1672"></a> +<span class="sourceLineNo">1673</span> private void procedureFinished(final Procedure proc) {<a name="line.1673"></a> +<span class="sourceLineNo">1674</span> // call the procedure completion cleanup handler<a name="line.1674"></a> +<span class="sourceLineNo">1675</span> execCompletionCleanup(proc);<a name="line.1675"></a> +<span class="sourceLineNo">1676</span><a name="line.1676"></a> +<span class="sourceLineNo">1677</span> CompletedProcedureRetainer retainer = new CompletedProcedureRetainer(proc);<a name="line.1677"></a> +<span class="sourceLineNo">1678</span><a name="line.1678"></a> +<span class="sourceLineNo">1679</span> // update the executor internal state maps<a name="line.1679"></a> +<span class="sourceLineNo">1680</span> if (!proc.shouldWaitClientAck(getEnvironment())) {<a name="line.1680"></a> +<span class="sourceLineNo">1681</span> retainer.setClientAckTime(0);<a name="line.1681"></a> +<span class="sourceLineNo">1682</span> }<a name="line.1682"></a> +<span class="sourceLineNo">1683</span><a name="line.1683"></a> +<span class="sourceLineNo">1684</span> completed.put(proc.getProcId(), retainer);<a name="line.1684"></a> +<span class="sourceLineNo">1685</span> rollbackStack.remove(proc.getProcId());<a name="line.1685"></a> +<span class="sourceLineNo">1686</span> procedures.remove(proc.getProcId());<a name="line.1686"></a> +<span class="sourceLineNo">1687</span><a name="line.1687"></a> +<span class="sourceLineNo">1688</span> // call the runnableSet completion cleanup handler<a name="line.1688"></a> +<span class="sourceLineNo">1689</span> try {<a name="line.1689"></a> +<span class="sourceLineNo">1690</span> scheduler.completionCleanup(proc);<a name="line.1690"></a> +<span class="sourceLineNo">1691</span> } catch (Throwable e) {<a name="line.1691"></a> +<span 
class="sourceLineNo">1692</span> // Catch NullPointerExceptions or similar errors...<a name="line.1692"></a> +<span class="sourceLineNo">1693</span> LOG.error("CODE-BUG: uncatched runtime exception for completion cleanup: " + proc, e);<a name="line.1693"></a> +<span class="sourceLineNo">1694</span> }<a name="line.1694"></a> +<span class="sourceLineNo">1695</span><a name="line.1695"></a> +<span class="sourceLineNo">1696</span> // Notify the listeners<a name="line.1696"></a> +<span class="sourceLineNo">1697</span> sendProcedureFinishedNotification(proc.getProcId());<a name="line.1697"></a> +<span class="sourceLineNo">1698</span> }<a name="line.1698"></a> +<span class="sourceLineNo">1699</span><a name="line.1699"></a> +<span class="sourceLineNo">1700</span> RootProcedureState getProcStack(long rootProcId) {<a name="line.1700"></a> +<span class="sourceLineNo">1701</span> return rollbackStack.get(rootProcId);<a name="line.1701"></a> +<span class="sourceLineNo">1702</span> }<a name="line.1702"></a> +<span class="sourceLineNo">1703</span><a name="line.1703"></a> +<span class="sourceLineNo">1704</span> // ==========================================================================<a name="line.1704"></a> +<span class="sourceLineNo">1705</span> // Worker Thread<a name="line.1705"></a> +<span class="sourceLineNo">1706</span> // ==========================================================================<a name="line.1706"></a> +<span class="sourceLineNo">1707</span> private class WorkerThread extends StoppableThread {<a name="line.1707"></a> +<span class="sourceLineNo">1708</span> private final AtomicLong executionStartTime = new AtomicLong(Long.MAX_VALUE);<a name="line.1708"></a> +<span class="sourceLineNo">1709</span> private volatile Procedure<?> activeProcedure;<a name="line.1709"></a> +<span class="sourceLineNo">1710</span><a name="line.1710"></a> +<span class="sourceLineNo">1711</span> public WorkerThread(ThreadGroup group) {<a name="line.1711"></a> +<span 
class="sourceLineNo">1712</span> this(group, "PEWorker-");<a name="line.1712"></a> +<span class="sourceLineNo">1713</span> }<a name="line.1713"></a> +<span class="sourceLineNo">1714</span><a name="line.1714"></a> +<span class="sourceLineNo">1715</span> protected WorkerThread(ThreadGroup group, String prefix) {<a name="line.1715"></a> +<span class="sourceLineNo">1716</span> super(group, prefix + workerId.incrementAndGet());<a name="line.1716"></a> +<span class="sourceLineNo">1717</span> setDaemon(true);<a name="line.1717"></a> +<span class="sourceLineNo">1718</span> }<a name="line.1718"></a> +<span class="sourceLineNo">1719</span><a name="line.1719"></a> +<span class="sourceLineNo">1720</span> @Override<a name="line.1720"></a> +<span class="sourceLineNo">1721</span> public void sendStopSignal() {<a name="line.1721"></a> +<span class="sourceLineNo">1722</span> scheduler.signalAll();<a name="line.1722"></a> +<span class="sourceLineNo">1723</span> }<a name="line.1723"></a> +<span class="sourceLineNo">1724</span><a name="line.1724"></a> +<span class="sourceLineNo">1725</span> @Override<a name="line.1725"></a> +<span class="sourceLineNo">1726</span> public void run() {<a name="line.1726"></a> +<span class="sourceLineNo">1727</span> long lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1727"></a> +<span class="sourceLineNo">1728</span> try {<a name="line.1728"></a> +<span class="sourceLineNo">1729</span> while (isRunning() && keepAlive(lastUpdate)) {<a name="line.1729"></a> +<span class="sourceLineNo">1730</span> Procedure<?> proc = scheduler.poll(keepAliveTime, TimeUnit.MILLISECONDS);<a name="line.1730"></a> +<span class="sourceLineNo">1731</span> if (proc == null) {<a name="line.1731"></a> +<span class="sourceLineNo">1732</span> continue;<a name="line.1732"></a> +<span class="sourceLineNo">1733</span> }<a name="line.1733"></a> +<span class="sourceLineNo">1734</span> this.activeProcedure = proc;<a name="line.1734"></a> +<span 
class="sourceLineNo">1735</span> int activeCount = activeExecutorCount.incrementAndGet();<a name="line.1735"></a> +<span class="sourceLineNo">1736</span> int runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1736"></a> +<span class="sourceLineNo">1737</span> LOG.trace("Execute pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1737"></a> +<span class="sourceLineNo">1738</span> runningCount, activeCount);<a name="line.1738"></a> +<span class="sourceLineNo">1739</span> executionStartTime.set(EnvironmentEdgeManager.currentTime());<a name="line.1739"></a> +<span class="sourceLineNo">1740</span> try {<a name="line.1740"></a> +<span class="sourceLineNo">1741</span> executeProcedure(proc);<a name="line.1741"></a> +<span class="sourceLineNo">1742</span> } catch (AssertionError e) {<a name="line.1742"></a> +<span class="sourceLineNo">1743</span> LOG.info("ASSERT pid=" + proc.getProcId(), e);<a name="line.1743"></a> +<span class="sourceLineNo">1744</span> throw e;<a name="line.1744"></a> +<span class="sourceLineNo">1745</span> } finally {<a name="line.1745"></a> +<span class="sourceLineNo">1746</span> activeCount = activeExecutorCount.decrementAndGet();<a name="line.1746"></a> +<span class="sourceLineNo">1747</span> runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1747"></a> +<span class="sourceLineNo">1748</span> LOG.trace("Halt pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1748"></a> +<span class="sourceLineNo">1749</span> runningCount, activeCount);<a name="line.1749"></a> +<span class="sourceLineNo">1750</span> this.activeProcedure = null;<a name="line.1750"></a> +<span class="sourceLineNo">1751</span> lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1751"></a> +<span class="sourceLineNo">1752</span> executionStartTime.set(Long.MAX_VALUE);<a name="line.1752"></a> +<span class="sourceLineNo">1753</span> }<a name="line.1753"></a> +<span 
class="sourceLineNo">1754</span> }<a name="line.1754"></a> +<span class="sourceLineNo">1755</span> } catch (Throwable t) {<a name="line.1755"></a> +<span class="sourceLineNo">1756</span> LOG.warn("Worker terminating UNNATURALLY {}", this.activeProcedure, t);<a name="line.1756"></a> +<span class="sourceLineNo">1757</span> } finally {<a name="line.1757"></a> +<span class="sourceLineNo">1758</span> LOG.trace("Worker terminated.");<a name="line.1758"></a> +<span class="sourceLineNo">1759</span> }<a name="line.1759"></a> +<span class="sourceLineNo">1760</span> workerThreads.remove(this);<a name="line.1760"></a> +<span class="sourceLineNo">1761</span> }<a name="line.1761"></a> +<span class="sourceLineNo">1762</span><a name="line.1762"></a> +<span class="sourceLineNo">1763</span> @Override<a name="line.1763"></a> +<span class="sourceLineNo">1764</span> public String toString() {<a name="line.1764"></a> +<span class="sourceLineNo">1765</span> Procedure<?> p = this.activeProcedure;<a name="line.1765"></a> +<span class="sourceLineNo">1766</span> return getName() + "(pid=" + (p == null? 
Procedure.NO_PROC_ID: p.getProcId()) + ")";<a name="line.1766"></a> +<span class="sourceLineNo">1767</span> }<a name="line.1767"></a> +<span class="sourceLineNo">1768</span><a name="line.1768"></a> +<span class="sourceLineNo">1769</span> /**<a name="line.1769"></a> +<span class="sourceLineNo">1770</span> * @return the time since the current procedure is running<a name="line.1770"></a> +<span class="sourceLineNo">1771</span> */<a name="line.1771"></a> +<span class="sourceLineNo">1772</span> public long getCurrentRunTime() {<a name="line.1772"></a> +<span class="sourceLineNo">1773</span> return EnvironmentEdgeManager.currentTime() - executionStartTime.get();<a name="line.1773"></a> +<span class="sourceLineNo">1774</span> }<a name="line.1774"></a> +<span class="sourceLineNo">1775</span><a name="line.1775"></a> +<span class="sourceLineNo">1776</span> // core workers never time out<a name="line.1776"></a> +<span class="sourceLineNo">1777</span> protected boolean keepAlive(long lastUpdate) {<a name="line.1777"></a> +<span class="sourceLineNo">1778</span> return true;<a name="line.1778"></a> +<span class="sourceLineNo">1779</span> }<a name="line.1779"></a> +<span class="sourceLineNo">1780</span> }<a name="line.1780"></a> +<span class="sourceLineNo">1781</span><a name="line.1781"></a> +<span class="sourceLineNo">1782</span> // A worker thread which can be added when core workers are stuck. 
Will time out after<a name="line.1782"></a> +<span class="sourceLineNo">1783</span> // keepAliveTime if there is no procedure to run.<a name="line.1783"></a> +<span class="sourceLineNo">1784</span> private final class KeepAliveWorkerThread extends WorkerThread {<a name="line.1784"></a> +<span class="sourceLineNo">1785</span><a name="line.1785"></a> +<span class="sourceLineNo">1786</span> public KeepAliveWorkerThread(ThreadGroup group) {<a name="line.1786"></a> +<span class="sourceLineNo">1787</span> super(group, "KeepAlivePEWorker-");<a name="line.1787"></a> +<span class="sourceLineNo">1788</span> }<a name="line.1788"></a> +<span class="sourceLineNo">1789</span><a name="line.1789"></a> +<span class="sourceLineNo">1790</span> @Override<a name="line.1790"></a> +<span class="sourceLineNo">1791</span> protected boolean keepAlive(long lastUpdate) {<a name="line.1791"></a> +<span class="sourceLineNo">1792</span> return EnvironmentEdgeManager.currentTime() - lastUpdate < keepAliveTime;<a name="line.1792"></a> +<span class="sourceLineNo">1793</span> }<a name="line.1793"></a> +<span class="sourceLineNo">1794</span> }<a name="line.1794"></a> +<span class="sourceLineNo">1795</span><a name="line.1795"></a> +<span class="sourceLineNo">1796</span> // ----------------------------------------------------------------------------<a name="line.1796"></a> +<span class="sourceLineNo">1797</span> // TODO-MAYBE: Should we provide an InlineChore to notify the store with the<a name="line.1797"></a> +<span class="sourceLineNo">1798</span> // full set of procedures pending and completed to write a compacted<a name="line.1798"></a> +<span class="sourceLineNo">1799</span> // version of the log (in case it is a log)?<a name="line.1799"></a> +<span class="sourceLineNo">1800</span> // In theory no, procedures have a short life, so at some point the store<a name="line.1800"></a> +<span class="sourceLineNo">1801</span> // will have the tracker saying everything is in the last log.<a 
name="line.1801"></a> +<span class="sourceLineNo">1802</span> // ----------------------------------------------------------------------------<a name="line.1802"></a> +<span class="sourceLineNo">1803</span><a name="line.1803"></a> +<span class="sourceLineNo">1804</span> private final class WorkerMonitor extends InlineChore {<a name="line.1804"></a> +<span class="sourceLineNo">1805</span> public static final String WORKER_MONITOR_INTERVAL_CONF_KEY =<a name="line.1805"></a> +<span class="sourceLineNo">1806</span> "hbase.procedure.worker.monitor.interval.msec";<a name="line.1806"></a> +<span class="sourceLineNo">1807</span> private static final int DEFAULT_WORKER_MONITOR_INTERVAL = 5000; // 5sec<a name="line.1807"></a> +<span class="sourceLineNo">1808</span><a name="line.1808"></a> +<span class="sourceLineNo">1809</span> public static final String WORKER_STUCK_THRESHOLD_CONF_KEY =<a name="line.1809"></a> +<span class="sourceLineNo">1810</span> "hbase.procedure.worker.stuck.threshold.msec";<a name="line.1810"></a> +<span class="sourceLineNo">1811</span> private static final int DEFAULT_WORKER_STUCK_THRESHOLD = 10000; // 10sec<a name="line.1811"></a> +<span class="sourceLineNo">1812</span><a name="line.1812"></a> +<span class="sourceLineNo">1813</span> public static final String WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY =<a name="line.1813"></a> +<span class="sourceLineNo">1814</span> "hbase.procedure.worker.add.stuck.percentage";<a name="line.1814"></a> +<span class="sourceLineNo">1815</span> private static final float DEFAULT_WORKER_ADD_STUCK_PERCENTAGE = 0.5f; // 50% stuck<a name="line.1815"></a> +<span class="sourceLineNo">1816</span><a name="line.1816"></a> +<span class="sourceLineNo">1817</span> private float addWorkerStuckPercentage = DEFAULT_WORKER_ADD_STUCK_PERCENTAGE;<a name="line.1817"></a> +<span class="sourceLineNo">1818</span> private int timeoutInterval = DEFAULT_WORKER_MONITOR_INTERVAL;<a name="line.1818"></a> +<span class="sourceLineNo">1819</span> private 
int stuckThreshold = DEFAULT_WORKER_STUCK_THRESHOLD;<a name="line.1819"></a> +<span class="sourceLineNo">1820</span><a name="line.1820"></a> +<span class="sourceLineNo">1821</span> public WorkerMonitor() {<a name="line.1821"></a> +<span class="sourceLineNo">1822</span> refreshConfig();<a name="line.1822"></a> +<span class="sourceLineNo">1823</span> }<a name="line.1823"></a> +<span class="sourceLineNo">1824</span><a name="line.1824"></a> +<span class="sourceLineNo">1825</span> @Override<a name="line.1825"></a> +<span class="sourceLineNo">1826</span> public void run() {<a name="line.1826"></a> +<span class="sourceLineNo">1827</span> final int stuckCount = checkForStuckWorkers();<a name="line.1827"></a> +<span class="sourceLineNo">1828</span> checkThreadCount(stuckCount);<a name="line.1828"></a> +<span class="sourceLineNo">1829</span><a name="line.1829"></a> +<span class="sourceLineNo">1830</span> // refresh interval (poor man dynamic conf update)<a name="line.1830"></a> +<span class="sourceLineNo">1831</span> refreshConfig();<a name="line.1831"></a> +<span class="sourceLineNo">1832</span> }<a name="line.1832"></a> +<span class="sourceLineNo">1833</span><a name="line.1833"></a> +<span class="sourceLineNo">1834</span> private int checkForStuckWorkers() {<a name="line.1834"></a> +<span class="sourceLineNo">1835</span> // check if any of the worker is stuck<a name="line.1835"></a> +<span class="sourceLineNo">1836</span> int stuckCount = 0;<a name="line.1836"></a> +<span class="sourceLineNo">1837</span> for (WorkerThread worker : workerThreads) {<a name="line.1837"></a> +<span class="sourceLineNo">1838</span> if (worker.getCurrentRunTime() < stuckThreshold) {<a name="line.1838"></a> +<span class="sourceLineNo">1839</span> continue;<a name="line.1839"></a> +<span class="sourceLineNo">1840</span> }<a name="line.1840"></a> +<span class="sourceLineNo">1841</span><a name="line.1841"></a> +<span class="sourceLineNo">1842</sp
<TRUNCATED>