Don't fail on startup in case of an isRunning exception. During entity startup, treat isRunning exceptions as temporary failures and wait until the timeout expires; don't propagate them on the spot. Such exceptions could be caused by a temporary problem, for example connectivity issues.
Project: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/commit/5a4e83a2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/tree/5a4e83a2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/diff/5a4e83a2 Branch: refs/heads/master Commit: 5a4e83a2604b7f03f679690006517f8189732aa3 Parents: d8a9a6f Author: Svetoslav Neykov <[email protected]> Authored: Mon May 4 15:27:35 2015 +0300 Committer: Svetoslav Neykov <[email protected]> Committed: Tue May 5 18:40:30 2015 +0300 ---------------------------------------------------------------------- .../entity/basic/SoftwareProcessImpl.java | 24 +++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/5a4e83a2/software/base/src/main/java/brooklyn/entity/basic/SoftwareProcessImpl.java ---------------------------------------------------------------------- diff --git a/software/base/src/main/java/brooklyn/entity/basic/SoftwareProcessImpl.java b/software/base/src/main/java/brooklyn/entity/basic/SoftwareProcessImpl.java index ce28efd..eedc546 100644 --- a/software/base/src/main/java/brooklyn/entity/basic/SoftwareProcessImpl.java +++ b/software/base/src/main/java/brooklyn/entity/basic/SoftwareProcessImpl.java @@ -494,17 +494,26 @@ public abstract class SoftwareProcessImpl extends AbstractEntity implements Soft CountdownTimer timer = startTimeout.countdownTimer(); boolean isRunningResult = false; long delay = 100; + Exception firstFailure = null; while (!isRunningResult && !timer.isExpired()) { Time.sleep(delay); try { isRunningResult = driver.isRunning(); + if (log.isDebugEnabled()) log.debug("checked {}, 'is running' returned: {}", this, isRunningResult); } catch (Exception e) { - ServiceStateLogic.setExpectedState(this, Lifecycle.ON_FIRE); - // provide extra context info, as we're 
seeing this happen in strange circumstances - if (driver==null) throw new IllegalStateException(this+" concurrent start and shutdown detected"); - throw new IllegalStateException("Error detecting whether "+this+" is running: "+e, e); + Exceptions.propagateIfFatal(e); + + isRunningResult = false; + if (driver != null) { + log.error("checked " + this + ", 'is running' threw an exception", e); + } else { + // provide extra context info, as we're seeing this happen in strange circumstances + log.error(this+" concurrent start and shutdown detected", e); + } + if (firstFailure == null) { + firstFailure = e; + } } - if (log.isDebugEnabled()) log.debug("checked {}, is running returned: {}", this, isRunningResult); // slow exponential delay -- 1.1^N means after 40 tries and 50s elapsed, it reaches the max of 5s intervals // TODO use Repeater delay = Math.min(delay*11/10, 5000); @@ -512,9 +521,12 @@ public abstract class SoftwareProcessImpl extends AbstractEntity implements Soft if (!isRunningResult) { String msg = "Software process entity "+this+" did not pass is-running check within "+ "the required "+startTimeout+" limit ("+timer.getDurationElapsed().toStringRounded()+" elapsed)"; + if (firstFailure != null) { + msg += "; check failed with exception: " + firstFailure.getMessage(); + } log.warn(msg+" (throwing)"); ServiceStateLogic.setExpectedState(this, Lifecycle.RUNNING); - throw new IllegalStateException(msg); + throw new IllegalStateException(msg, firstFailure); } }
