SLIDER-586: negative node count checks on AM
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/fbca62d8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/fbca62d8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/fbca62d8 Branch: refs/heads/feature/SLIDER-531-registry-enhancements Commit: fbca62d883c7b1bd8327ca44d06e7e7e5b8f284b Parents: a19c505 Author: Steve Loughran <ste...@apache.org> Authored: Thu Oct 30 15:34:18 2014 +0000 Committer: Steve Loughran <ste...@apache.org> Committed: Thu Oct 30 15:34:18 2014 +0000 ---------------------------------------------------------------------- .../TriggerClusterTeardownException.java | 3 +- .../slideram/SliderAMClientProvider.java | 2 +- .../slider/server/appmaster/state/AppState.java | 46 ++++++++++++++++---- .../agent/AgentMiniClusterTestBase.groovy | 2 +- .../slider/providers/agent/TestAgentEcho.groovy | 33 ++++++++++---- .../appstate/TestMockAppStateFlexing.groovy | 24 ++++++++++ 6 files changed, 89 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fbca62d8/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java b/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java index d08b33a..bb9f430 100644 --- a/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java +++ b/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java @@ -29,8 +29,7 @@ public class TriggerClusterTeardownException extends SliderException { private final FinalApplicationStatus finalApplicationStatus; public TriggerClusterTeardownException(int code, - String message, - FinalApplicationStatus finalApplicationStatus, + FinalApplicationStatus finalApplicationStatus, String message, Object... args) { super(code, message, args); this.finalApplicationStatus = finalApplicationStatus; http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fbca62d8/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java index 125746d..5ce0a78 100644 --- a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java +++ b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java @@ -150,7 +150,7 @@ public class SliderAMClientProvider extends AbstractClientProvider int instances = mapOperations.getOptionInt(COMPONENT_INSTANCES, 0); if (instances < 0) { throw new BadClusterStateException( - "Component %s has invalid instance count: %d", + "Component %s has negative instance count: %d", mapOperations.name, instances); } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fbca62d8/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java index 31658bc..a69a60d 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java @@ -678,11 +678,10 @@ public class AppState { String role = roleStatus.getName(); MapOperations comp = resources.getComponent(role); - int desiredInstanceCount = - resources.getComponentOptInt(role, ResourceKeys.COMPONENT_INSTANCES, 0); + int desiredInstanceCount = getDesiredInstanceCount(resources, role); if (desiredInstanceCount == 0) { - log.warn("Role {} has 0 instances specified", role); - } + log.info("Role {} has 0 instances specified", role); + } if (currentDesired != desiredInstanceCount) { log.info("Role {} flexed from {} to {}", role, currentDesired, desiredInstanceCount); @@ -698,13 +697,36 @@ public class AppState { log.info("Adding new role {}", name); ProviderRole dynamicRole = createDynamicProviderRole(name, resources.getComponent(name)); - buildRole(dynamicRole); + RoleStatus roleStatus = buildRole(dynamicRole); + roleStatus.setDesired(getDesiredInstanceCount(resources, name)); roleHistory.addNewProviderRole(dynamicRole); } } } /** + * Get the desired instance count of a role, rejecting negative values + * @param resources resource map + * @param role role name + * @return the instance count + * @throws BadConfigException if the count is negative + */ + private int getDesiredInstanceCount(ConfTreeOperations resources, + String role) throws BadConfigException { + int desiredInstanceCount = + resources.getComponentOptInt(role, ResourceKeys.COMPONENT_INSTANCES, 0); + + if (desiredInstanceCount < 0) { + log.error("Role {} has negative desired instances : {}", role, + desiredInstanceCount); + throw new BadConfigException( + "Negative instance count (%) requested for component %s", + desiredInstanceCount, role); + } + return desiredInstanceCount; + } + + /** * Add knowledge of a role. * This is a build-time operation that is not synchronized, and * should be used while setting up the system state -before servicing @@ -1592,10 +1614,9 @@ public class AppState { if (failures > threshold) { throw new TriggerClusterTeardownException( SliderExitCodes.EXIT_DEPLOYMENT_FAILED, - ErrorStrings.E_UNSTABLE_CLUSTER + + FinalApplicationStatus.FAILED, ErrorStrings.E_UNSTABLE_CLUSTER + " - failed with component %s failing %d times (%d in startup);" + " threshold is %d - last failure: %s", - FinalApplicationStatus.FAILED, role.getName(), role.getFailed(), role.getStartFailed(), @@ -1651,9 +1672,18 @@ public class AppState { expected = role.getDesired(); } - log.info("Reviewing {}", role); + log.info("Reviewing {} : expected {}", role, expected); checkFailureThreshold(role); + if (expected < 0 ) { + // negative value: fail + throw new TriggerClusterTeardownException( + SliderExitCodes.EXIT_DEPLOYMENT_FAILED, + FinalApplicationStatus.FAILED, + "Negative component count of %d desired for component %s", + expected, role); + } + if (delta > 0) { log.info("{}: Asking for {} more nodes(s) for a total of {} ", name, delta, expected); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fbca62d8/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy index 7786c41..c2ea54a 100644 --- a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy @@ -140,7 +140,7 @@ extends YarnZKMiniClusterTestBase { } /** - * Create an AM without a master + * Create a standalone AM * @param clustername AM name * @param size # of nodes * @param deleteExistingData should any existing cluster data be deleted http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fbca62d8/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy index f67ee92..f40d5a7 100644 --- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy @@ -26,6 +26,7 @@ import org.apache.slider.client.SliderClient import org.apache.slider.common.SliderExitCodes import org.apache.slider.core.exceptions.BadClusterStateException import org.apache.slider.core.main.ServiceLauncher +import org.junit.Before import org.junit.Test import static org.apache.slider.common.params.Arguments.* @@ -38,6 +39,28 @@ import static org.apache.slider.providers.agent.AgentKeys.* @Slf4j class TestAgentEcho extends AgentTestBase { + File slider_core + String echo_py + File echo_py_path + File app_def_path + String agt_ver + File agt_ver_path + String agt_conf + File agt_conf_path + + @Before + public void setupArtifacts() { + slider_core = new File(new File(".").absoluteFile, "src/test/python"); + echo_py = "echo.py" + echo_py_path = new File(slider_core, echo_py) + app_def_path = new File(app_def_pkg_path) + agt_ver = "version" + agt_ver_path = new File(slider_core, agt_ver) + agt_conf = "agent.ini" + agt_conf_path = new File(slider_core, agt_conf) + + } + @Override void checkTestAssumptions(YarnConfiguration conf) { @@ -53,14 +76,6 @@ class TestAgentEcho extends AgentTestBase { true, false) - File slider_core = new File(new File(".").absoluteFile, "src/test/python"); - String echo_py = "echo.py" - File echo_py_path = new File(slider_core, echo_py) - File app_def_path = new File(app_def_pkg_path) - String agt_ver = "version" - File agt_ver_path = new File(slider_core, agt_ver) - String agt_conf = "agent.ini" - File agt_conf_path = new File(slider_core, agt_conf) assert echo_py_path.exists() assert app_def_path.exists() assert agt_ver_path.exists() @@ -102,7 +117,7 @@ class TestAgentEcho extends AgentTestBase { sliderClient.flex(clustername, [(role): -1]); fail("expected an exception") } catch (BadClusterStateException e) { - assertExceptionDetails(e, SliderExitCodes.EXIT_BAD_STATE, "") + assertExceptionDetails(e, SliderExitCodes.EXIT_BAD_STATE, "negative") } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fbca62d8/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy index a7bf068..1db500b 100644 --- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy @@ -20,6 +20,7 @@ package org.apache.slider.server.appmaster.model.appstate import groovy.util.logging.Slf4j import org.apache.hadoop.yarn.api.records.Container +import org.apache.slider.core.exceptions.TriggerClusterTeardownException import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest import org.apache.slider.server.appmaster.model.mock.MockRoles import org.apache.slider.server.appmaster.operations.AbstractRMOperation @@ -112,5 +113,28 @@ class TestMockAppStateFlexing extends BaseMockAppStateTest implements MockRoles } + @Test + public void testFlexNegative() throws Throwable { + int r0 = 6 + int r1 = 0 + int r2 = 0 + role0Status.desired = r0 + role1Status.desired = r1 + role2Status.desired = r2 + List<RoleInstance> instances = createAndStartNodes() + + int clusterSize = r0 + r1 + r2 + assert instances.size() == clusterSize + log.info("shrinking cluster") + role0Status.desired = -2 + List<AppState.NodeCompletionResult> completionResults = [] + try { + createStartAndStopNodes(completionResults) + fail("expected an exception") + } catch (TriggerClusterTeardownException e) { + } + + } + }