This is an automated email from the ASF dual-hosted git repository. hulee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/helix.git
commit 58fe0e8d18967ec26119daa215ecb570183140b7 Author: Yi Wang <[email protected]> AuthorDate: Tue Mar 26 17:53:19 2019 -0700 Report instance started & health status when getting by id RB=1609426 BUG=helix-1732 G=helix-reviewers A=jxue Signed-off-by: Hunter Lee <[email protected]> --- .../rest/server/json/instance/InstanceInfo.java | 113 +++++++++++++++++ .../rest/server/json/instance/StoppableCheck.java | 1 - .../server/resources/helix/InstanceAccessor.java | 39 ++---- .../helix/rest/server/service/InstanceService.java | 64 +++++++++- .../rest/server/service/InstanceServiceImpl.java | 135 +++++++++++++-------- .../helix/rest/server/TestInstanceAccessor.java | 10 +- 6 files changed, 275 insertions(+), 87 deletions(-) diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/json/instance/InstanceInfo.java b/helix-rest/src/main/java/org/apache/helix/rest/server/json/instance/InstanceInfo.java new file mode 100644 index 0000000..b1600ce --- /dev/null +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/json/instance/InstanceInfo.java @@ -0,0 +1,113 @@ +package org.apache.helix.rest.server.json.instance; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.helix.ZNRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + + +@JsonInclude(JsonInclude.Include.NON_EMPTY) +public class InstanceInfo { + private static final Logger _logger = LoggerFactory.getLogger(InstanceInfo.class); + + @JsonProperty("id") + private final String id; + @JsonProperty("liveInstance") + private final ZNRecord liveInstance; + @JsonProperty("config") + private final ZNRecord instanceConfig; + @JsonProperty("partitions") + private final List<String> partitions; + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @JsonProperty("resources") + private final List<String> resources; + @JsonProperty("health") + private final boolean isHealth; + @JsonProperty("failedHealthChecks") + private final List<String> failedHealthChecks; + + private InstanceInfo(Builder builder) { + id = builder.id; + liveInstance = builder.liveInstance; + instanceConfig = builder.instanceConfig; + partitions = builder.partitions; + resources = builder.resources; + isHealth = builder.isHealth; + failedHealthChecks = builder.failedHealthChecks; + } + + public static final class Builder { + private String id; + private ZNRecord liveInstance; + private ZNRecord instanceConfig; + private List<String> partitions; + private List<String> resources; + private boolean isHealth; + private List<String> failedHealthChecks; + + public Builder(String id) { + this.id = id; + } + + public Builder liveInstance(ZNRecord liveInstance) { + this.liveInstance = liveInstance; + return this; + } + + public Builder instanceConfig(ZNRecord instanceConfig) { + this.instanceConfig = instanceConfig; + return this; + } + + public Builder partitions(List<String> partitions) { + this.partitions = partitions; + return this; + } + + public Builder resources(List<String> resources) { + this.resources = resources; + return this; + } + + public Builder healthStatus(Map<String, Boolean> healthChecks) { + this.failedHealthChecks = new ArrayList<>(); + for (String healthCheck : healthChecks.keySet()) { + if (!healthChecks.get(healthCheck)) { + _logger.warn("Health Check {} failed", healthCheck); + this.failedHealthChecks.add(healthCheck); + } + } + this.isHealth = this.failedHealthChecks.isEmpty(); + return this; + } + + public InstanceInfo build() { + return new InstanceInfo(this); + } + } +} diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/json/instance/StoppableCheck.java b/helix-rest/src/main/java/org/apache/helix/rest/server/json/instance/StoppableCheck.java index 8dd1d31..b8485b2 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/json/instance/StoppableCheck.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/json/instance/StoppableCheck.java @@ -27,7 +27,6 @@ import java.util.Map; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableMap; - public class StoppableCheck { private static final String HELIX_CHECK_PREFIX = "Helix:"; private static final String CUSTOM_CHECK_PREFIX = "Custom:"; diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstanceAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstanceAccessor.java index db44ff9..dc9da3e 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstanceAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstanceAccessor.java @@ -21,7 +21,6 @@ package org.apache.helix.rest.server.resources.helix; import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; @@ -48,13 +47,13 @@ import org.apache.helix.model.Error; import org.apache.helix.model.HealthStat; import org.apache.helix.model.HelixConfigScope; import org.apache.helix.model.InstanceConfig; -import org.apache.helix.model.LiveInstance; import org.apache.helix.model.Message; import org.apache.helix.model.ParticipantHistory; import org.apache.helix.model.builder.HelixConfigScopeBuilder; import org.apache.helix.rest.client.CustomRestClient; import org.apache.helix.rest.client.CustomRestClientFactory; import org.apache.helix.rest.common.HelixDataAccessorWrapper; +import org.apache.helix.rest.server.json.instance.InstanceInfo; import org.apache.helix.rest.server.json.instance.StoppableCheck; import org.apache.helix.rest.server.service.InstanceService; import org.apache.helix.rest.server.service.InstanceServiceImpl; @@ -91,7 +90,7 @@ public class InstanceAccessor extends AbstractHelixResource { } @GET - public Response getInstances(@PathParam("clusterId") String clusterId) { + public Response getAllInstances(@PathParam("clusterId") String clusterId) { HelixDataAccessor accessor = getDataAccssor(clusterId); ObjectNode root = JsonNodeFactory.instance.objectNode(); root.put(Properties.id.name(), JsonNodeFactory.instance.textNode(clusterId)); @@ -172,29 +171,17 @@ public class InstanceAccessor extends AbstractHelixResource { @GET @Path("{instanceName}") - public Response getInstance(@PathParam("clusterId") String clusterId, + public Response getInstanceById(@PathParam("clusterId") String clusterId, @PathParam("instanceName") String instanceName) throws IOException { - HelixDataAccessor accessor = getDataAccssor(clusterId); - Map<String, Object> instanceMap = new HashMap<>(); - instanceMap.put(Properties.id.name(), JsonNodeFactory.instance.textNode(instanceName)); - instanceMap.put(InstanceProperties.liveInstance.name(), null); - - InstanceConfig instanceConfig = - accessor.getProperty(accessor.keyBuilder().instanceConfig(instanceName)); - LiveInstance liveInstance = - accessor.getProperty(accessor.keyBuilder().liveInstance(instanceName)); - - if (instanceConfig != null) { - instanceMap.put(InstanceProperties.config.name(), instanceConfig.getRecord()); - } else { - return notFound(); - } - - if (liveInstance != null) { - instanceMap.put(InstanceProperties.liveInstance.name(), liveInstance.getRecord()); - } + ObjectMapper objectMapper = new ObjectMapper(); + HelixDataAccessor dataAccessor = getDataAccssor(clusterId); + // TODO reduce GC by dependency injection + InstanceService instanceService = new InstanceServiceImpl( + new HelixDataAccessorWrapper((ZKHelixDataAccessor) dataAccessor), getConfigAccessor()); + InstanceInfo instanceInfo = instanceService.getInstanceInfo(clusterId, instanceName, + InstanceService.HealthCheck.STARTED_AND_HEALTH_CHECK_LIST); - return JSONRepresentation(instanceMap); + return OK(objectMapper.writeValueAsString(instanceInfo)); } @POST @@ -208,8 +195,8 @@ public class InstanceAccessor extends AbstractHelixResource { InstanceService instanceService = new InstanceServiceImpl( new HelixDataAccessorWrapper((ZKHelixDataAccessor) dataAccessor), getConfigAccessor()); - Map<String, Boolean> helixStoppableCheck = - instanceService.getInstanceStoppableCheck(clusterId, instanceName); + Map<String, Boolean> helixStoppableCheck = instanceService.getInstanceHealthStatus(clusterId, + instanceName, InstanceService.HealthCheck.STOPPABLE_CHECK_LIST); CustomRestClient customClient = CustomRestClientFactory.get(jsonContent); // TODO add the json content parse logic Map<String, Boolean> customStoppableCheck = diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceService.java b/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceService.java index 664d9c0..42bfe67 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceService.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceService.java @@ -19,16 +19,74 @@ package org.apache.helix.rest.server.service; * under the License. */ +import java.util.Arrays; +import java.util.List; import java.util.Map; +import org.apache.helix.rest.server.json.instance.InstanceInfo; + +import com.google.common.collect.ImmutableList; public interface InstanceService { + enum HealthCheck { + /** + * Check if instance is alive + */ + INSTANCE_NOT_ALIVE, + /** + * Check if instance is enabled both in instance config and cluster config + */ + INSTANCE_NOT_ENABLED, + /** + * Check if instance is stable + * Stable means all the ideal state mapping matches external view (view of current state). + */ + INSTANCE_NOT_STABLE, + /** + * Check if instance has 0 resource assigned + */ + EMPTY_RESOURCE_ASSIGNMENT, + /** + * Check if instance has disabled partitions + */ + HAS_DISABLED_PARTITION, + /** + * Check if instance has valid configuration (pre-requisite for all checks) + */ + INVALID_CONFIG, + /** + * Check if instance has error partitions + */ + HAS_ERROR_PARTITION; + + /** + * Pre-defined list of checks to test if an instance can be stopped at runtime + */ + public static List<HealthCheck> STOPPABLE_CHECK_LIST = Arrays.asList(HealthCheck.values()); + /** + * Pre-defined list of checks to test if an instance is in healthy running state + */ + public static List<HealthCheck> STARTED_AND_HEALTH_CHECK_LIST = + ImmutableList.of(HealthCheck.INSTANCE_NOT_ALIVE, HealthCheck.INSTANCE_NOT_ENABLED, + HealthCheck.INSTANCE_NOT_STABLE, HealthCheck.EMPTY_RESOURCE_ASSIGNMENT); + } + /** * Get the current instance stoppable checks based on Helix own business logic - * * @param clusterId * @param instanceName - * @return a map where key is stoppable check name and boolean value represents whether the check succeeds + * @return a map where key is stoppable check name and boolean value represents whether the check + * succeeds + */ + Map<String, Boolean> getInstanceHealthStatus(String clusterId, String instanceName, + List<HealthCheck> healthChecks); + + /** + * Get the overall status of the instance + * @param clusterId + * @param instanceName + * @return */ - Map<String, Boolean> getInstanceStoppableCheck(String clusterId, String instanceName); + InstanceInfo getInstanceInfo(String clusterId, String instanceName, + List<HealthCheck> healthChecks); } diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java b/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java index 710cad0..3f8a7cb 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java @@ -19,17 +19,22 @@ package org.apache.helix.rest.server.service; * under the License. */ +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.helix.ConfigAccessor; import org.apache.helix.HelixDataAccessor; import org.apache.helix.HelixException; +import org.apache.helix.model.CurrentState; +import org.apache.helix.model.InstanceConfig; +import org.apache.helix.model.LiveInstance; +import org.apache.helix.rest.server.json.instance.InstanceInfo; import org.apache.helix.util.InstanceValidationUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - public class InstanceServiceImpl implements InstanceService { private static final Logger _logger = LoggerFactory.getLogger(InstanceServiceImpl.class); @@ -42,64 +47,88 @@ public class InstanceServiceImpl implements InstanceService { } @Override - public Map<String, Boolean> getInstanceStoppableCheck(String clusterId, String instanceName) { + public Map<String, Boolean> getInstanceHealthStatus(String clusterId, String instanceName, + List<HealthCheck> healthChecks) { Map<String, Boolean> healthStatus = new HashMap<>(); - healthStatus.put(HealthCheck.INVALID_CONFIG.name(), InstanceValidationUtil.hasValidConfig(_dataAccessor, clusterId, instanceName)); - if (!healthStatus.get(HealthCheck.INVALID_CONFIG.name())) { - _logger.error("The instance {} doesn't have valid configuration", instanceName); - return healthStatus; + for (HealthCheck healthCheck : healthChecks) { + switch (healthCheck) { + case INVALID_CONFIG: + healthStatus.put(HealthCheck.INVALID_CONFIG.name(), + InstanceValidationUtil.hasValidConfig(_dataAccessor, clusterId, instanceName)); + if (!healthStatus.get(HealthCheck.INVALID_CONFIG.name())) { + _logger.error("The instance {} doesn't have valid configuration", instanceName); + return healthStatus; + } + case INSTANCE_NOT_ENABLED: + healthStatus.put(HealthCheck.INSTANCE_NOT_ENABLED.name(), InstanceValidationUtil + .isEnabled(_dataAccessor, _configAccessor, clusterId, instanceName)); + break; + case INSTANCE_NOT_ALIVE: + healthStatus.put(HealthCheck.INSTANCE_NOT_ALIVE.name(), + InstanceValidationUtil.isAlive(_dataAccessor, clusterId, instanceName)); + break; + case INSTANCE_NOT_STABLE: + try { + boolean isStable = InstanceValidationUtil.isInstanceStable(_dataAccessor, instanceName); + healthStatus.put(HealthCheck.INSTANCE_NOT_STABLE.name(), isStable); + } catch (HelixException e) { + _logger.error("Failed to check instance is stable, message: {}", e.getMessage()); + // TODO action on the stable check exception + } + break; + case HAS_ERROR_PARTITION: + healthStatus.put(HealthCheck.HAS_ERROR_PARTITION.name(), + !InstanceValidationUtil.hasErrorPartitions(_dataAccessor, clusterId, instanceName)); + break; + case HAS_DISABLED_PARTITION: + healthStatus.put(HealthCheck.HAS_DISABLED_PARTITION.name(), + !InstanceValidationUtil.hasDisabledPartitions(_dataAccessor, clusterId, instanceName)); + break; + case EMPTY_RESOURCE_ASSIGNMENT: + healthStatus.put(HealthCheck.EMPTY_RESOURCE_ASSIGNMENT.name(), + InstanceValidationUtil.hasResourceAssigned(_dataAccessor, clusterId, instanceName)); + break; + default: + _logger.error("Unsupported health check: {}", healthCheck); + break; + } } - // Any exceptions occurred below due to invalid instance config shouldn't happen - healthStatus.put( - HealthCheck.INSTANCE_NOT_ENABLED.name(), InstanceValidationUtil.isEnabled(_dataAccessor, _configAccessor, clusterId, instanceName)); - healthStatus.put(HealthCheck.INSTANCE_NOT_ALIVE.name(), InstanceValidationUtil.isAlive(_dataAccessor, clusterId, instanceName)); - healthStatus.put( - HealthCheck.EMPTY_RESOURCE_ASSIGNMENT.name(), InstanceValidationUtil.hasResourceAssigned(_dataAccessor, clusterId, instanceName)); - healthStatus.put(HealthCheck.HAS_DISABLED_PARTITIONS.name(), !InstanceValidationUtil.hasDisabledPartitions(_dataAccessor, clusterId, instanceName)); - healthStatus.put( - HealthCheck.HAS_ERROR_PARTITION.name(), !InstanceValidationUtil.hasErrorPartitions(_dataAccessor, clusterId, instanceName)); + return healthStatus; + } + + @Override + public InstanceInfo getInstanceInfo(String clusterId, String instanceName, + List<HealthCheck> healthChecks) { + InstanceInfo.Builder instanceInfoBuilder = new InstanceInfo.Builder(instanceName); - try { - boolean isStable = InstanceValidationUtil.isInstanceStable(_dataAccessor, instanceName); - healthStatus.put(HealthCheck.INSTANCE_NOT_STABLE.name(), isStable); - } catch (HelixException e) { - _logger.error("Failed to check instance is stable, message: {}", e.getMessage()); - // TODO action on the stable check exception + InstanceConfig instanceConfig = + _dataAccessor.getProperty(_dataAccessor.keyBuilder().instanceConfig(instanceName)); + LiveInstance liveInstance = + _dataAccessor.getProperty(_dataAccessor.keyBuilder().liveInstance(instanceName)); + if (instanceConfig != null) { + instanceInfoBuilder.instanceConfig(instanceConfig.getRecord()); } + if (liveInstance != null) { + instanceInfoBuilder.liveInstance(liveInstance.getRecord()); + String sessionId = liveInstance.getSessionId(); - return healthStatus; - } + List<String> resourceNames = _dataAccessor + .getChildNames(_dataAccessor.keyBuilder().currentStates(instanceName, sessionId)); + instanceInfoBuilder.resources(resourceNames); + List<String> partitions = new ArrayList<>(); + for (String resourceName : resourceNames) { + CurrentState currentState = _dataAccessor.getProperty( + _dataAccessor.keyBuilder().currentState(instanceName, sessionId, resourceName)); + if (currentState != null && currentState.getPartitionStateMap() != null) { + partitions.addAll(currentState.getPartitionStateMap().keySet()); + } + } + instanceInfoBuilder.partitions(partitions); + } + instanceInfoBuilder + .healthStatus(getInstanceHealthStatus(clusterId, instanceName, healthChecks)); - public enum HealthCheck { - /** - * Check if instance is alive - */ - INSTANCE_NOT_ALIVE, - /** - * Check if instance is enabled both in instance config and cluster config - */ - INSTANCE_NOT_ENABLED, - /** - * Check if instance is stable - * Stable means all the ideal state mapping matches external view (view of current state). - */ - INSTANCE_NOT_STABLE, - /** - * Check if instance has 0 resource assigned - */ - EMPTY_RESOURCE_ASSIGNMENT, - /** - * Check if instance has disabled partitions - */ - HAS_DISABLED_PARTITIONS, - /** - * Check if instance has valid configuration (pre-requisite for all checks) - */ - INVALID_CONFIG, - /** - * Check if instance has error partitions - */ - HAS_ERROR_PARTITION + return instanceInfoBuilder.build(); } } diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstanceAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstanceAccessor.java index 12493c5..b92ce26 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstanceAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstanceAccessor.java @@ -130,7 +130,7 @@ public class TestInstanceAccessor extends AbstractTestClass { } @Test(dependsOnMethods = "testGetMessagesByStateModelDef") - public void testGetInstances() throws IOException { + public void testGetAllInstances() throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); String body = new JerseyUriRequestBuilder("clusters/{}/instances").isBodyReturnExpected(true) .format(CLUSTER_NAME).get(this); @@ -145,14 +145,16 @@ public class TestInstanceAccessor extends AbstractTestClass { + instances + " vs instances actually: " + _instancesMap.get(CLUSTER_NAME)); } - @Test(dependsOnMethods = "testGetInstances") - public void testGetInstance() throws IOException { + @Test + public void testGetInstanceById() throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); String body = new JerseyUriRequestBuilder("clusters/{}/instances/{}").isBodyReturnExpected(true) .format(CLUSTER_NAME, INSTANCE_NAME).get(this); JsonNode node = OBJECT_MAPPER.readTree(body); String instancesCfg = node.get(InstanceAccessor.InstanceProperties.config.name()).toString(); Assert.assertNotNull(instancesCfg); + boolean isHealth = node.get("health").getBooleanValue(); + Assert.assertFalse(isHealth); InstanceConfig instanceConfig = new InstanceConfig(toZNRecord(instancesCfg)); Assert.assertEquals(instanceConfig, @@ -181,7 +183,7 @@ public class TestInstanceAccessor extends AbstractTestClass { _configAccessor.getInstanceConfig(CLUSTER_NAME, INSTANCE_NAME + "TEST"); } - @Test(dependsOnMethods = "testGetInstance") + @Test(dependsOnMethods = "testGetInstanceById") public void updateInstance() throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); // Disable instance
