YARN-7073. Yarn native services rest API documentation. Contributed by Gour Saha
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7a567027 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7a567027 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7a567027 Branch: refs/heads/yarn-native-services Commit: 7a567027efdc8fd388fcaa9bcc728dd8a28c1ac8 Parents: 49c8c87 Author: Jian He <jia...@apache.org> Authored: Thu Aug 31 17:17:22 2017 -0700 Committer: Jian He <jia...@apache.org> Committed: Tue Sep 19 21:36:21 2017 -0700 ---------------------------------------------------------------------- hadoop-project/src/site/site.xml | 1 + .../hadoop/yarn/service/webapp/ApiServer.java | 47 +- .../definition/YARN-Services-Examples.md | 245 ++++++++ ...RN-Simplified-V1-API-Layer-For-Services.yaml | 128 ++-- .../native-services/NativeServicesAPI.md | 606 +++++++++++++++++++ 5 files changed, 973 insertions(+), 54 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/7a567027/hadoop-project/src/site/site.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index e173988..7283d63 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -152,6 +152,7 @@ <menu name="YARN Native Services" inherit="top"> <item name="Introduction" href="hadoop-yarn/hadoop-yarn-site/native-services/NativeServicesIntro.html"/> + <item name="Native Services API" href="hadoop-yarn/hadoop-yarn-site/native-services/NativeServicesAPI.html"/> <item name="Native Services Discovery" href="hadoop-yarn/hadoop-yarn-site/native-services/NativeServicesDiscovery.html"/> </menu> http://git-wip-us.apache.org/repos/asf/hadoop/blob/7a567027/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java 
---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java index f55e3f1..e8286ef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java @@ -24,12 +24,11 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.service.api.records.Component; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.api.records.ServiceStatus; import org.apache.hadoop.yarn.service.client.ServiceClient; -import org.apache.hadoop.yarn.service.api.records.Component; -import org.apache.hadoop.yarn.service.utils.SliderUtils; import org.apache.hadoop.yarn.service.utils.ServiceApiUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,6 +48,7 @@ import java.io.IOException; import java.util.Collections; import java.util.Map; +import static org.apache.hadoop.yarn.service.api.records.ServiceState.ACCEPTED; import static org.apache.hadoop.yarn.service.conf.RestApiConstants.*; /** @@ -76,11 +76,11 @@ public class ApiServer { @GET @Path(VERSION) @Consumes({ MediaType.APPLICATION_JSON }) - @Produces({ MediaType.APPLICATION_JSON, MediaType.TEXT_PLAIN }) + @Produces({ MediaType.APPLICATION_JSON }) 
public Response getVersion() { String version = VersionInfo.getBuildVersion(); LOG.info(version); - return Response.ok(version).build(); + return Response.ok("{ \"hadoop_version\": \"" + version + "\"}").build(); } @POST @@ -94,11 +94,11 @@ public class ApiServer { ApplicationId applicationId = SERVICE_CLIENT.actionCreate(service); LOG.info("Successfully created service " + service.getName() + " applicationId = " + applicationId); - serviceStatus.setState(ServiceState.ACCEPTED); + serviceStatus.setState(ACCEPTED); serviceStatus.setUri( CONTEXT_ROOT + SERVICE_ROOT_PATH + "/" + service .getName()); - return Response.status(Status.CREATED).entity(serviceStatus).build(); + return Response.status(Status.ACCEPTED).entity(serviceStatus).build(); } catch (IllegalArgumentException e) { serviceStatus.setDiagnostics(e.getMessage()); return Response.status(Status.BAD_REQUEST).entity(serviceStatus) @@ -182,16 +182,16 @@ public class ApiServer { + ": Invalid number of containers specified " + component .getNumberOfContainers()).build(); } + ServiceStatus status = new ServiceStatus(); try { Map<String, Long> original = SERVICE_CLIENT.flexByRestService(appName, Collections.singletonMap(component.getName(), component.getNumberOfContainers())); - return Response.ok().entity( - "Updating component " + componentName + " size from " + original - .get(componentName) + " to " + component.getNumberOfContainers()) - .build(); + status.setDiagnostics( + "Updating component (" + componentName + ") size from " + original + .get(componentName) + " to " + component.getNumberOfContainers()); + return Response.ok().entity(status).build(); } catch (YarnException | IOException e) { - ServiceStatus status = new ServiceStatus(); status.setDiagnostics(e.getMessage()); return Response.status(Status.INTERNAL_SERVER_ERROR).entity(status) .build(); @@ -244,31 +244,40 @@ public class ApiServer { } private Response updateLifetime(String appName, Service updateAppData) { + ServiceStatus status = new 
ServiceStatus(); try { String newLifeTime = SERVICE_CLIENT.updateLifetime(appName, updateAppData.getLifetime()); - return Response.ok("Service " + appName + " lifeTime is successfully updated to " - + updateAppData.getLifetime() + " seconds from now: " + newLifeTime).build(); + status.setDiagnostics( + "Service (" + appName + ")'s lifeTime is updated to " + newLifeTime + + ", " + updateAppData.getLifetime() + + " seconds remaining"); + return Response.ok(status).build(); } catch (Exception e) { String message = - "Failed to update service (" + appName + ") lifetime (" - + updateAppData.getLifetime() + ")"; + "Failed to update service (" + appName + ")'s lifetime to " + + updateAppData.getLifetime(); LOG.error(message, e); - return Response.status(Status.INTERNAL_SERVER_ERROR) - .entity(message + " : " + e.getMessage()).build(); + status.setDiagnostics(message + ": " + e.getMessage()); + return Response.status(Status.INTERNAL_SERVER_ERROR).entity(status) + .build(); } } private Response startService(String appName) { + ServiceStatus status = new ServiceStatus(); try { SERVICE_CLIENT.actionStart(appName); LOG.info("Successfully started service " + appName); - return Response.ok("Service " + appName + " is successfully started").build(); + status.setDiagnostics("Service " + appName + " is successfully started."); + status.setState(ServiceState.ACCEPTED); + return Response.ok(status).build(); } catch (Exception e) { String message = "Failed to start service " + appName; + status.setDiagnostics(message + ": " + e.getMessage()); LOG.info(message, e); return Response.status(Status.INTERNAL_SERVER_ERROR) - .entity(message + ": " + e.getMessage()).build(); + .entity(status).build(); } } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/7a567027/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Services-Examples.md ---------------------------------------------------------------------- diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Services-Examples.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Services-Examples.md new file mode 100644 index 0000000..3cd3d48 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Services-Examples.md @@ -0,0 +1,245 @@ +<!--- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +## Examples + +### Create a simple single-component service with most attribute values as defaults +POST URL - http://localhost:9191/ws/v1/services + +##### POST Request JSON +```json +{ + "name": "hello-world", + "components" : + [ + { + "name": "hello", + "number_of_containers": 1, + "artifact": { + "id": "nginx:latest", + "type": "DOCKER" + }, + "launch_command": "./start_nginx.sh", + "resource": { + "cpus": 1, + "memory": "256" + } + } + ] +} +``` + +##### GET Response JSON +GET URL - http://localhost:9191/ws/v1/services/hello-world + +Note, lifetime value of -1 means unlimited lifetime. 
+ +```json +{ + "name": "hello-world", + "id": "application_1503963985568_0002", + "lifetime": -1, + "components": [ + { + "name": "hello", + "dependencies": [], + "resource": { + "cpus": 1, + "memory": "256" + }, + "configuration": { + "properties": {}, + "env": {}, + "files": [] + }, + "quicklinks": [], + "containers": [ + { + "id": "container_e03_1503963985568_0002_01_000001", + "ip": "10.22.8.143", + "hostname": "myhost.local", + "state": "READY", + "launch_time": 1504051512412, + "bare_host": "10.22.8.143", + "component_name": "hello-0" + }, + { + "id": "container_e03_1503963985568_0002_01_000002", + "ip": "10.22.8.143", + "hostname": "myhost.local", + "state": "READY", + "launch_time": 1504051536450, + "bare_host": "10.22.8.143", + "component_name": "hello-1" + } + ], + "launch_command": "./start_nginx.sh", + "number_of_containers": 1, + "run_privileged_container": false + } + ], + "configuration": { + "properties": {}, + "env": {}, + "files": [] + }, + "quicklinks": {} +} + +``` +### Update to modify the lifetime of a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world + +##### PUT Request JSON + +Note, irrespective of what the current lifetime value is, this update request will set the lifetime of the service to be 3600 seconds (1 hour) from the time the request is submitted. Hence, if a service has remaining lifetime of 5 mins (say) and would like to extend it to an hour OR if an application has remaining lifetime of 5 hours (say) and would like to reduce it down to an hour, then for both scenarios you need to submit the same request below. 
+ +```json +{ + "lifetime": 3600 +} +``` +### Stop a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world + +##### PUT Request JSON +```json +{ + "state": "STOPPED" +} +``` + +### Start a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world + +##### PUT Request JSON +```json +{ + "state": "STARTED" +} +``` + +### Update to flex up/down the number of containers (instances) of a component of a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world/components/hello + +##### PUT Request JSON +```json +{ + "name": "hello", + "number_of_containers": 3 +} +``` + +### Destroy a service +DELETE URL - http://localhost:9191/ws/v1/services/hello-world + +*** + +### Create a complicated service - HBase +POST URL - http://localhost:9191/ws/v1/services/hbase-app-1 + +##### POST Request JSON + +```json +{ + "name": "hbase-app-1", + "lifetime": "3600", + "components": [ + { + "name": "hbasemaster", + "number_of_containers": 1, + "artifact": { + "id": "hbase:latest", + "type": "DOCKER" + }, + "launch_command": "/usr/hdp/current/hbase-master/bin/hbase master start", + "resource": { + "cpus": 1, + "memory": "2048" + }, + "configuration": { + "env": { + "HBASE_LOG_DIR": "<LOG_DIR>" + }, + "files": [ + { + "type": "XML", + "dest_file": "/etc/hadoop/conf/core-site.xml", + "props": { + "fs.defaultFS": "${CLUSTER_FS_URI}" + } + }, + { + "type": "XML", + "dest_file": "/etc/hbase/conf/hbase-site.xml", + "props": { + "hbase.cluster.distributed": "true", + "hbase.zookeeper.quorum": "${CLUSTER_ZK_QUORUM}", + "hbase.rootdir": "${SERVICE_HDFS_DIR}/hbase", + "zookeeper.znode.parent": "${SERVICE_ZK_PATH}", + "hbase.master.hostname": "hbasemaster.${SERVICE_NAME}.${USER}.${DOMAIN}", + "hbase.master.info.port": "16010" + } + } + ] + } + }, + { + "name": "regionserver", + "number_of_containers": 3, + "unique_component_support": "true", + "artifact": { + "id": "hbase:latest", + "type": "DOCKER" + }, + "launch_command": 
"/usr/hdp/current/hbase-regionserver/bin/hbase regionserver start", + "resource": { + "cpus": 1, + "memory": "2048" + }, + "configuration": { + "env": { + "HBASE_LOG_DIR": "<LOG_DIR>" + }, + "files": [ + { + "type": "XML", + "dest_file": "/etc/hadoop/conf/core-site.xml", + "props": { + "fs.defaultFS": "${CLUSTER_FS_URI}" + } + }, + { + "type": "XML", + "dest_file": "/etc/hbase/conf/hbase-site.xml", + "props": { + "hbase.cluster.distributed": "true", + "hbase.zookeeper.quorum": "${CLUSTER_ZK_QUORUM}", + "hbase.rootdir": "${SERVICE_HDFS_DIR}/hbase", + "zookeeper.znode.parent": "${SERVICE_ZK_PATH}", + "hbase.master.hostname": "hbasemaster.${SERVICE_NAME}.${USER}.${DOMAIN}", + "hbase.master.info.port": "16010", + "hbase.regionserver.hostname": "${COMPONENT_INSTANCE_NAME}.${SERVICE_NAME}.${USER}.${DOMAIN}" + } + } + ] + } + } + ], + "quicklinks": { + "HBase Master Status UI": "http://hbasemaster0.${SERVICE_NAME}.${USER}.${DOMAIN}:16010/master-status", + "Proxied HBase Master Status UI": "http://app-proxy/${DOMAIN}/${USER}/${SERVICE_NAME}/hbasemaster/16010/" + } +} +``` http://git-wip-us.apache.org/repos/asf/hadoop/blob/7a567027/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml index 17f8c95..b084be7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml @@ -17,18 +17,30 @@ swagger: '2.0' info: - title: "[YARN-4793] Simplified API layer for services and beyond" + title: "YARN Simplified API layer for services" description: | - Bringing a new service on YARN today is not a simple experience. The APIs of existing frameworks are either too low level (native YARN), require writing new code (for frameworks with programmatic APIs) or writing a complex spec (for declarative frameworks). In addition to building critical building blocks inside YARN (as part of other efforts at link:https://issues.apache.org/jira/browse/YARN-4692[YARN-4692]), there is a need for simplifying the user facing story for building services. Experience of projects like Apache Slider running real-life services like HBase, Storm, Accumulo, Solr etc, gives us some very good insights on how simplified APIs for services should look like. + Bringing a new service on YARN today is not a simple experience. The APIs of + existing frameworks are either too low level (native YARN), require writing + new code (for frameworks with programmatic APIs) or writing a complex spec + (for declarative frameworks). In addition to building critical building blocks + inside YARN (as part of other efforts at YARN-4692), there is a need for + simplifying the user facing story for building services. Experience of projects + like Apache Slider running real-life services like HBase, Storm, Accumulo, + Solr etc, gives us some very good insights on how simplified APIs for services + should look like. + To this end, we should look at a new simple-services API layer backed by REST + interfaces. This API can be used to create and manage the lifecycle of YARN + services. Services here can range from simple single-component service to + complex multi-component assemblies needing orchestration. YARN-4793 tracks + this effort. 
- To this end, we should look at a new simple-services API layer backed by REST interfaces. This API can be used to create and manage the lifecycle of YARN services. Services here can range from simple single-component service to complex multi-component assemblies needing orchestration. - - - We should also look at making this a unified REST based entry point for other important features like resource-profile management (link:https://issues.apache.org/jira/browse/YARN-3926[YARN-3926]), package-definitions' lifecycle-management and service-discovery (link:https://issues.apache.org/jira/browse/YARN-913[YARN-913]/link:https://issues.apache.org/jira/browse/YARN-4757[YARN-4757]). We also need to flesh out its relation to our present much lower level REST APIs (link:https://issues.apache.org/jira/browse/YARN-1695[YARN-1695]) in YARN for application-submission and management. - - - This document spotlights on this specification. In most of the cases, the application owner will not be forced to make any changes to their application. This is primarily true if the application is packaged with containerization technologies like docker. Irrespective of how complex the application is, there will be hooks provided at appropriate layers to allow pluggable and customizable application behavior. + This document spotlights on this specification. In most of the cases, the + application owner will not be forced to make any changes to their applications. + This is primarily true if the application is packaged with containerization + technologies like docker. Irrespective of how complex the application is, + there will be hooks provided at appropriate layers to allow pluggable and + customizable application behavior. 
version: "1.0.0" license: @@ -39,7 +51,6 @@ host: host.mycompany.com # array of all schemes that your API supports schemes: - http - - https # will be prefixed to all paths basePath: /ws/v1/ consumes: @@ -47,9 +58,17 @@ consumes: produces: - application/json paths: + /services/version: + get: + summary: Get current version of the API server. + description: Get current version of the API server. + responses: + 200: + description: Successful request + /services: get: - summary: List of services running in the cluster + summary: (TBD) List of services running in the cluster. description: Get a list of all currently running services (response includes a minimal projection of the service info). For more details do a GET on a specific service name. responses: 200: @@ -74,15 +93,62 @@ paths: $ref: '#/definitions/Service' responses: 202: - description: Request accepted + description: The request to create a service is accepted + 400: + description: Invalid service definition provided in the request body + 500: + description: Failed to create a service default: description: Unexpected error schema: $ref: '#/definitions/ServiceStatus' /services/{service_name}: + put: + summary: Update a service or upgrade the binary version of the components of a running service + description: Update the runtime properties of a service. Currently the following operations are supported - update lifetime, stop/start a service. + The PUT operation is also used to orchestrate an upgrade of the service containers to a newer version of their artifacts (TBD). + parameters: + - name: service_name + in: path + description: Service name + required: true + type: string + - name: Service + in: body + description: The updated service definition. 
It can contain the updated lifetime of a service or the desired state (STOPPED/STARTED) of a service to initiate a start/stop operation against the specified service + required: true + schema: + $ref: '#/definitions/Service' + responses: + 204: + description: Update or upgrade was successful + 404: + description: Service does not exist + default: + description: Unexpected error + schema: + $ref: '#/definitions/ServiceStatus' + delete: + summary: Destroy a service + description: Destroy a service and release all resources. This API might have to return JSON data providing location of logs (TBD), etc. + parameters: + - name: service_name + in: path + description: Service name + required: true + type: string + responses: + 204: + description: Destroy was successful + 404: + description: Service does not exist + default: + description: Unexpected error + schema: + $ref: '#/definitions/ServiceStatus' get: - summary: Get service details + summary: Get details of a service. description: Return the details (including containers) of a running service parameters: - name: service_name @@ -108,43 +174,36 @@ paths: description: Unexpected error schema: $ref: '#/definitions/ServiceStatus' + /services/{service_name}/components/{component_name}: put: - summary: Update a service or upgrade the binary version of the components of a running service - description: Update the runtime properties of a service. As of now, only update of lifetime and number of instances (flexing) of the components of a service is supported. The PUT operation is also used to orchestrate an upgrade of the service containers to a newer version of their artifacts. + summary: Flex a component's number of instances. 
+ description: Set a component's desired number of instances parameters: - name: service_name in: path description: Service name required: true type: string - responses: - 204: - description: Update or upgrade was successful - 404: - description: Service does not exist - default: - description: Unexpected error - schema: - $ref: '#/definitions/ServiceStatus' - delete: - summary: Destroy service - description: Destroy a service and release all resources. This API might have to return JSON data providing location of logs, etc. Not finalized yet. - parameters: - - name: service_name + - name: component_name in: path - description: Service name + description: Component name required: true type: string + - name: Component + in: body + description: The definition of a component which contains the updated number of instances. + required: true + schema: + $ref: '#/definitions/Component' responses: - 204: - description: Destroy was successful + 200: + description: Flex was successful 404: description: Service does not exist default: description: Unexpected error schema: $ref: '#/definitions/ServiceStatus' - definitions: Service: description: a service resource has the following attributes. @@ -183,7 +242,7 @@ definitions: format: int64 description: Life time (in seconds) of the service from the time it reaches the STARTED state (after which it is automatically destroyed by YARN). For unlimited lifetime do not set a lifetime value. placement_policy: - description: Advanced scheduling and placement policies (optional). If not specified, it defaults to the default placement policy of the service owner. The design of placement policies are in the works. It is not very clear at this point, how policies in conjunction with labels be exposed to service owners. This is a placeholder for now. The advanced structure of this attribute will be determined by YARN-4902. + description: (TBD) Advanced scheduling and placement policies. 
If not specified, it defaults to the default placement policy of the service owner. The design of placement policies are in the works. It is not very clear at this point, how policies in conjunction with labels be exposed to service owners. This is a placeholder for now. The advanced structure of this attribute will be determined by YARN-4902. $ref: '#/definitions/PlacementPolicy' components: description: Components of a service. @@ -420,4 +479,3 @@ definitions: type: integer format: int32 description: An error code specific to a scenario which service owners should be able to use to understand the failure in addition to the diagnostic information. - http://git-wip-us.apache.org/repos/asf/hadoop/blob/7a567027/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/native-services/NativeServicesAPI.md ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/native-services/NativeServicesAPI.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/native-services/NativeServicesAPI.md new file mode 100644 index 0000000..f56139a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/native-services/NativeServicesAPI.md @@ -0,0 +1,606 @@ +<!--- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. 
+--> + +# YARN Simplified API layer for services + +## Overview +Bringing a new service on YARN today is not a simple experience. The APIs of +existing frameworks are either too low level (native YARN), require writing +new code (for frameworks with programmatic APIs) or writing a complex spec +(for declarative frameworks). In addition to building critical building blocks +inside YARN (as part of other efforts at +[YARN-4692](https://issues.apache.org/jira/browse/YARN-4692)), there is a need for +simplifying the user facing story for building services. Experience of projects +like Apache Slider running real-life services like HBase, Storm, Accumulo, +Solr etc, gives us some very good insights on how simplified APIs for services +should look like. + +To this end, we should look at a new simple-services API layer backed by REST +interfaces. This API can be used to create and manage the lifecycle of YARN +services. Services here can range from simple single-component service to +complex multi-component assemblies needing orchestration. +[YARN-4793](https://issues.apache.org/jira/browse/YARN-4793) tracks this +effort. + +This document spotlights on this specification. In most of the cases, the +application owner will not be forced to make any changes to their applications. +This is primarily true if the application is packaged with containerization +technologies like docker. Irrespective of how complex the application is, +there will be hooks provided at appropriate layers to allow pluggable and +customizable application behavior. + + +### Version information +Version: 1.0.0 + +### License information +License: Apache 2.0 +License URL: http://www.apache.org/licenses/LICENSE-2.0.html + +### URI scheme +Host: host.mycompany.com + +BasePath: /ws/v1/ + +Schemes: HTTP + +### Consumes + +* application/json + + +### Produces + +* application/json + + +## Paths +### Create a service +``` +POST /services +``` + +#### Description + +Create a service. 
The request JSON is a service object with details required for creation. If the request is successful it returns 202 Accepted. A success of this API only confirms success in submission of the service creation request. There is no guarantee that the service will actually reach a RUNNING state. Resource availability and several other factors determine if the service will be deployed in the cluster. It is expected that clients would subsequently call the GET API to get details of the service and determine its state. + +#### Parameters +|Type|Name|Description|Required|Schema|Default| +|----|----|----|----|----|----| +|BodyParameter|Service|Service request object|true|Service|| + + +#### Responses +|HTTP Code|Description|Schema| +|----|----|----| +|202|The request to create a service is accepted|No Content| +|400|Invalid service definition provided in the request body|No Content| +|500|Failed to create a service|No Content| +|default|Unexpected error|ServiceStatus| + + +### (TBD) List of services running in the cluster. +``` +GET /services +``` + +#### Description + +Get a list of all currently running services (response includes a minimal projection of the service info). For more details do a GET on a specific service name. + +#### Responses +|HTTP Code|Description|Schema| +|----|----|----| +|200|An array of services|Service array| +|default|Unexpected error|ServiceStatus| + + +### Get current version of the API server. +``` +GET /services/version +``` + +#### Description + +Get current version of the API server. + +#### Responses +|HTTP Code|Description|Schema| +|----|----|----| +|200|Successful request|No Content| + + +### Update a service or upgrade the binary version of the components of a running service +``` +PUT /services/{service_name} +``` + +#### Description + +Update the runtime properties of a service. Currently the following operations are supported - update lifetime, stop/start a service. 
The PUT operation is also used to orchestrate an upgrade of the service containers to a newer version of their artifacts (TBD). + +#### Parameters +|Type|Name|Description|Required|Schema|Default| +|----|----|----|----|----|----| +|PathParameter|service_name|Service name|true|string|| +|BodyParameter|Service|The updated service definition. It can contain the updated lifetime of a service or the desired state (STOPPED/STARTED) of a service to initiate a start/stop operation against the specified service|true|Service|| + + +#### Responses +|HTTP Code|Description|Schema| +|----|----|----| +|204|Update or upgrade was successful|No Content| +|404|Service does not exist|No Content| +|default|Unexpected error|ServiceStatus| + + +### Destroy a service +``` +DELETE /services/{service_name} +``` + +#### Description + +Destroy a service and release all resources. This API might have to return JSON data providing location of logs (TBD), etc. + +#### Parameters +|Type|Name|Description|Required|Schema|Default| +|----|----|----|----|----|----| +|PathParameter|service_name|Service name|true|string|| + + +#### Responses +|HTTP Code|Description|Schema| +|----|----|----| +|204|Destroy was successful|No Content| +|404|Service does not exist|No Content| +|default|Unexpected error|ServiceStatus| + + +### Get details of a service. +``` +GET /services/{service_name} +``` + +#### Description + +Return the details (including containers) of a running service + +#### Parameters +|Type|Name|Description|Required|Schema|Default| +|----|----|----|----|----|----| +|PathParameter|service_name|Service name|true|string|| + + +#### Responses +|HTTP Code|Description|Schema| +|----|----|----| +|200|a service object|object| +|404|Service does not exist|No Content| +|default|Unexpected error|ServiceStatus| + + +### Flex a component's number of instances. 
+``` +PUT /services/{service_name}/components/{component_name} +``` + +#### Description + +Set a component's desired number of instances + +#### Parameters +|Type|Name|Description|Required|Schema|Default| +|----|----|----|----|----|----| +|PathParameter|service_name|Service name|true|string|| +|PathParameter|component_name|Component name|true|string|| +|BodyParameter|Component|The definition of a component which contains the updated number of instances.|true|Component|| + + +#### Responses +|HTTP Code|Description|Schema| +|----|----|----| +|200|Flex was successful|No Content| +|404|Service does not exist|No Content| +|default|Unexpected error|ServiceStatus| + + +## Definitions +### Artifact + +Artifact of a service component. If not specified, component will just run the bare launch command and no artifact will be localized. + +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|id|Artifact id. Examples are package location uri for tarball based services, image name for docker, name of service, etc.|true|string|| +|type|Artifact type, like docker, tarball, etc. (optional). For TARBALL type, the specified tarball will be localized to the container local working directory under a folder named lib. For SERVICE type, the service specified will be read and its components will be added into this service. The original component with artifact type SERVICE will be removed (any properties specified in the original component will be ignored).|false|enum (DOCKER, TARBALL, SERVICE)|DOCKER| +|uri|Artifact location to support multiple artifact stores (optional).|false|string|| + + +### Component + +One or more components of the service. If the service is HBase say, then the component can be a simple role like master or regionserver. If the service is a complex business webapp then a component can be other services say Kafka or Storm. Thereby it opens up the support for complex and nested services. 
+ +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|name|Name of the service component (mandatory). If Registry DNS is enabled, the max length is 63 characters. If unique component support is enabled, the max length is lowered to 44 characters.|true|string|| +|dependencies|An array of service components which should be in READY state (as defined by readiness check), before this component can be started. The dependencies across all components of a service should be represented as a DAG.|false|string array|| +|readiness_check|Readiness check for this component.|false|ReadinessCheck|| +|artifact|Artifact of the component (optional). If not specified, the service level global artifact takes effect.|false|Artifact|| +|launch_command|The custom launch command of this component (optional for DOCKER component, required otherwise). When specified at the component level, it overrides the value specified at the global level (if any).|false|string|| +|resource|Resource of this component (optional). If not specified, the service level global resource takes effect.|false|Resource|| +|number_of_containers|Number of containers for this component (optional). If not specified, the service level global number_of_containers takes effect.|false|integer (int64)|| +|run_privileged_container|Run all containers of this component in privileged mode (YARN-4262).|false|boolean|| +|placement_policy|Advanced scheduling and placement policies for all containers of this component (optional). If not specified, the service level placement_policy takes effect. Refer to the description at the global level for more details.|false|PlacementPolicy|| +|configuration|Config properties for this component.|false|Configuration|| +|quicklinks|A list of quicklink keys defined at the service level, and to be resolved by this component.|false|string array|| + + +### ConfigFile + +A config file that needs to be created and made available as a volume in a service component container. 
+ +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|type|Config file in the standard format like xml, properties, json, yaml, template.|false|enum (XML, PROPERTIES, JSON, YAML, TEMPLATE, ENV, HADOOP_XML)|| +|dest_file|The path that this configuration file should be created as. If it is an absolute path, it will be mounted into the DOCKER container. Absolute paths are only allowed for DOCKER containers. If it is a relative path, only the file name should be provided, and the file will be created in the container local working directory under a folder named conf.|false|string|| +|src_file|This provides the source location of the configuration file, the content of which is dumped to dest_file post property substitutions, in the format as specified in type. Typically the src_file would point to a source controlled network accessible file maintained by tools like puppet, chef, or hdfs etc. Currently, only hdfs is supported.|false|string|| +|props|A blob of key value pairs that will be dumped in the dest_file in the format as specified in type. If src_file is specified, src_file content are dumped in the dest_file and these properties will overwrite, if any, existing properties in src_file or be added as new properties in src_file.|false|object|| + + +### Configuration + +Set of configuration properties that can be injected into the service components via envs, files and custom pluggable helper docker containers. Files of several standard formats like xml, properties, json, yaml and templates will be supported. + +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|properties|A blob of key-value pairs of common service properties.|false|object|| +|env|A blob of key-value pairs which will be appended to the default system properties and handed off to the service at start time. 
All placeholder references to properties will be substituted before injection.|false|object|| +|files|Array of list of files that needs to be created and made available as volumes in the service component containers.|false|ConfigFile array|| + + +### Container + +An instance of a running service container. + +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|id|Unique container id of a running service, e.g. container_e3751_1458061340047_0008_01_000002.|false|string|| +|launch_time|The time when the container was created, e.g. 2016-03-16T01:01:49.000Z. This will most likely be different from cluster launch time.|false|string (date)|| +|ip|IP address of a running container, e.g. 172.31.42.141. The IP address and hostname attribute values are dependent on the cluster/docker network setup as per YARN-4007.|false|string|| +|hostname|Fully qualified hostname of a running container, e.g. ctr-e3751-1458061340047-0008-01-000002.examplestg.site. The IP address and hostname attribute values are dependent on the cluster/docker network setup as per YARN-4007.|false|string|| +|bare_host|The bare node or host in which the container is running, e.g. cn008.example.com.|false|string|| +|state|State of the container of a service.|false|ContainerState|| +|component_name|Name of the component that this container instance belongs to.|false|string|| +|resource|Resource used for this container.|false|Resource|| +|artifact|Artifact used for this container.|false|Artifact|| +|privileged_container|Container running in privileged mode or not.|false|boolean|| + + +### ContainerState + +The current state of the container of a service. + +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|state|enum of the state of the container|false|enum (INIT, STARTED, READY)|| + + +### PlacementPolicy + +Placement policy of an instance of a service. This feature is in the works in YARN-6592. 
+ +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|label|Assigns a service to a named partition of the cluster where the service desires to run (optional). If not specified all services are submitted to a default label of the service owner. One or more labels can be set up for each service owner account with required constraints like no-preemption, sla-99999, preemption-ok, etc.|false|string|| + + +### ReadinessCheck + +A custom command or a pluggable helper container to determine the readiness of a container of a component. Readiness for every service is different. Hence the need for a simple interface, with scope to support advanced use cases. + +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|type|E.g. HTTP (YARN will perform a simple REST call at a regular interval and expect a 204 No content).|true|enum (HTTP, PORT)|| +|props|A blob of key value pairs that will be used to configure the check.|false|object|| +|artifact|Artifact of the pluggable readiness check helper container (optional). If specified, this helper container typically hosts the http uri and encapsulates the complex scripts required to perform actual container readiness check. At the end it is expected to respond with a 204 No content just like the simplified use case. This pluggable framework benefits service owners who can run services without any packaging modifications. Note, only artifacts of type docker are supported for now. NOT IMPLEMENTED YET|false|Artifact|| + + +### Resource + +Resource determines the amount of resources (vcores, memory, network, etc.) usable by a container. This field determines the resource to be applied for all the containers of a component or service. The resource specified at the service (or global) level can be overridden at the component level. Only one of profile OR cpu & memory is expected. It raises a validation exception otherwise. 
+ +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|profile|Each resource profile has a unique id which is associated with a cluster-level predefined memory, cpus, etc.|false|string|| +|cpus|Amount of vcores allocated to each container (optional but overrides cpus in profile if specified).|false|integer (int32)|| +|memory|Amount of memory allocated to each container (optional but overrides memory in profile if specified). Currently accepts only an integer value and default unit is in MB.|false|string|| + + +### Service + +A service resource has the following attributes. + +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|name|A unique service name. If Registry DNS is enabled, the max length is 63 characters.|true|string|| +|id|A unique service id.|false|string|| +|artifact|Artifact of single-component service.|false|Artifact|| +|resource|Resource of single-component service or the global default for multi-component services. Mandatory if it is a single-component service and if cpus and memory are not specified at the Service level.|false|Resource|| +|launch_command|The custom launch command of a service component (optional). If not specified for services with docker images say, it will default to the default start command of the image. If there is a single component in this service, you can specify this without the need to have a 'components' section.|false|string|| +|launch_time|The time when the service was created, e.g. 2016-03-16T01:01:49.000Z.|false|string (date)|| +|number_of_containers|Number of containers for each component in the service. Each component can further override this service-level global default.|false|integer (int64)|| +|number_of_running_containers|In a GET response this provides the total number of running containers for this service (across all components) at the time of request. 
Note, a subsequent request can return a different number as and when more containers get allocated until it reaches the total number of containers or if a flex request has been made between the two requests.|false|integer (int64)|| +|lifetime|Life time (in seconds) of the service from the time it reaches the STARTED state (after which it is automatically destroyed by YARN). For unlimited lifetime do not set a lifetime value.|false|integer (int64)|| +|placement_policy|(TBD) Advanced scheduling and placement policies. If not specified, it defaults to the default placement policy of the service owner. The design of placement policies are in the works. It is not very clear at this point, how policies in conjunction with labels be exposed to service owners. This is a placeholder for now. The advanced structure of this attribute will be determined by YARN-4902.|false|PlacementPolicy|| +|components|Components of a service.|false|Component array|| +|configuration|Config properties of a service. Configurations provided at the service/global level are available to all the components. Specific properties can be overridden at the component level.|false|Configuration|| +|containers|Containers of a started service. Specifying a value for this attribute for the POST payload raises a validation error. This blob is available only in the GET response of a started service.|false|Container array|| +|state|State of the service. Specifying a value for this attribute for the POST payload raises a validation error. This attribute is available only in the GET response of a started service.|false|ServiceState|| +|quicklinks|A blob of key-value pairs of quicklinks to be exported for a service.|false|object|| +|queue|The YARN queue that this service should be submitted to.|false|string|| + + +### ServiceState + +The current state of a service. 
+ +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|state|enum of the state of the service|false|enum (ACCEPTED, STARTED, READY, STOPPED, FAILED)|| + + +### ServiceStatus + +The current status of a submitted service, returned as a response to the GET API. + +|Name|Description|Required|Schema|Default| +|----|----|----|----|----| +|diagnostics|Diagnostic information (if any) for the reason of the current state of the service. It typically has a non-null value, if the service is in a non-running state.|false|string|| +|state|Service state.|false|ServiceState|| +|code|An error code specific to a scenario which service owners should be able to use to understand the failure in addition to the diagnostic information.|false|integer (int32)|| + + + +## Examples + +### Create a simple single-component service with most attribute values as defaults +POST URL - http://localhost:9191/ws/v1/services + +##### POST Request JSON +```json +{ + "name": "hello-world", + "components" : + [ + { + "name": "hello", + "number_of_containers": 1, + "artifact": { + "id": "nginx:latest", + "type": "DOCKER" + }, + "launch_command": "./start_nginx.sh", + "resource": { + "cpus": 1, + "memory": "256" + } + } + ] +} +``` + +##### GET Response JSON +GET URL - http://localhost:9191/ws/v1/services/hello-world + +Note, lifetime value of -1 means unlimited lifetime. 
+ +```json +{ + "name": "hello-world", + "id": "application_1503963985568_0002", + "lifetime": -1, + "components": [ + { + "name": "hello", + "dependencies": [], + "resource": { + "cpus": 1, + "memory": "256" + }, + "configuration": { + "properties": {}, + "env": {}, + "files": [] + }, + "quicklinks": [], + "containers": [ + { + "id": "container_e03_1503963985568_0002_01_000001", + "ip": "10.22.8.143", + "hostname": "myhost.local", + "state": "READY", + "launch_time": 1504051512412, + "bare_host": "10.22.8.143", + "component_name": "hello-0" + }, + { + "id": "container_e03_1503963985568_0002_01_000002", + "ip": "10.22.8.143", + "hostname": "myhost.local", + "state": "READY", + "launch_time": 1504051536450, + "bare_host": "10.22.8.143", + "component_name": "hello-1" + } + ], + "launch_command": "./start_nginx.sh", + "number_of_containers": 1, + "run_privileged_container": false + } + ], + "configuration": { + "properties": {}, + "env": {}, + "files": [] + }, + "quicklinks": {} +} + +``` +### Update to modify the lifetime of a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world + +##### PUT Request JSON + +Note, irrespective of what the current lifetime value is, this update request will set the lifetime of the service to be 3600 seconds (1 hour) from the time the request is submitted. Hence, if a service has a remaining lifetime of 5 mins (say) and would like to extend it to an hour OR if a service has a remaining lifetime of 5 hours (say) and would like to reduce it down to an hour, then for both scenarios you need to submit the same request below. 
+ +```json +{ + "lifetime": 3600 +} +``` +### Stop a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world + +##### PUT Request JSON +```json +{ + "state": "STOPPED" +} +``` + +### Start a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world + +##### PUT Request JSON +```json +{ + "state": "STARTED" +} +``` + +### Update to flex up/down the number of containers (instances) of a component of a service +PUT URL - http://localhost:9191/ws/v1/services/hello-world/components/hello + +##### PUT Request JSON +```json +{ + "name": "hello", + "number_of_containers": 3 +} +``` + +### Destroy a service +DELETE URL - http://localhost:9191/ws/v1/services/hello-world + +*** + +### Create a complicated service - HBase +POST URL - http://localhost:9191/ws/v1/services/hbase-app-1 + +##### POST Request JSON + +```json +{ + "name": "hbase-app-1", + "lifetime": "3600", + "components": [ + { + "name": "hbasemaster", + "number_of_containers": 1, + "artifact": { + "id": "hbase:latest", + "type": "DOCKER" + }, + "launch_command": "/usr/hdp/current/hbase-master/bin/hbase master start", + "resource": { + "cpus": 1, + "memory": "2048" + }, + "configuration": { + "env": { + "HBASE_LOG_DIR": "<LOG_DIR>" + }, + "files": [ + { + "type": "XML", + "dest_file": "/etc/hadoop/conf/core-site.xml", + "props": { + "fs.defaultFS": "${CLUSTER_FS_URI}" + } + }, + { + "type": "XML", + "dest_file": "/etc/hbase/conf/hbase-site.xml", + "props": { + "hbase.cluster.distributed": "true", + "hbase.zookeeper.quorum": "${CLUSTER_ZK_QUORUM}", + "hbase.rootdir": "${SERVICE_HDFS_DIR}/hbase", + "zookeeper.znode.parent": "${SERVICE_ZK_PATH}", + "hbase.master.hostname": "hbasemaster.${SERVICE_NAME}.${USER}.${DOMAIN}", + "hbase.master.info.port": "16010" + } + } + ] + } + }, + { + "name": "regionserver", + "number_of_containers": 3, + "unique_component_support": "true", + "artifact": { + "id": "hbase:latest", + "type": "DOCKER" + }, + "launch_command": 
"/usr/hdp/current/hbase-regionserver/bin/hbase regionserver start", + "resource": { + "cpus": 1, + "memory": "2048" + }, + "configuration": { + "env": { + "HBASE_LOG_DIR": "<LOG_DIR>" + }, + "files": [ + { + "type": "XML", + "dest_file": "/etc/hadoop/conf/core-site.xml", + "props": { + "fs.defaultFS": "${CLUSTER_FS_URI}" + } + }, + { + "type": "XML", + "dest_file": "/etc/hbase/conf/hbase-site.xml", + "props": { + "hbase.cluster.distributed": "true", + "hbase.zookeeper.quorum": "${CLUSTER_ZK_QUORUM}", + "hbase.rootdir": "${SERVICE_HDFS_DIR}/hbase", + "zookeeper.znode.parent": "${SERVICE_ZK_PATH}", + "hbase.master.hostname": "hbasemaster.${SERVICE_NAME}.${USER}.${DOMAIN}", + "hbase.master.info.port": "16010", + "hbase.regionserver.hostname": "${COMPONENT_INSTANCE_NAME}.${SERVICE_NAME}.${USER}.${DOMAIN}" + } + } + ] + } + } + ], + "quicklinks": { + "HBase Master Status UI": "http://hbasemaster0.${SERVICE_NAME}.${USER}.${DOMAIN}:16010/master-status", + "Proxied HBase Master Status UI": "http://app-proxy/${DOMAIN}/${USER}/${SERVICE_NAME}/hbasemaster/16010/" + } +} +``` \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org