This is an automated email from the ASF dual-hosted git repository. ztang pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 729ccb2 SUBMARINE-58. Submarine client needs to generate fat jar. Contributed by Zac Zhou. 729ccb2 is described below commit 729ccb2cabde67ee68600438b334a4f65d947092 Author: Zhankun Tang <zt...@apache.org> AuthorDate: Sun May 19 21:18:33 2019 +0800 SUBMARINE-58. Submarine client needs to generate fat jar. Contributed by Zac Zhou. --- hadoop-submarine/hadoop-submarine-all/pom.xml | 183 ++++++++++++ hadoop-submarine/hadoop-submarine-core/pom.xml | 4 + .../client/cli/param/runjob/RunJobParameters.java | 5 +- .../param/runjob/TensorFlowRunJobParameters.java | 10 +- .../submarine/common/resource/ResourceUtils.java | 332 +++++++++++++++++++++ .../common/resource/UnitsConversionUtil.java | 164 ++++++++++ .../submarine/common/resource/package-info.java | 19 ++ .../src/site/markdown/QuickStart.md | 63 +++- .../cli/runjob/TestRunJobCliParsingCommonYaml.java | 21 +- .../pytorch/TestRunJobCliParsingPyTorchYaml.java | 82 +++-- .../TestRunJobCliParsingTensorFlowYaml.java | 85 ++++-- ...stRunJobCliParsingTensorFlowYamlStandalone.java | 6 +- .../yarn/submarine/common/MockClientContext.java | 23 -- .../runjob-common-yaml/empty-framework.yaml | 6 +- .../runjob-common-yaml/missing-configs.yaml | 6 +- .../runjob-common-yaml/missing-framework.yaml | 6 +- .../runjob-common-yaml/some-sections-missing.yaml | 4 +- .../runjob-common-yaml/test-false-values.yaml | 4 +- .../runjob-common-yaml/wrong-indentation.yaml | 6 +- .../runjob-common-yaml/wrong-property-name.yaml | 6 +- .../runjob-pytorch-yaml/envs-are-missing.yaml | 2 +- .../invalid-config-ps-section.yaml | 2 +- .../invalid-config-tensorboard-section.yaml | 2 +- .../security-principal-is-missing.yaml | 2 +- .../valid-config-with-overrides.yaml | 2 +- .../runjob-pytorch-yaml/valid-config.yaml | 2 +- .../{valid-config.yaml => valid-gpu-config.yaml} | 0 .../runjob-tensorflow-yaml/envs-are-missing.yaml | 6 +- .../security-principal-is-missing.yaml | 6 +- .../tensorboard-dockerimage-is-missing.yaml | 6 +- .../valid-config-with-overrides.yaml | 6 +- .../runjob-tensorflow-yaml/valid-config.yaml | 6 +- ...image-is-missing.yaml => valid-gpu-config.yaml} | 5 +- hadoop-submarine/hadoop-submarine-dist/pom.xml | 131 ++++++++ .../src/assembly/distribution.xml | 61 ++++ .../hadoop-submarine-tony-runtime/pom.xml | 1 + .../yarn/submarine/runtimes/tony/TonyUtils.java | 21 +- .../hadoop-submarine-yarnservice-runtime/pom.xml | 4 +- .../runtimes/yarnservice/AbstractComponent.java | 6 +- .../component/TestTensorFlowWorkerComponent.java | 6 +- .../utils/TestSubmarineResourceUtils.java | 50 +++- hadoop-submarine/pom.xml | 133 +++++++-- 42 files changed, 1310 insertions(+), 185 deletions(-) diff --git a/hadoop-submarine/hadoop-submarine-all/pom.xml b/hadoop-submarine/hadoop-submarine-all/pom.xml new file mode 100644 index 0000000..e2d2e17 --- /dev/null +++ b/hadoop-submarine/hadoop-submarine-all/pom.xml @@ -0,0 +1,183 @@ +<?xml version="1.0"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>hadoop-submarine</artifactId> + <groupId>org.apache.hadoop</groupId> + <version>0.2.0-SNAPSHOT</version> + </parent> + <artifactId>${project.artifactId}</artifactId> + <version>${project.version}</version> + <name>Hadoop Submarine All</name> + + <properties> + <!-- Needed for generating FindBugs warnings using parent pom --> + <yarn.basedir>${project.parent.parent.basedir}</yarn.basedir> + <project.artifactId>hadoop-submarine-all</project.artifactId> + <project.version>0.2.0-SNAPSHOT</project.version> + </properties> + + <dependencies> + + <!-- Dependencies for Hadoop commons --> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-core</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + + <profiles> + + <profile> + <id>hadoop-3.2</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-yarnservice-runtime</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + </dependencies> + </profile> + + <!-- Default profile--> + <profile> + <id>hadoop-3.1</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-yarnservice-runtime</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + </dependencies> + </profile> + + <profile> + <id>hadoop-2.9</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + </dependencies> + </profile> + + <profile> + <id>hadoop-2.7</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + </profile> + </profiles> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>3.2.1</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <!-- + <shadedArtifactAttached>true</shadedArtifactAttached> + <shadedClassifierName>with-all-dependencies</shadedClassifierName> + --> + <outputFile>target/${project.artifactId}-${project.version}-${project.activeProfiles[0].id}.jar</outputFile> + <artifactSet> + <excludes> + <exclude>classworlds:classworlds</exclude> + <exclude>junit:junit</exclude> + <exclude>jmock:*</exclude> + <exclude>*:xml-apis</exclude> + <exclude>org.apache.maven:lib:tests</exclude> + </excludes> + </artifactSet> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> + <mainClass>org.apache.hadoop.yarn.submarine.client.cli.Cli</mainClass> + </transformer> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> + </transformers> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + +</project> diff --git a/hadoop-submarine/hadoop-submarine-core/pom.xml b/hadoop-submarine/hadoop-submarine-core/pom.xml index 4a387fc..3b267fa 100644 --- a/hadoop-submarine/hadoop-submarine-core/pom.xml +++ b/hadoop-submarine/hadoop-submarine-core/pom.xml @@ -67,6 +67,10 @@ <groupId>org.yaml</groupId> <artifactId>snakeyaml</artifactId> </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + </dependency> <!-- Dependencies for Hadoop commons --> diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/RunJobParameters.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/RunJobParameters.java index 629b1be..037593b 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/RunJobParameters.java +++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/RunJobParameters.java @@ -45,7 +45,7 @@ import org.apache.hadoop.yarn.submarine.client.cli.runjob.RoleParameters; import org.apache.hadoop.yarn.submarine.common.ClientContext; import org.apache.hadoop.yarn.submarine.common.api.TensorFlowRole; import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager; -import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.submarine.common.resource.ResourceUtils; import org.yaml.snakeyaml.introspector.Property; import org.yaml.snakeyaml.introspector.PropertyUtils; @@ -271,8 +271,7 @@ public abstract class RunJobParameters extends RunParameters { throw new ParseException( "--" + CliConstants.WORKER_RES + " is absent."); } - return ResourceUtils.createResourceFromString(workerResourceStr, - clientContext.getOrCreateYarnClient().getResourceTypeInfo()); + return ResourceUtils.createResourceFromString(workerResourceStr); } return null; } diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/TensorFlowRunJobParameters.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/TensorFlowRunJobParameters.java index a5f6ad6..b0deeb7 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/TensorFlowRunJobParameters.java +++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/runjob/TensorFlowRunJobParameters.java @@ -27,7 +27,7 @@ import org.apache.hadoop.yarn.submarine.client.cli.param.ParametersHolder; import org.apache.hadoop.yarn.submarine.client.cli.runjob.RoleParameters; import org.apache.hadoop.yarn.submarine.common.ClientContext; import org.apache.hadoop.yarn.submarine.common.api.TensorFlowRole; -import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.submarine.common.resource.ResourceUtils; import java.io.IOException; import java.util.List; @@ -127,8 +127,7 @@ public class TensorFlowRunJobParameters extends RunJobParameters { if (psResourceStr == null) { throw new ParseException("--" + CliConstants.PS_RES + " is absent."); } - return ResourceUtils.createResourceFromString(psResourceStr, - clientContext.getOrCreateYarnClient().getResourceTypeInfo()); + return ResourceUtils.createResourceFromString(psResourceStr); } return null; } @@ -151,9 +150,8 @@ public class TensorFlowRunJobParameters extends RunJobParameters { if (tensorboardResourceStr == null || tensorboardResourceStr.isEmpty()) { tensorboardResourceStr = CliConstants.TENSORBOARD_DEFAULT_RESOURCES; } - Resource tensorboardResource = - ResourceUtils.createResourceFromString(tensorboardResourceStr, - clientContext.getOrCreateYarnClient().getResourceTypeInfo()); + Resource tensorboardResource = ResourceUtils.createResourceFromString( + tensorboardResourceStr); String tensorboardDockerImage = parametersHolder.getOptionValue(CliConstants.TENSORBOARD_DOCKER_IMAGE); return new RoleParameters(TensorFlowRole.TENSORBOARD, 1, null, diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/ResourceUtils.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/ResourceUtils.java new file mode 100644 index 0000000..375ae35 --- /dev/null +++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/ResourceUtils.java @@ -0,0 +1,332 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common.resource; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineRuntimeException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * This class implements some methods with the almost the same logic as + * org.apache.hadoop.yarn.util.resource.ResourceUtils of hadoop 3.3. + * If the hadoop dependencies are upgraded to 3.3, this class can be refactored + * with org.apache.hadoop.yarn.util.resource.ResourceUtils. + */ +public final class ResourceUtils { + + private final static String RES_PATTERN = "^[^=]+=\\d+\\s?\\w*$"; + private final static String SET_RESOURCE_VALUE_METHOD = "setResourceValue"; + private final static String SET_MEMORY_SIZE_METHOD = "setMemorySize"; + private final static String DEPRECATED_SET_MEMORY_SIZE_METHOD = + "setMemory"; + private final static String GET_MEMORY_SIZE_METHOD = "getMemorySize"; + private final static String DEPRECATED_GET_MEMORY_SIZE_METHOD = + "getMemory"; + private final static String GET_RESOURCE_VALUE_METHOD = "getResourceValue"; + private final static String GET_RESOURCE_TYPE_METHOD = + "getResourcesTypeInfo"; + private final static String REINITIALIZE_RESOURCES_METHOD = + "reinitializeResources"; + public static final String MEMORY_URI = "memory-mb"; + public static final String VCORES_URI = "vcores"; + public static final String GPU_URI = "yarn.io/gpu"; + public static final String FPGA_URI = "yarn.io/fpga"; + + private static final Logger LOG = + LoggerFactory.getLogger(ResourceUtils.class); + + private ResourceUtils() {} + + public static Resource createResourceFromString(String resourceStr) { + Map<String, Long> typeToValue = parseResourcesString(resourceStr); + Resource resource = Resource.newInstance(0, 0); + for (Map.Entry<String, Long> entry : typeToValue.entrySet()) { + if(entry.getKey().equals(VCORES_URI)) { + resource.setVirtualCores(entry.getValue().intValue()); + continue; + } else if (entry.getKey().equals(MEMORY_URI)) { + setMemorySize(resource, entry.getValue()); + continue; + } + setResource(resource, entry.getKey(), entry.getValue().intValue()); + } + return resource; + } + + private static Map<String, Long> parseResourcesString(String resourcesStr) { + Map<String, Long> resources = new HashMap<>(); + String[] pairs = resourcesStr.trim().split(","); + for (String resource : pairs) { + resource = resource.trim(); + if (!resource.matches(RES_PATTERN)) { + throw new IllegalArgumentException("\"" + resource + "\" is not a " + + "valid resource type/amount pair. " + + "Please provide key=amount pairs separated by commas."); + } + String[] splits = resource.split("="); + String key = splits[0], value = splits[1]; + String units = getUnits(value); + + String valueWithoutUnit = value.substring(0, + value.length()- units.length()).trim(); + long resourceValue = Long.parseLong(valueWithoutUnit); + + // Convert commandline unit to standard YARN unit. + if (units.equals("M") || units.equals("m")) { + units = "Mi"; + } else if (units.equals("G") || units.equals("g")) { + units = "Gi"; + } else if (!units.isEmpty()){ + throw new IllegalArgumentException("Acceptable units are M/G or empty"); + } + + // special handle memory-mb and memory + if (key.equals(MEMORY_URI)) { + if (!units.isEmpty()) { + resourceValue = UnitsConversionUtil.convert(units, "Mi", + resourceValue); + } + } + + if (key.equals("memory")) { + key = MEMORY_URI; + resourceValue = UnitsConversionUtil.convert(units, "Mi", + resourceValue); + } + + // special handle gpu + if (key.equals("gpu")) { + key = GPU_URI; + } + + // special handle fpga + if (key.equals("fpga")) { + key = FPGA_URI; + } + + resources.put(key, resourceValue); + } + return resources; + } + + /** + * As hadoop 2.9.2 and lower don't support resources except cpu and memory. + * Use reflection to set GPU or other resources for compatibility with + * hadoop 2.9.2 + */ + public static void setResource(Resource resource, String resourceName, + int resourceValue) { + try { + Method method = resource.getClass().getMethod(SET_RESOURCE_VALUE_METHOD, + String.class, long.class); + method.invoke(resource, resourceName, resourceValue); + } catch (NoSuchMethodException e) { + LOG.error("There is no '" + SET_RESOURCE_VALUE_METHOD + "' API in this" + + "version of YARN", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } catch (IllegalAccessException | InvocationTargetException e) { + LOG.error("Failed to invoke '" + SET_RESOURCE_VALUE_METHOD + + "' method to set GPU resources", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + return; + } + + public static void setMemorySize(Resource resource, Long memorySize) { + boolean useWithIntParameter = false; + // For hadoop 2.9.2 and above + try { + Method method = resource.getClass().getMethod(SET_MEMORY_SIZE_METHOD, + long.class); + method.setAccessible(true); + method.invoke(resource, memorySize); + } catch (NoSuchMethodException nsme) { + LOG.info("There is no '" + SET_MEMORY_SIZE_METHOD + "(long)' API in" + + " this version of YARN"); + useWithIntParameter = true; + } catch (IllegalAccessException | InvocationTargetException e) { + LOG.error("Failed to invoke '" + SET_MEMORY_SIZE_METHOD + + "' method", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + // For hadoop 2.7.3 + if (useWithIntParameter) { + try { + LOG.info("Trying to use '" + DEPRECATED_SET_MEMORY_SIZE_METHOD + + "(int)' API for this version of YARN"); + Method method = resource.getClass().getMethod( + DEPRECATED_SET_MEMORY_SIZE_METHOD, int.class); + method.invoke(resource, memorySize.intValue()); + } catch (NoSuchMethodException e) { + LOG.error("There is no '" + DEPRECATED_SET_MEMORY_SIZE_METHOD + + "(int)' API in this version of YARN", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } catch (IllegalAccessException | InvocationTargetException e) { + LOG.error("Failed to invoke '" + DEPRECATED_SET_MEMORY_SIZE_METHOD + + "' method", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + } + } + + public static long getMemorySize(Resource resource) { + boolean useWithIntParameter = false; + long memory = 0; + // For hadoop 2.9.2 and above + try { + Method method = resource.getClass().getMethod(GET_MEMORY_SIZE_METHOD); + method.setAccessible(true); + memory = (long) method.invoke(resource); + } catch (NoSuchMethodException e) { + LOG.info("There is no '" + GET_MEMORY_SIZE_METHOD + "' API in" + + " this version of YARN"); + useWithIntParameter = true; + } catch (IllegalAccessException | InvocationTargetException e) { + LOG.error("Failed to invoke '" + GET_MEMORY_SIZE_METHOD + + "' method", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + // For hadoop 2.7.3 + if (useWithIntParameter) { + try { + LOG.info("Trying to use '" + DEPRECATED_GET_MEMORY_SIZE_METHOD + + "' API for this version of YARN"); + Method method = resource.getClass().getMethod( + DEPRECATED_GET_MEMORY_SIZE_METHOD); + method.setAccessible(true); + memory = ((Integer) method.invoke(resource)).longValue(); + } catch (NoSuchMethodException e) { + LOG.error("There is no '" + DEPRECATED_GET_MEMORY_SIZE_METHOD + + "' API in this version of YARN", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } catch (IllegalAccessException | InvocationTargetException e) { + LOG.error("Failed to invoke '" + DEPRECATED_GET_MEMORY_SIZE_METHOD + + "' method", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + } + return memory; + } + + /** + * As hadoop 2.9.2 and lower don't support resources except cpu and memory. + * Use reflection to set GPU or other resources for compatibility with + * hadoop 2.9.2 + */ + public static long getResourceValue(Resource resource, String resourceName) { + long resourceValue = 0; + try { + Method method = resource.getClass().getMethod(GET_RESOURCE_VALUE_METHOD, + String.class); + Object value = method.invoke(resource, resourceName); + resourceValue = (long) value; + } catch (NoSuchMethodException e) { + LOG.info("There is no '" + GET_RESOURCE_VALUE_METHOD + "' API in this" + + " version of YARN"); + } catch (InvocationTargetException e) { + if (e.getTargetException().getClass().getName().equals( + "org.apache.hadoop.yarn.exceptions.ResourceNotFoundException")) { + LOG.info("Not found resource " + resourceName); + } else { + LOG.info("Failed to invoke '" + GET_RESOURCE_VALUE_METHOD + "'" + + " method to get resource " + resourceName); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + } catch (IllegalAccessException | ClassCastException e) { + LOG.error("Failed to invoke '" + GET_RESOURCE_VALUE_METHOD + + "' method to get resource " + resourceName, e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + return resourceValue; + } + + /** + * As hadoop 2.9.2 and lower don't support resources except cpu and memory. + * Use reflection to add GPU or other resources for compatibility with + * hadoop 2.9.2 + */ + public static void configureResourceType(String resrouceName) { + Class resourceTypeInfo; + try{ + resourceTypeInfo = Class.forName( + "org.apache.hadoop.yarn.api.records.ResourceTypeInfo"); + Class resourceUtils = Class.forName( + "org.apache.hadoop.yarn.util.resource.ResourceUtils"); + Method method = resourceUtils.getMethod(GET_RESOURCE_TYPE_METHOD); + Object resTypes = method.invoke(null); + + Method resourceTypeInstance = resourceTypeInfo.getMethod("newInstance", + String.class, String.class); + Object resourceType = resourceTypeInstance.invoke(null, resrouceName, ""); + ((ArrayList)resTypes).add(resourceType); + + Method reInitialMethod = resourceUtils.getMethod( + REINITIALIZE_RESOURCES_METHOD, List.class); + reInitialMethod.invoke(null, resTypes); + + } catch (ClassNotFoundException e) { + LOG.info("There is no specified class API in this" + + " version of YARN"); + LOG.info(e.getMessage()); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } catch (NoSuchMethodException nsme) { + LOG.info("There is no '" + GET_RESOURCE_VALUE_METHOD + "' API in this" + + " version of YARN"); + } catch (IllegalAccessException | InvocationTargetException e) { + LOG.info("Failed to invoke 'configureResourceType' method ", e); + throw new SubmarineRuntimeException(e.getMessage(), e.getCause()); + } + } + + private static String getUnits(String resourceValue) { + return parseResourceValue(resourceValue)[0]; + } + + /** + * Extract unit and actual value from resource value. + * @param resourceValue Value of the resource + * @return Array containing unit and value. [0]=unit, [1]=value + * @throws IllegalArgumentException if units contain non alpha characters + */ + private static String[] parseResourceValue(String resourceValue) { + String[] resource = new String[2]; + int i = 0; + for (; i < resourceValue.length(); i++) { + if (Character.isAlphabetic(resourceValue.charAt(i))) { + break; + } + } + String units = resourceValue.substring(i); + + if (StringUtils.isAlpha(units) || units.equals("")) { + resource[0] = units; + resource[1] = resourceValue.substring(0, i); + return resource; + } else { + throw new IllegalArgumentException("Units '" + units + "'" + + " contains non alphabet characters, which is not allowed."); + } + } + +} diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/UnitsConversionUtil.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/UnitsConversionUtil.java new file mode 100644 index 0000000..8a8635a --- /dev/null +++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/UnitsConversionUtil.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.submarine.common.resource; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Almost the same logic as UnitsConversionUtil[YARN-4081]. If the dependencies + * are upgraded to hadoop 3.*, this class can be replaced. + */ +public final class UnitsConversionUtil { + + private UnitsConversionUtil() {} + + /** + * Helper class for encapsulating conversion values. + */ + public static class Converter { + private long numerator; + private long denominator; + + Converter(long n, long d) { + this.numerator = n; + this.denominator = d; + } + } + + private static final String[] UNITS = {"p", "n", "u", "m", "", "k", "M", "G", + "T", "P", "Ki", "Mi", "Gi", "Ti", "Pi"}; + private static final List<String> SORTED_UNITS = Arrays.asList(UNITS); + public static final Set<String> KNOWN_UNITS = createKnownUnitsSet(); + private static final Converter PICO = + new Converter(1L, 1000L * 1000L * 1000L * 1000L); + private static final Converter NANO = + new Converter(1L, 1000L * 1000L * 1000L); + private static final Converter MICRO = new Converter(1L, 1000L * 1000L); + private static final Converter MILLI = new Converter(1L, 1000L); + private static final Converter BASE = new Converter(1L, 1L); + private static final Converter KILO = new Converter(1000L, 1L); + private static final Converter MEGA = new Converter(1000L * 1000L, 1L); + private static final Converter GIGA = + new Converter(1000L * 1000L * 1000L, 1L); + private static final Converter TERA = + new Converter(1000L * 1000L * 1000L * 1000L, 1L); + private static final Converter PETA = + new Converter(1000L * 1000L * 1000L * 1000L * 1000L, 1L); + + private static final Converter KILO_BINARY = new Converter(1024L, 1L); + private static final Converter MEGA_BINARY = new Converter(1024L * 1024L, 1L); + private static final Converter GIGA_BINARY = + new Converter(1024L * 1024L * 1024L, 1L); + private static final Converter TERA_BINARY = + new Converter(1024L * 1024L * 1024L * 1024L, 1L); + private static final Converter PETA_BINARY = + new Converter(1024L * 1024L * 1024L * 1024L * 1024L, 1L); + + private static Set<String> createKnownUnitsSet() { + Set<String> ret = new HashSet<>(); + ret.addAll(Arrays.asList(UNITS)); + return ret; + } + + private static Converter getConverter(String unit) { + switch (unit) { + case "p": + return PICO; + case "n": + return NANO; + case "u": + return MICRO; + case "m": + return MILLI; + case "": + return BASE; + case "k": + return KILO; + case "M": + return MEGA; + case "G": + return GIGA; + case "T": + return TERA; + case "P": + return PETA; + case "Ki": + return KILO_BINARY; + case "Mi": + return MEGA_BINARY; + case "Gi": + return GIGA_BINARY; + case "Ti": + return TERA_BINARY; + case "Pi": + return PETA_BINARY; + default: + throw new IllegalArgumentException( + "Unknown unit '" + unit + "'. Known units are " + KNOWN_UNITS); + } + } + + /** + * Converts a value from one unit to another. Supported units can be obtained + * by inspecting the KNOWN_UNITS set. + * + * @param fromUnit the unit of the from value + * @param toUnit the target unit + * @param fromValue the value you wish to convert + * @return the value in toUnit + */ + public static long convert(String fromUnit, String toUnit, long fromValue) { + if (toUnit == null || fromUnit == null) { + throw new IllegalArgumentException("One or more arguments are null"); + } + + if (fromUnit.equals(toUnit)) { + return fromValue; + } + Converter fc = getConverter(fromUnit); + Converter tc = getConverter(toUnit); + long numerator = fc.numerator * tc.denominator; + long denominator = fc.denominator * tc.numerator; + long numeratorMultiplierLimit = Long.MAX_VALUE / numerator; + if (numerator < denominator) { + if (numeratorMultiplierLimit < fromValue) { + String overflowMsg = + "Converting " + fromValue + " from '" + fromUnit + "' to '" + toUnit + + "' will result in an overflow of Long"; + throw new IllegalArgumentException(overflowMsg); + } + return (fromValue * numerator) / denominator; + } + if (numeratorMultiplierLimit > fromValue) { + return (numerator * fromValue) / denominator; + } + long tmp = numerator / denominator; + if ((Long.MAX_VALUE / tmp) < fromValue) { + String overflowMsg = + "Converting " + fromValue + " from '" + fromUnit + "' to '" + toUnit + + "' will result in an overflow of Long"; + throw new IllegalArgumentException(overflowMsg); + } + return fromValue * tmp; + } + +} diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/package-info.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/package-info.java new file mode 100644 index 0000000..144316b --- /dev/null +++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/common/resource/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * This package contains resource utility classes. + */ +package org.apache.hadoop.yarn.submarine.common.resource; \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md b/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md index 5648c11..f693917 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md +++ b/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md @@ -164,6 +164,22 @@ See below screenshot: ![alt text](./images/tensorboard-service.png "Tensorboard service") +If there is no hadoop client, we can also use the java command and the uber jar, hadoop-submarine-all-*.jar, to submit the job. + +``` +java -cp /path-to/hadoop-conf:/path-to/hadoop-submarine-all-*.jar \ + org.apache.hadoop.yarn.submarine.client.cli.Cli job run \ + --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre/ \ + --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 --name tf-job-001 \ + --docker_image <your-docker-image> \ + --input_path hdfs://default/dataset/cifar-10-data \ + --checkpoint_path hdfs://default/tmp/cifar-10-jobdir \ + --worker_resources memory=4G,vcores=2,gpu=2 \ + --worker_launch_cmd "python ... (Your training application cmd)" \ + --tensorboard # this will launch a companion tensorboard container for monitoring +``` + + ### Launch Distributed Tensorflow Application: #### Commandline @@ -181,6 +197,20 @@ yarn jar hadoop-yarn-applications-submarine-<version>.jar job run \ --num_ps 2 \ --ps_resources memory=4G,vcores=2,gpu=0 --ps_launch_cmd "cmd for ps" \ ``` +Or +``` +java -cp /path-to/hadoop-conf:/path-to/hadoop-submarine-all-*.jar \ + org.apache.hadoop.yarn.submarine.client.cli.Cli job run \ + --name tf-job-001 --docker_image <your docker image> \ + --input_path hdfs://default/dataset/cifar-10-data \ + --checkpoint_path hdfs://default/tmp/cifar-10-jobdir \ + --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre/ \ + --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \ + --num_workers 2 \ + --worker_resources memory=8G,vcores=2,gpu=1 --worker_launch_cmd "cmd for worker ..." \ + --num_ps 2 \ + --ps_resources memory=4G,vcores=2,gpu=0 --ps_launch_cmd "cmd for ps" \ +``` #### Notes: @@ -197,7 +227,11 @@ yarn jar hadoop-yarn-applications-submarine-<version>.jar job run \ ``` yarn jar hadoop-yarn-applications-submarine-3.2.0-SNAPSHOT.jar job show --name tf-job-001 ``` - +Or +``` +java -cp /path-to/hadoop-conf:/path-to/hadoop-submarine-all-*.jar \ + org.apache.hadoop.yarn.submarine.client.cli.Cli job show --name tf-job-001 +``` Output looks like: ``` Job Meta Info: @@ -222,6 +256,17 @@ yarn jar /tmp/hadoop-yarn-applications-submarine-3.2.0-SNAPSHOT.jar \ --env DOCKER_HADOOP_HDFS_HOME=/hadoop-current \ --num_workers 0 --tensorboard ``` +Or +``` +# Cleanup previous service if needed +yarn app -destroy tensorboard-service; \ +java -cp /path-to/hadoop-conf:/path-to/hadoop-submarine-all-*.jar \ + org.apache.hadoop.yarn.submarine.client.cli.Cli job run \ + --name tensorboard-service --verbose --docker_image wtan/tf-1.8.0-cpu:0.0.3 \ + --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre/ \ + --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \ + --num_workers 0 --tensorboard +``` You can view multiple job training history like from the `Tensorboard` link: @@ -242,4 +287,18 @@ If you want to build the Submarine project by yourself, you can follow the steps - Run 'mvn install -DskipTests' from Hadoop source top level once. -- Navigate to hadoop-submarine folder and run 'mvn clean package'. \ No newline at end of file +- Navigate to hadoop-submarine folder and run 'mvn clean package'. + + - By Default, hadoop-submarine is built based on hadoop 3.1.2 dependencies. + Both yarn service runtime and tony runtime are built. + You can also add a parameter of "-Phadoop-3.2" to specify the dependencies + to hadoop 3.2.0. + + - Hadoop-submarine can support hadoop 2.9.2 and hadoop 2.7.4 as well. + You can add "-Phadoop-2.9" to build submarine based on hadoop 2.9.2. + For example: + ``` + mvn clean package -Phadoop-2.9 + ``` + As yarn service is based on hadoop 3.*, so only tony runtime is built + in this case. diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/TestRunJobCliParsingCommonYaml.java b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/TestRunJobCliParsingCommonYaml.java index 5e2da3a..f174f04 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/TestRunJobCliParsingCommonYaml.java +++ b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/TestRunJobCliParsingCommonYaml.java @@ -16,15 +16,14 @@ package org.apache.hadoop.yarn.submarine.client.cli.runjob; -import org.apache.hadoop.yarn.api.records.ResourceInformation; -import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.submarine.client.cli.YamlConfigTestUtils; import org.apache.hadoop.yarn.submarine.client.cli.param.runjob.RunJobParameters; import org.apache.hadoop.yarn.submarine.client.cli.param.runjob.TensorFlowRunJobParameters; import org.apache.hadoop.yarn.submarine.client.cli.param.yaml.YamlParseException; import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; -import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineRuntimeException; +import org.apache.hadoop.yarn.submarine.common.resource.ResourceUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -32,10 +31,10 @@ import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; -import java.util.ArrayList; -import java.util.List; import static org.apache.hadoop.yarn.submarine.client.cli.runjob.TestRunJobCliParsingCommon.getMockClientContext; import static org.junit.Assert.assertFalse; @@ -49,6 +48,8 @@ public class TestRunJobCliParsingCommonYaml { private static final String DIR_NAME = "runjob-common-yaml"; private static final String TF_DIR = "runjob-pytorch-yaml"; private File yamlConfig; + private static Logger LOG = LoggerFactory.getLogger( + TestRunJobCliParsingCommonYaml.class); @Before public void before() { @@ -62,10 +63,12 @@ public class TestRunJobCliParsingCommonYaml { @BeforeClass public static void configureResourceTypes() { - List<ResourceTypeInfo> resTypes = new ArrayList<>( - ResourceUtils.getResourcesTypeInfo()); - resTypes.add(ResourceTypeInfo.newInstance(ResourceInformation.GPU_URI, "")); - ResourceUtils.reinitializeResources(resTypes); + try { + ResourceUtils.configureResourceType(ResourceUtils.GPU_URI); + } catch (SubmarineRuntimeException e) { + LOG.info("The hadoop dependency doesn't support gpu resource, " + + "so just skip this test case."); + } } @Rule diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/pytorch/TestRunJobCliParsingPyTorchYaml.java b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/pytorch/TestRunJobCliParsingPyTorchYaml.java index 858c2de..bf94edc 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/pytorch/TestRunJobCliParsingPyTorchYaml.java +++ b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/pytorch/TestRunJobCliParsingPyTorchYaml.java @@ -24,29 +24,28 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.File; -import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.yarn.api.records.ResourceInformation; -import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; -import org.apache.hadoop.yarn.resourcetypes.ResourceTypesTestHelper; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.submarine.client.cli.YamlConfigTestUtils; import org.apache.hadoop.yarn.submarine.client.cli.param.runjob.PyTorchRunJobParameters; import org.apache.hadoop.yarn.submarine.client.cli.param.runjob.RunJobParameters; import org.apache.hadoop.yarn.submarine.client.cli.param.yaml.YamlParseException; import org.apache.hadoop.yarn.submarine.client.cli.runjob.RunJobCli; import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; -import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineRuntimeException; +import org.apache.hadoop.yarn.submarine.common.resource.ResourceUtils; +import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Test class that verifies the correctness of PyTorch @@ -56,6 +55,8 @@ public class TestRunJobCliParsingPyTorchYaml { private static final String OVERRIDDEN_PREFIX = "overridden_"; private static final String DIR_NAME = "runjob-pytorch-yaml"; private File yamlConfig; + private static Logger LOG = LoggerFactory.getLogger( + TestRunJobCliParsingPyTorchYaml.class); @Before public void before() { @@ -67,14 +68,6 @@ public class TestRunJobCliParsingPyTorchYaml { YamlConfigTestUtils.deleteFile(yamlConfig); } - @BeforeClass - public static void configureResourceTypes() { - List<ResourceTypeInfo> resTypes = new ArrayList<>( - ResourceUtils.getResourcesTypeInfo()); - resTypes.add(ResourceTypeInfo.newInstance(ResourceInformation.GPU_URI, "")); - ResourceUtils.reinitializeResources(resTypes); - } - @Rule public ExpectedException exception = ExpectedException.none(); @@ -105,23 +98,38 @@ public class TestRunJobCliParsingPyTorchYaml { } } - private void verifyWorkerValues(RunJobParameters jobRunParameters, - String prefix) { + private PyTorchRunJobParameters verifyWorkerCommonValues(RunJobParameters + jobRunParameters, String prefix) { assertTrue(RunJobParameters.class + " must be an instance of " + PyTorchRunJobParameters.class, jobRunParameters instanceof PyTorchRunJobParameters); - PyTorchRunJobParameters tensorFlowParams = + PyTorchRunJobParameters pyTorchParams = (PyTorchRunJobParameters) jobRunParameters; - assertEquals(3, tensorFlowParams.getNumWorkers()); + assertEquals(3, pyTorchParams.getNumWorkers()); assertEquals(prefix + "testLaunchCmdWorker", - tensorFlowParams.getWorkerLaunchCmd()); + pyTorchParams.getWorkerLaunchCmd()); assertEquals(prefix + "testDockerImageWorker", - tensorFlowParams.getWorkerDockerImage()); - assertEquals(ResourceTypesTestHelper.newResource(20480L, 32, - ImmutableMap.<String, String> builder() - .put(ResourceInformation.GPU_URI, "2").build()), - tensorFlowParams.getWorkerResource()); + pyTorchParams.getWorkerDockerImage()); + return pyTorchParams; + } + + private void verifyWorkerValues(RunJobParameters jobRunParameters, + String prefix) { + PyTorchRunJobParameters pyTorchParams = verifyWorkerCommonValues + (jobRunParameters, prefix); + assertEquals(Resources.createResource(20480, 32), + pyTorchParams.getWorkerResource()); + } + + private void verifyWorkerValuesWithGpu(RunJobParameters jobRunParameters, + String prefix) { + + PyTorchRunJobParameters pyTorchParams = verifyWorkerCommonValues + (jobRunParameters, prefix); + Resource workResource = Resources.createResource(20480, 32); + ResourceUtils.setResource(workResource, ResourceUtils.GPU_URI, 2); + assertEquals(workResource, pyTorchParams.getWorkerResource()); } private void verifySecurityValues(RunJobParameters jobRunParameters) { @@ -147,6 +155,30 @@ public class TestRunJobCliParsingPyTorchYaml { } @Test + public void testValidGpuYamlParsing() throws Exception { + try { + ResourceUtils.configureResourceType(ResourceUtils.GPU_URI); + } catch (SubmarineRuntimeException e) { + LOG.info("The hadoop dependency doesn't support gpu resource, " + + "so just skip this test case."); + return; + } + + RunJobCli runJobCli = new RunJobCli(getMockClientContext()); + Assert.assertFalse(SubmarineLogs.isVerbose()); + + yamlConfig = YamlConfigTestUtils.createTempFileWithContents( + DIR_NAME + "/valid-gpu-config.yaml"); + runJobCli.run( + new String[] {"-f", yamlConfig.getAbsolutePath(), "--verbose"}); + + RunJobParameters jobRunParameters = runJobCli.getRunJobParameters(); + verifyBasicConfigValues(jobRunParameters); + verifyWorkerValuesWithGpu(jobRunParameters, ""); + verifySecurityValues(jobRunParameters); + } + + @Test public void testRoleOverrides() throws Exception { RunJobCli runJobCli = new RunJobCli(getMockClientContext()); Assert.assertFalse(SubmarineLogs.isVerbose()); diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYaml.java b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYaml.java index c3b2acc..9c69720 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYaml.java +++ b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYaml.java @@ -18,26 +18,25 @@ package org.apache.hadoop.yarn.submarine.client.cli.runjob.tensorflow; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import org.apache.hadoop.yarn.api.records.ResourceInformation; -import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; -import org.apache.hadoop.yarn.resourcetypes.ResourceTypesTestHelper; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.submarine.client.cli.YamlConfigTestUtils; import org.apache.hadoop.yarn.submarine.client.cli.param.runjob.RunJobParameters; import org.apache.hadoop.yarn.submarine.client.cli.param.runjob.TensorFlowRunJobParameters; import org.apache.hadoop.yarn.submarine.client.cli.runjob.RunJobCli; import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; -import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineRuntimeException; +import org.apache.hadoop.yarn.submarine.common.resource.ResourceUtils; +import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; -import java.util.ArrayList; import java.util.List; import static org.apache.hadoop.yarn.submarine.client.cli.runjob.TestRunJobCliParsingCommon.getMockClientContext; @@ -53,6 +52,8 @@ public class TestRunJobCliParsingTensorFlowYaml { private static final String OVERRIDDEN_PREFIX = "overridden_"; private static final String DIR_NAME = "runjob-tensorflow-yaml"; private File yamlConfig; + private static Logger LOG = LoggerFactory.getLogger( + TestRunJobCliParsingTensorFlowYaml.class); @Before public void before() { @@ -64,14 +65,6 @@ public class TestRunJobCliParsingTensorFlowYaml { YamlConfigTestUtils.deleteFile(yamlConfig); } - @BeforeClass - public static void configureResourceTypes() { - List<ResourceTypeInfo> resTypes = new ArrayList<>( - ResourceUtils.getResourcesTypeInfo()); - resTypes.add(ResourceTypeInfo.newInstance(ResourceInformation.GPU_URI, "")); - ResourceUtils.reinitializeResources(resTypes); - } - @Rule public ExpectedException exception = ExpectedException.none(); @@ -114,14 +107,12 @@ public class TestRunJobCliParsingTensorFlowYaml { assertEquals(prefix + "testLaunchCmdPs", tensorFlowParams.getPSLaunchCmd()); assertEquals(prefix + "testDockerImagePs", tensorFlowParams.getPsDockerImage()); - assertEquals(ResourceTypesTestHelper.newResource(20500L, 34, - ImmutableMap.<String, String> builder() - .put(ResourceInformation.GPU_URI, "4").build()), + assertEquals(Resources.createResource(20500, 34), tensorFlowParams.getPsResource()); } - private void verifyWorkerValues(RunJobParameters jobRunParameters, - String prefix) { + private TensorFlowRunJobParameters verifyWorkerCommonValues( + RunJobParameters jobRunParameters, String prefix) { assertTrue(RunJobParameters.class + " must be an instance of " + TensorFlowRunJobParameters.class, jobRunParameters instanceof TensorFlowRunJobParameters); @@ -133,12 +124,26 @@ public class TestRunJobCliParsingTensorFlowYaml { tensorFlowParams.getWorkerLaunchCmd()); assertEquals(prefix + "testDockerImageWorker", tensorFlowParams.getWorkerDockerImage()); - assertEquals(ResourceTypesTestHelper.newResource(20480L, 32, - ImmutableMap.<String, String> builder() - .put(ResourceInformation.GPU_URI, "2").build()), + return tensorFlowParams; + } + + private void verifyWorkerValues(RunJobParameters jobRunParameters, + String prefix) { + TensorFlowRunJobParameters tensorFlowParams = verifyWorkerCommonValues + (jobRunParameters, prefix); + assertEquals(Resources.createResource(20480, 32), tensorFlowParams.getWorkerResource()); } + private void verifyWorkerValuesWithGpu(RunJobParameters jobRunParameters, + String prefix) { + TensorFlowRunJobParameters tensorFlowParams = verifyWorkerCommonValues + (jobRunParameters, prefix); + Resource workResource = Resources.createResource(20480, 32); + ResourceUtils.setResource(workResource, ResourceUtils.GPU_URI, 2); + assertEquals(workResource, tensorFlowParams.getWorkerResource()); + } + private void verifySecurityValues(RunJobParameters jobRunParameters) { assertEquals("keytabPath", jobRunParameters.getKeytab()); assertEquals("testPrincipal", jobRunParameters.getPrincipal()); @@ -155,9 +160,7 @@ public class TestRunJobCliParsingTensorFlowYaml { assertTrue(tensorFlowParams.isTensorboardEnabled()); assertEquals("tensorboardDockerImage", tensorFlowParams.getTensorboardDockerImage()); - assertEquals(ResourceTypesTestHelper.newResource(21000L, 37, - ImmutableMap.<String, String> builder() - .put(ResourceInformation.GPU_URI, "3").build()), + assertEquals(Resources.createResource(21000, 37), tensorFlowParams.getTensorboardResource()); } @@ -180,6 +183,32 @@ public class TestRunJobCliParsingTensorFlowYaml { } @Test + public void testValidGpuYamlParsing() throws Exception { + try { + ResourceUtils.configureResourceType(ResourceUtils.GPU_URI); + } catch (SubmarineRuntimeException e) { + LOG.info("The hadoop dependency doesn't support gpu resource, " + + "so just skip this test case."); + return; + } + + RunJobCli runJobCli = new RunJobCli(getMockClientContext()); + Assert.assertFalse(SubmarineLogs.isVerbose()); + + yamlConfig = YamlConfigTestUtils.createTempFileWithContents( + DIR_NAME + "/valid-gpu-config.yaml"); + runJobCli.run( + new String[] {"-f", yamlConfig.getAbsolutePath(), "--verbose"}); + + RunJobParameters jobRunParameters = runJobCli.getRunJobParameters(); + verifyBasicConfigValues(jobRunParameters); + verifyPsValues(jobRunParameters, ""); + verifyWorkerValuesWithGpu(jobRunParameters, ""); + verifySecurityValues(jobRunParameters); + verifyTensorboardValues(jobRunParameters); + } + + @Test public void testRoleOverrides() throws Exception { RunJobCli runJobCli = new RunJobCli(getMockClientContext()); Assert.assertFalse(SubmarineLogs.isVerbose()); @@ -240,9 +269,7 @@ public class TestRunJobCliParsingTensorFlowYaml { assertTrue(tensorFlowParams.isTensorboardEnabled()); assertNull("tensorboardDockerImage should be null!", tensorFlowParams.getTensorboardDockerImage()); - assertEquals(ResourceTypesTestHelper.newResource(21000L, 37, - ImmutableMap.<String, String> builder() - .put(ResourceInformation.GPU_URI, "3").build()), + assertEquals(Resources.createResource(21000, 37), tensorFlowParams.getTensorboardResource()); } diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYamlStandalone.java b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYamlStandalone.java index 59b0b59..2088d1d 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYamlStandalone.java +++ b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/runjob/tensorflow/TestRunJobCliParsingTensorFlowYamlStandalone.java @@ -132,14 +132,14 @@ public class TestRunJobCliParsingTensorFlowYamlStandalone { private void assertWorkerValues(Role worker) { assertEquals("testLaunchCmdWorker", worker.getLaunchCmd()); assertEquals("testDockerImageWorker", worker.getDockerImage()); - assertEquals("memory=20480M,vcores=32,gpu=2", worker.getResources()); + assertEquals("memory=20480M,vcores=32", worker.getResources()); assertEquals(3, worker.getReplicas()); } private void assertPsValues(Role ps) { assertEquals("testLaunchCmdPs", ps.getLaunchCmd()); assertEquals("testDockerImagePs", ps.getDockerImage()); - assertEquals("memory=20500M,vcores=34,gpu=4", ps.getResources()); + assertEquals("memory=20500M,vcores=34", ps.getResources()); assertEquals(4, ps.getReplicas()); } @@ -161,7 +161,7 @@ public class TestRunJobCliParsingTensorFlowYamlStandalone { TensorBoard tensorBoard = yamlConfigFile.getTensorBoard(); assertNotNull("Tensorboard should not be null!", tensorBoard); assertEquals("tensorboardDockerImage", tensorBoard.getDockerImage()); - assertEquals("memory=21000M,vcores=37,gpu=3", tensorBoard.getResources()); + assertEquals("memory=21000M,vcores=37", tensorBoard.getResources()); } @Before diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/common/MockClientContext.java b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/common/MockClientContext.java index 23c45d2..3740f86 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/common/MockClientContext.java +++ b/hadoop-submarine/hadoop-submarine-core/src/test/java/org/apache/hadoop/yarn/submarine/common/MockClientContext.java @@ -20,15 +20,6 @@ package org.apache.hadoop.yarn.submarine.common; import org.apache.hadoop.yarn.submarine.common.fs.MockRemoteDirectoryManager; import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager; -import org.apache.hadoop.yarn.client.api.YarnClient; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.util.resource.ResourceUtils; - -import java.io.IOException; - -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; public class MockClientContext extends ClientContext { @@ -44,18 +35,4 @@ public class MockClientContext extends ClientContext { RemoteDirectoryManager remoteDirectoryMgr) { this.remoteDirectoryMgr = remoteDirectoryMgr; } - - @Override - public synchronized YarnClient getOrCreateYarnClient() { - YarnClient client = mock(YarnClient.class); - try { - when(client.getResourceTypeInfo()).thenReturn( - ResourceUtils.getResourcesTypeInfo()); - } catch (YarnException e) { - fail(e.getMessage()); - } catch (IOException e) { - fail(e.getMessage()); - } - return client; - } } diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/empty-framework.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/empty-framework.yaml index 0d9577b..e3c15a4 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/empty-framework.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/empty-framework.yaml @@ -43,12 +43,12 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs docker_image: testDockerImagePs @@ -59,5 +59,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-configs.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-configs.yaml index db19dc2..6fa8ed4 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-configs.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-configs.yaml @@ -23,11 +23,11 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs @@ -37,5 +37,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-framework.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-framework.yaml index be67fbb..6523a38 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-framework.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/missing-framework.yaml @@ -42,12 +42,12 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs docker_image: testDockerImagePs @@ -58,5 +58,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/some-sections-missing.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/some-sections-missing.yaml index 9c7a56f..3850537 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/some-sections-missing.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/some-sections-missing.yaml @@ -40,10 +40,10 @@ configs: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/test-false-values.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/test-false-values.yaml index 8a97c49..0e300a0 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/test-false-values.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/test-false-values.yaml @@ -43,11 +43,11 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-indentation.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-indentation.yaml index 66f0e16..375c680 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-indentation.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-indentation.yaml @@ -42,11 +42,11 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs @@ -56,5 +56,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-property-name.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-property-name.yaml index 41b5f2b..6ea9c37 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-property-name.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-common-yaml/wrong-property-name.yaml @@ -42,11 +42,11 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs @@ -56,5 +56,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/envs-are-missing.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/envs-are-missing.yaml index d5b82fe..758be1e 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/envs-are-missing.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/envs-are-missing.yaml @@ -40,7 +40,7 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-ps-section.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-ps-section.yaml index a48160e..a161e61a 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-ps-section.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-ps-section.yaml @@ -43,7 +43,7 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-tensorboard-section.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-tensorboard-section.yaml index 5acc2f2..675fba5 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-tensorboard-section.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/invalid-config-tensorboard-section.yaml @@ -43,7 +43,7 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/security-principal-is-missing.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/security-principal-is-missing.yaml index 31868b6..5f6c256 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/security-principal-is-missing.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/security-principal-is-missing.yaml @@ -43,7 +43,7 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config-with-overrides.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config-with-overrides.yaml index 4a1e2c8..71ac275 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config-with-overrides.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config-with-overrides.yaml @@ -43,7 +43,7 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: overridden_testLaunchCmdWorker docker_image: overridden_testDockerImageWorker diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config.yaml index a494e9c..2b9dd50 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config.yaml @@ -43,7 +43,7 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-gpu-config.yaml similarity index 100% copy from hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-config.yaml copy to hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-pytorch-yaml/valid-gpu-config.yaml diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/envs-are-missing.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/envs-are-missing.yaml index 4625dad..4deb962 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/envs-are-missing.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/envs-are-missing.yaml @@ -40,12 +40,12 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs docker_image: testDockerImagePs @@ -56,5 +56,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/security-principal-is-missing.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/security-principal-is-missing.yaml index d67ef3b..43419232 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/security-principal-is-missing.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/security-principal-is-missing.yaml @@ -43,12 +43,12 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs docker_image: testDockerImagePs @@ -58,5 +58,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/tensorboard-dockerimage-is-missing.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/tensorboard-dockerimage-is-missing.yaml index c6b2d70..cdefb9f 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/tensorboard-dockerimage-is-missing.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/tensorboard-dockerimage-is-missing.yaml @@ -43,12 +43,12 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs docker_image: testDockerImagePs @@ -59,4 +59,4 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 \ No newline at end of file + resources: memory=21000M,vcores=37 diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config-with-overrides.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config-with-overrides.yaml index 7b84055..042bf35 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config-with-overrides.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config-with-overrides.yaml @@ -43,7 +43,7 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: overridden_testLaunchCmdWorker docker_image: overridden_testDockerImageWorker @@ -58,7 +58,7 @@ roles: - /etc/hosts:/overridden_Worker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: overridden_testLaunchCmdPs docker_image: overridden_testDockerImagePs @@ -78,5 +78,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config.yaml index 1bbaf58..7e31200 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-config.yaml @@ -43,12 +43,12 @@ scheduling: roles: worker: - resources: memory=20480M,vcores=32,gpu=2 + resources: memory=20480M,vcores=32 replicas: 3 launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs docker_image: testDockerImagePs @@ -59,5 +59,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 + resources: memory=21000M,vcores=37 docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/tensorboard-dockerimage-is-missing.yaml b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-gpu-config.yaml similarity index 93% copy from hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/tensorboard-dockerimage-is-missing.yaml copy to hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-gpu-config.yaml index c6b2d70..1ef1df0 100644 --- a/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/tensorboard-dockerimage-is-missing.yaml +++ b/hadoop-submarine/hadoop-submarine-core/src/test/resources/runjob-tensorflow-yaml/valid-gpu-config.yaml @@ -48,7 +48,7 @@ roles: launch_cmd: testLaunchCmdWorker docker_image: testDockerImageWorker ps: - resources: memory=20500M,vcores=34,gpu=4 + resources: memory=20500M,vcores=34 replicas: 4 launch_cmd: testLaunchCmdPs docker_image: testDockerImagePs @@ -59,4 +59,5 @@ security: distribute_keytab: true tensorBoard: - resources: memory=21000M,vcores=37,gpu=3 \ No newline at end of file + resources: memory=21000M,vcores=37 + docker_image: tensorboardDockerImage \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-dist/pom.xml b/hadoop-submarine/hadoop-submarine-dist/pom.xml new file mode 100644 index 0000000..423fd0b --- /dev/null +++ b/hadoop-submarine/hadoop-submarine-dist/pom.xml @@ -0,0 +1,131 @@ +<?xml version="1.0"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>hadoop-submarine</artifactId> + <groupId>org.apache.hadoop</groupId> + <version>0.2.0-SNAPSHOT</version> + </parent> + <artifactId>${project.artifactId}</artifactId> + <version>${project.version}</version> + <name>Hadoop Submarine Dist</name> + <packaging>pom</packaging> + + <properties> + <!-- Needed for generating FindBugs warnings using parent pom --> + <yarn.basedir>${project.parent.parent.basedir}</yarn.basedir> + <project.artifactId>hadoop-submarine-dist</project.artifactId> + <project.version>0.2.0-SNAPSHOT</project.version> + </properties> + + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-core</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + + <profiles> + <profile> + <id>hadoop-3.2</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-yarnservice-runtime</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + </profile> + + <!-- Default profile--> + <profile> + <id>hadoop-3.1</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-yarnservice-runtime</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + </profile> + + <profile> + <id>hadoop-2.9</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + </profile> + + <profile> + <id>hadoop-2.7</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-submarine-tony-runtime</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + </profile> + </profiles> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-assembly-plugin</artifactId> + <executions> + <execution> + <id>dist</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + <configuration> + <finalName>${project.artifactId}-${project.version}-${project.activeProfiles[0].id}</finalName> + <appendAssemblyId>false</appendAssemblyId> + <attach>false</attach> + <descriptors> + <descriptor>src/assembly/distribution.xml</descriptor> + </descriptors> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + +</project> diff --git a/hadoop-submarine/hadoop-submarine-dist/src/assembly/distribution.xml b/hadoop-submarine/hadoop-submarine-dist/src/assembly/distribution.xml new file mode 100644 index 0000000..c6e1c25 --- /dev/null +++ b/hadoop-submarine/hadoop-submarine-dist/src/assembly/distribution.xml @@ -0,0 +1,61 @@ +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd"> + <id>distribution</id> + <formats> + <format>dir</format> + <format>tar.gz</format> + </formats> + + <dependencySets> + <dependencySet> + <outputDirectory>/</outputDirectory> + <includes> + <include>org.apache.hadoop:hadoop-submarine-core</include> + </includes> + </dependencySet> + <dependencySet> + <outputDirectory>/</outputDirectory> + <includes> + <include>org.apache.hadoop:hadoop-submarine-tony-runtime</include> + </includes> + </dependencySet> + <dependencySet> + <outputDirectory>/</outputDirectory> + <includes> + <include>org.apache.hadoop:hadoop-submarine-yarnservice-runtime</include> + </includes> + </dependencySet> + </dependencySets> + + <fileSets> + <fileSet> + <directory>../../</directory> + <includes> + <include>LICENSE*</include> + <include>NOTICE*</include> + </includes> + </fileSet> + <fileSet> + <directory>../hadoop-submarine-all/target/</directory> + <outputDirectory>/</outputDirectory> + <includes> + <include>hadoop-submarine-all-${project.version}-*.jar</include> + </includes> + </fileSet> + </fileSets> +</assembly> diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/pom.xml b/hadoop-submarine/hadoop-submarine-tony-runtime/pom.xml index 8dbda98..a004201 100644 --- a/hadoop-submarine/hadoop-submarine-tony-runtime/pom.xml +++ b/hadoop-submarine/hadoop-submarine-tony-runtime/pom.xml @@ -23,6 +23,7 @@ <modelVersion>4.0.0</modelVersion> <artifactId>hadoop-submarine-tony-runtime</artifactId> + <name>Hadoop Submarine Tony Runtime</name> <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyUtils.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyUtils.java index 47ad1a4..d818fe0 100644 --- a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyUtils.java +++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyUtils.java @@ -19,9 +19,8 @@ import com.linkedin.tony.util.Utils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.records.ResourceInformation; -import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; import org.apache.hadoop.yarn.submarine.client.cli.param.runjob.TensorFlowRunJobParameters; +import org.apache.hadoop.yarn.submarine.common.resource.ResourceUtils; import java.util.ArrayList; import java.util.Arrays; @@ -52,7 +51,7 @@ public final class TonyUtils { tonyConf.setLong( TonyConfigurationKeys.getResourceKey(Constants.PS_JOB_NAME, Constants.MEMORY), - parameters.getPsResource().getMemorySize()); + ResourceUtils.getMemorySize(parameters.getPsResource())); } if (parameters.getWorkerResource() != null) { tonyConf.setInt( @@ -62,16 +61,12 @@ public final class TonyUtils { tonyConf.setLong( TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME, Constants.MEMORY), - parameters.getWorkerResource().getMemorySize()); - try { - tonyConf.setLong( - TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME, - Constants.GPUS), - parameters.getWorkerResource() - .getResourceValue(ResourceInformation.GPU_URI)); - } catch (ResourceNotFoundException rnfe) { - LOG.error("GPU resources not enabled."); - } + ResourceUtils.getMemorySize(parameters.getWorkerResource())); + tonyConf.setLong( + TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME, + Constants.GPUS), + ResourceUtils.getResourceValue(parameters.getWorkerResource(), + ResourceUtils.GPU_URI)); } if (parameters.getQueue() != null) { tonyConf.set( diff --git a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/pom.xml b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/pom.xml index 15dffb9..3609b5a 100644 --- a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/pom.xml +++ b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/pom.xml @@ -22,7 +22,7 @@ <groupId>org.apache.hadoop</groupId> <version>0.2.0-SNAPSHOT</version> </parent> - <artifactId>hadoop-submarine-score-yarnservice-runtime</artifactId> + <artifactId>hadoop-submarine-yarnservice-runtime</artifactId> <version>0.2.0-SNAPSHOT</version> <name>Hadoop Submarine YARN Service Runtime</name> @@ -108,12 +108,10 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-services-api</artifactId> - <version>3.3.0-SNAPSHOT</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-services-core</artifactId> - <version>3.3.0-SNAPSHOT</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> diff --git a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/AbstractComponent.java b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/AbstractComponent.java index 4015422..0e0bc75 100644 --- a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/AbstractComponent.java +++ b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/AbstractComponent.java @@ -30,7 +30,6 @@ import org.apache.hadoop.yarn.submarine.runtimes.yarnservice.command.LaunchComma import java.io.IOException; import java.util.Objects; -import static org.apache.hadoop.yarn.service.conf.YarnServiceConstants.CONTAINER_STATE_REPORT_AS_SERVICE_STATE; import static org.apache.hadoop.yarn.submarine.runtimes.yarnservice.tensorflow.TensorFlowCommons.addCommonEnvironments; import static org.apache.hadoop.yarn.submarine.runtimes.yarnservice.tensorflow.TensorFlowCommons.getScriptFileName; import static org.apache.hadoop.yarn.submarine.utils.DockerUtilities.getDockerArtifact; @@ -85,8 +84,11 @@ public abstract class AbstractComponent { if (role.equals(TensorFlowRole.PRIMARY_WORKER) || role.equals(PyTorchRole.PRIMARY_WORKER)) { component.setNumberOfContainers(1L); + // If the dependencies are upgraded to hadoop 3.3.0. + // yarn.service.container-state-report-as-service-state can be replaced + // with CONTAINER_STATE_REPORT_AS_SERVICE_STATE component.getConfiguration().setProperty( - CONTAINER_STATE_REPORT_AS_SERVICE_STATE, "true"); + "yarn.service.container-state-report-as-service-state", "true"); } else { component.setNumberOfContainers( (long) parameters.getNumWorkers() - 1); diff --git a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/tensorflow/component/TestTensorFlowWorkerComponent.java b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/tensorflow/component/TestTensorFlowWorkerComponent.java index c17757f..d75aff9 100644 --- a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/tensorflow/component/TestTensorFlowWorkerComponent.java +++ b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/tensorflow/component/TestTensorFlowWorkerComponent.java @@ -33,7 +33,6 @@ import java.io.IOException; import java.util.Map; import static junit.framework.TestCase.assertTrue; -import static org.apache.hadoop.yarn.service.conf.YarnServiceConstants.CONTAINER_STATE_REPORT_AS_SERVICE_STATE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.mockito.ArgumentMatchers.any; @@ -209,8 +208,11 @@ public class TestTensorFlowWorkerComponent { Component component = workerComponent.createComponent(); assertEquals(1L, (long) component.getNumberOfContainers()); + // If the dependencies are upgraded to hadoop 3.3.0. + // yarn.service.container-state-report-as-service-state can be replaced + // with CONTAINER_STATE_REPORT_AS_SERVICE_STATE verifyCommons(component, ImmutableMap.of( - CONTAINER_STATE_REPORT_AS_SERVICE_STATE, "true")); + "yarn.service.container-state-report-as-service-state", "true")); } } \ No newline at end of file diff --git a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/utils/TestSubmarineResourceUtils.java b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/utils/TestSubmarineResourceUtils.java index f22fbaa..1c0a73b 100644 --- a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/utils/TestSubmarineResourceUtils.java +++ b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/test/java/org/apache/hadoop/yarn/submarine/utils/TestSubmarineResourceUtils.java @@ -19,12 +19,17 @@ package org.apache.hadoop.yarn.submarine.utils; import com.google.common.collect.ImmutableMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.LocalConfigurationProvider; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.service.api.records.ResourceInformation; -import org.apache.hadoop.yarn.util.resource.CustomResourceTypesConfigurationProvider; import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.junit.After; import org.junit.Test; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.util.Map; import static org.junit.Assert.*; @@ -33,13 +38,48 @@ import static org.junit.Assert.*; * This class is to test {@link SubmarineResourceUtils}. */ public class TestSubmarineResourceUtils { + /** + * With the dependencies of hadoop 3.2.0, Need to create a + * CustomResourceTypesConfigurationProvider implementations. If the + * dependencies are upgraded to hadoop 3.3.0. It can be replaced by + * org.apache.hadoop.yarn.util.resource.CustomResourceTypesConfigurationProvi- + * der + */ + private static class CustomResourceTypesConfigurationProvider + extends LocalConfigurationProvider { + + @Override + public InputStream getConfigurationInputStream(Configuration bootstrapConf, + String name) throws YarnException, IOException { + if (YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE.equals(name)) { + return new ByteArrayInputStream( + ("<configuration>\n" + + " <property>\n" + + " <name>yarn.resource-types</name>\n" + + " <value>" + CUSTOM_RESOURCE_NAME + "</value>\n" + + " </property>\n" + + " <property>\n" + + " <name>yarn.resource-types.a-custom-resource.units</name>\n" + + + " <value>G</value>\n" + + " </property>\n" + + "</configuration>\n").getBytes()); + } else { + return super.getConfigurationInputStream(bootstrapConf, name); + } + } + } + private static final String CUSTOM_RESOURCE_NAME = "a-custom-resource"; private void initResourceTypes() { - CustomResourceTypesConfigurationProvider.initResourceTypes( - ImmutableMap.<String, String>builder() - .put(CUSTOM_RESOURCE_NAME, "G") - .build()); + // If the dependencies are upgraded to hadoop 3.3.0. It can be replaced by + // org.apache.hadoop.yarn.util.resource.CustomResourceTypesConfigurationPro- + // vider + Configuration configuration = new Configuration(); + configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + CustomResourceTypesConfigurationProvider.class.getName()); + ResourceUtils.resetResourceTypes(configuration); } @After diff --git a/hadoop-submarine/pom.xml b/hadoop-submarine/pom.xml index 1f44556..951e9d3 100644 --- a/hadoop-submarine/pom.xml +++ b/hadoop-submarine/pom.xml @@ -20,7 +20,7 @@ <parent> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-project</artifactId> - <version>3.3.0-SNAPSHOT</version> + <version>3.2.0</version> <relativePath/> </parent> <artifactId>hadoop-submarine</artifactId> @@ -32,29 +32,126 @@ <hadoop.common.build.dir>${basedir}/../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir> </properties> - <!-- Do not add dependencies here, add them to the POM of the leaf module --> - <modules> <module>hadoop-submarine-core</module> - <module>hadoop-submarine-yarnservice-runtime</module> - <module>hadoop-submarine-tony-runtime</module> + <module>hadoop-submarine-all</module> + <module>hadoop-submarine-dist</module> </modules> - <profiles> - <profile> - <id>clover</id> - <activation> - <activeByDefault>false</activeByDefault> - <property> - <name>clover</name> - </property> - </activation> + <dependencyManagement> <dependencies> <dependency> - <groupId>com.cenqua.clover</groupId> - <artifactId>clover</artifactId> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <version>2.23.4</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-services-api</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + <version>${hadoop.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + <version>3.7</version> </dependency> </dependencies> - </profile> -</profiles> + </dependencyManagement> + + <profiles> + <profile> + <id>hadoop-3.2</id> + <properties> + <hadoop.version>3.2.0</hadoop.version> + </properties> + <modules> + <module>hadoop-submarine-yarnservice-runtime</module> + <module>hadoop-submarine-tony-runtime</module> + </modules> + </profile> + + <!-- Default profile--> + <profile> + <id>hadoop-3.1</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> + <properties> + <hadoop.version>3.1.2</hadoop.version> + </properties> + <modules> + <module>hadoop-submarine-yarnservice-runtime</module> + <module>hadoop-submarine-tony-runtime</module> + </modules> + </profile> + + <profile> + <id>hadoop-2.9</id> + <properties> + <hadoop.version>2.9.2</hadoop.version> + </properties> + <modules> + <module>hadoop-submarine-tony-runtime</module> + </modules> + </profile> + + <profile> + <id>hadoop-2.7</id> + <properties> + <hadoop.version>2.7.3</hadoop.version> + </properties> + <modules> + <module>hadoop-submarine-tony-runtime</module> + </modules> + </profile> + + <profile> + <id>clover</id> + <activation> + <activeByDefault>false</activeByDefault> + <property> + <name>clover</name> + </property> + </activation> + <dependencies> + <dependency> + <groupId>com.cenqua.clover</groupId> + <artifactId>clover</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + </project> --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org