This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 66baf1eb51d HADOOP-18682. Move hadoop docker scripts under the main source code (#6483). Contributed by Christos Bisias. 66baf1eb51d is described below commit 66baf1eb51d72ab93d71ac32d3770b340c49b052 Author: Christos Bisias <christos...@gmail.com> AuthorDate: Mon Nov 4 18:52:37 2024 +0200 HADOOP-18682. Move hadoop docker scripts under the main source code (#6483). Contributed by Christos Bisias. --- .../src/site/markdown/HadoopDocker.md | 68 ++++++++++++++++ .../src/site/markdown/SingleCluster.md.vm | 6 ++ hadoop-dist/pom.xml | 94 ++++++++++++++++++++++ hadoop-dist/src/main/compose/hadoop/.env | 18 +++++ hadoop-dist/src/main/compose/hadoop/config | 50 ++++++++++++ .../src/main/compose/hadoop/docker-compose.yaml | 46 +++++++++++ hadoop-dist/src/main/docker/Dockerfile | 26 ++++++ pom.xml | 14 ++++ 8 files changed, 322 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/HadoopDocker.md b/hadoop-common-project/hadoop-common/src/site/markdown/HadoopDocker.md new file mode 100644 index 00000000000..c92a4c64de1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/HadoopDocker.md @@ -0,0 +1,68 @@ +<!--- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +## Hadoop Docker + +### Running from existing setups + +There are special branches for running hadoop in docker. 
+ +The `docker-hadoop-runner*` branches contain scripts that set up base images that can be used for running any Hadoop version. + +* [docker-hadoop-runner-latest](https://github.com/apache/hadoop/tree/docker-hadoop-runner-latest) +* [docker-hadoop-runner-jdk11](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk11) +* [docker-hadoop-runner-jdk8](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk8) +* [docker-hadoop-runner](https://github.com/apache/hadoop/tree/docker-hadoop-runner) + +The `docker-hadoop*` branches can be used for running a specific version. + +* [docker-hadoop-3](https://github.com/apache/hadoop/tree/docker-hadoop-3) + * `hadoop-3.3.6` +* [docker-hadoop-2](https://github.com/apache/hadoop/tree/docker-hadoop-2) + * `hadoop-2.10.2` + +### Running from the source code + +There is a setup under `hadoop-dist` that contains Docker Compose definitions +for running the current version of Hadoop in a multi-node docker environment. + +This is meant for testing code changes locally and debugging. + +The base image used by the Docker setup is built as part of the maven lifecycle. +The distribution files generated while building the project with the `-Pdist` profile enabled, +will be used for running hadoop inside the containers. 
+ +In order to start the docker environment you need to do the following +* Build the project, using the `-Pdist` profile + ```shell + > mvn clean install -Dmaven.javadoc.skip=true -DskipTests -DskipShade -Pdist,src + ``` +* From the project root, navigate under the docker-compose dir under the generated dist directory + ```shell + > cd hadoop-dist/target/hadoop-<current-version>/compose/hadoop + ``` +* Start the docker environment + ```shell + > docker-compose up -d --scale datanode=3 + ``` +* Connect to a container to execute commands + ```shell + > docker exec -it hadoop_datanode_1 bash + bash-4.2$ hdfs dfs -mkdir /test + ``` + +### Config files + +To add or remove properties from the `core-site.xml`, `hdfs-site.xml`, etc. files used in the docker environment, +simply edit the `config` file before starting the containers. The changes will be persisted in the docker environment. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm index 8153dce5c3f..ad0698a03eb 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm +++ b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm @@ -236,3 +236,9 @@ Fully-Distributed Operation --------------------------- For information on setting up fully-distributed, non-trivial clusters see [Cluster Setup](./ClusterSetup.html). + +Hadoop in Docker containers +--------------------------- + +For information on setting up hadoop in docker, using either official releases or the main source code, +check [Hadoop Docker](./HadoopDocker.html). 
diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index dc92d440109..be746d38f16 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -29,6 +29,13 @@ <name>Apache Hadoop Distribution</name> <packaging>jar</packaging> + <properties> + <file.encoding>UTF-8</file.encoding> + <downloadSources>true</downloadSources> + <docker.hadoop-runner.version>docker-hadoop-runner</docker.hadoop-runner.version> + <maven.test.skip>true</maven.test.skip> + </properties> + <!-- Using dependencies to ensure this module is the last one --> <dependencies> <dependency> @@ -151,6 +158,43 @@ </execution> </executions> </plugin> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <executions> + <execution> + <id>copy-compose-files</id> + <phase>package</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${project.build.directory}/hadoop-${project.version}/compose</outputDirectory> + <resources> + <resource> + <directory>src/main/compose</directory> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + </execution> + <execution> + <id>copy-and-filter-dockerfile</id> + <phase>package</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${project.build.directory}/hadoop-${project.version}</outputDirectory> + <resources> + <resource> + <directory>src/main/docker</directory> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> @@ -230,6 +274,56 @@ </plugins> </build> </profile> + <profile> + <id>docker-build</id> + <build> + <plugins> + <plugin> + <groupId>io.fabric8</groupId> + <artifactId>docker-maven-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>build</goal> + </goals> + <phase>package</phase> + </execution> + </executions> + <configuration> + <images> + <image> + <name>${docker.image}</name> + <build> + <dockerFileDir> + 
${project.build.directory}/hadoop-${project.version} + </dockerFileDir> + </build> + </image> + </images> + </configuration> + </plugin> + </plugins> + </build> + </profile> + <profile> + <id>docker-push</id> + <build> + <plugins> + <plugin> + <groupId>io.fabric8</groupId> + <artifactId>docker-maven-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>push</goal> + </goals> + <phase>package</phase> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> </profiles> </project> diff --git a/hadoop-dist/src/main/compose/hadoop/.env b/hadoop-dist/src/main/compose/hadoop/.env new file mode 100644 index 00000000000..838efcdebf7 --- /dev/null +++ b/hadoop-dist/src/main/compose/hadoop/.env @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +HADOOP_IMAGE=apache/hadoop +HADOOP_RUNNER_VERSION=${docker.hadoop-runner.version} +HADOOP_RUNNER_IMAGE=apache/hadoop-runner diff --git a/hadoop-dist/src/main/compose/hadoop/config b/hadoop-dist/src/main/compose/hadoop/config new file mode 100644 index 00000000000..1fac879f7ea --- /dev/null +++ b/hadoop-dist/src/main/compose/hadoop/config @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CORE-SITE.XML_fs.default.name=hdfs://namenode +CORE-SITE.XML_fs.defaultFS=hdfs://namenode + +HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020 +HDFS-SITE.XML_dfs.replication=1 + +MAPRED-SITE.XML_mapreduce.framework.name=yarn +MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=$HADOOP_HOME +MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=$HADOOP_HOME +MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=$HADOOP_HOME + +YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager +YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false +YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600 +YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false +YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle + +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1 
+CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=* +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=* +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40 +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings= +CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false + +LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout +LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender +LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n diff --git a/hadoop-dist/src/main/compose/hadoop/docker-compose.yaml b/hadoop-dist/src/main/compose/hadoop/docker-compose.yaml new file mode 100644 index 00000000000..f999c39300e --- /dev/null +++ b/hadoop-dist/src/main/compose/hadoop/docker-compose.yaml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "3.8" + +x-common-config: + &common-config + image: ${HADOOP_RUNNER_IMAGE}:${HADOOP_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + env_file: + - ./config + +services: + namenode: + <<: *common-config + hostname: namenode + command: ["hdfs", "namenode"] + ports: + - 9870:9870 + environment: + ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name" + datanode: + <<: *common-config + command: ["hdfs", "datanode"] + resourcemanager: + <<: *common-config + hostname: resourcemanager + command: ["yarn", "resourcemanager"] + ports: + - 8088:8088 + nodemanager: + <<: *common-config + command: ["yarn", "nodemanager"] diff --git a/hadoop-dist/src/main/docker/Dockerfile b/hadoop-dist/src/main/docker/Dockerfile new file mode 100644 index 00000000000..7ea60ed7f65 --- /dev/null +++ b/hadoop-dist/src/main/docker/Dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM apache/hadoop-runner:@docker.hadoop-runner.version@ + +COPY . 
/opt/hadoop + +WORKDIR /opt/hadoop + +USER root + +RUN chown -R hadoop:users /opt/hadoop + +USER hadoop diff --git a/pom.xml b/pom.xml index 1d4fda50677..5d2775a813c 100644 --- a/pom.xml +++ b/pom.xml @@ -82,6 +82,8 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x <!-- required as child projects with different version can't use ${project.version} --> <hadoop.version>3.5.0-SNAPSHOT</hadoop.version> + <docker.image>apache/hadoop:${project.version}</docker.image> + <distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId> <distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName> <distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl> @@ -119,6 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x <jsonschema2pojo-maven-plugin.version>1.1.1</jsonschema2pojo-maven-plugin.version> <maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version> <cyclonedx.version>2.7.10</cyclonedx.version> + <docker-maven-plugin.version>0.29.0</docker-maven-plugin.version> <shell-executable>bash</shell-executable> @@ -150,6 +153,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x <artifactId>maven-dependency-plugin</artifactId> <version>${maven-dependency-plugin.version}</version> </plugin> + <plugin> + <groupId>io.fabric8</groupId> + <artifactId>docker-maven-plugin</artifactId> + <version>${docker-maven-plugin.version}</version> + </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-enforcer-plugin</artifactId> @@ -892,5 +900,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x </activation> </profile> + <profile> + <id>docker-build</id> + <properties> + <docker.image>${user.name}/hadoop:${project.version}</docker.image> + </properties> + </profile> </profiles> </project> 
--------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org