potiuk commented on a change in pull request #6266: [AIRFLOW-2439] Production Docker image support including refactoring of build scripts - depends on [AIRFLOW-5704] URL: https://github.com/apache/airflow/pull/6266#discussion_r341808823
########## File path: Dockerfile ########## @@ -77,252 +75,300 @@ RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - \ libssl-dev \ locales \ netcat \ - nodejs \ rsync \ sasl2-bin \ sudo \ + libmariadb-dev-compat \ && apt-get autoremove -yqq --purge \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install graphviz - needed to build docs with diagrams -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - graphviz \ - && apt-get autoremove -yqq --purge \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Install MySQL client from Oracle repositories (Debian installs mariadb) -RUN KEY="A4A9406876FCBD3C456770C88C718D3B5072E1F5" \ - && GNUPGHOME="$(mktemp -d)" \ - && export GNUPGHOME \ - && for KEYSERVER in $(shuf -e \ - ha.pool.sks-keyservers.net \ - hkp://p80.pool.sks-keyservers.net:80 \ - keyserver.ubuntu.com \ - hkp://keyserver.ubuntu.com:80 \ - pgp.mit.edu) ; do \ - gpg --keyserver "${KEYSERVER}" --recv-keys "${KEY}" && break || true ; \ - done \ - && gpg --export "${KEY}" | apt-key add - \ - && gpgconf --kill all \ - rm -rf "${GNUPGHOME}"; \ - apt-key list > /dev/null \ - && echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-5.6" | tee -a /etc/apt/sources.list.d/mysql.list \ - && apt-get update \ - && apt-get install --no-install-recommends -y \ - libmysqlclient-dev \ - mysql-client \ - && apt-get autoremove -yqq --purge \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - RUN adduser airflow \ && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \ && chmod 0440 /etc/sudoers.d/airflow ############################################################################################################ -# This is an image with all APT dependencies needed by CI. It is built on top of the airlfow APT image -# Parameters: -# airflow-apt-deps - this is the base image for CI deps image. 
+# CI airflow image ############################################################################################################ -FROM airflow-apt-deps-ci-slim as airflow-apt-deps-ci +FROM airflow-base as airflow-ci SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] -ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ +# Setting to 1 speeds up building the image. Cassandra driver without CYTHON saves around 10 minutes +# But might not be suitable for production image +ENV CASS_DRIVER_NO_CYTHON="1" +ENV CASS_DRIVER_BUILD_CONCURRENCY=8 + +ENV JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/ + +# By changing the CI build epoch we can force reinstalling apt dependenecies for CI +# It can also be overwritten manually by setting the build variable. +ARG CI_APT_DEPENDENCIES_EPOCH_NUMBER="1" +ENV CI_APT_DEPENDENCIES_EPOCH_NUMBER=${CI_APT_DEPENDENCIES_EPOCH_NUMBER} + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + apt-transport-https ca-certificates wget dirmngr gnupg software-properties-common curl gnupg2 \ + && export APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \ + && curl -sL https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add - \ + && curl -sL https://deb.nodesource.com/setup_10.x | bash - \ + && add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \ + && apt-get update \ + && apt-get install --no-install-recommends -y \ + gnupg \ + graphviz \ + krb5-user \ + ldap-utils \ + less \ + lsb-release \ + nodejs \ + net-tools \ + adoptopenjdk-8-hotspot \ + openssh-client \ + openssh-server \ + postgresql-client \ + python-selinux \ + sqlite3 \ + tmux \ + unzip \ + vim \ + && apt-get autoremove -yqq --purge \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + ; + +ENV HADOOP_DISTRO="cdh" HADOOP_MAJOR="5" HADOOP_DISTRO_VERSION="5.11.0" HADOOP_VERSION="2.6.0" \ + HADOOP_HOME="/tmp/hadoop-cdh" +ENV HIVE_VERSION="1.1.0" HIVE_HOME="/tmp/hive" +ENV 
HADOOP_URL="https://archive.cloudera.com/${HADOOP_DISTRO}${HADOOP_MAJOR}/${HADOOP_DISTRO}/${HADOOP_MAJOR}/" +ENV MINICLUSTER_BASE="https://github.com/bolkedebruin/minicluster/releases/download/" \ + MINICLUSTER_HOME="/tmp/minicluster" \ + MINICLUSTER_VER="1.1" + +RUN mkdir -pv "${HADOOP_HOME}" \ + && mkdir -pv "${HIVE_HOME}" \ + && mkdir -pv "${MINICLUSTER_HOME}" \ + && mkdir -pv "/user/hive/warehouse" \ + && chmod -R 777 "${HIVE_HOME}" \ + &&chmod -R 777 "/user/" + +ENV HADOOP_DOWNLOAD_URL="${HADOOP_URL}hadoop-${HADOOP_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz" \ + HADOOP_TMP_FILE="/tmp/hadoop.tar.gz" + +RUN curl -sL "${HADOOP_DOWNLOAD_URL}" >"${HADOOP_TMP_FILE}" \ + && tar xzf "${HADOOP_TMP_FILE}" --absolute-names --strip-components 1 -C "${HADOOP_HOME}" \ + && rm "${HADOOP_TMP_FILE}" + +ENV HIVE_URL="${HADOOP_URL}hive-${HIVE_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz" \ + HIVE_TMP_FILE="/tmp/hive.tar.gz" + +RUN curl -sL "${HIVE_URL}" >"${HIVE_TMP_FILE}" \ + && tar xzf "${HIVE_TMP_FILE}" --strip-components 1 -C "${HIVE_HOME}" \ + && rm "${HIVE_TMP_FILE}" + +ENV MINICLUSTER_URL="${MINICLUSTER_BASE}${MINICLUSTER_VER}/minicluster-${MINICLUSTER_VER}-SNAPSHOT-bin.zip" \ + MINICLUSTER_TMP_FILE="/tmp/minicluster.zip" + +RUN curl -sL "${MINICLUSTER_URL}" > "${MINICLUSTER_TMP_FILE}" \ + && unzip "${MINICLUSTER_TMP_FILE}" -d "/tmp" \ + && rm "${MINICLUSTER_TMP_FILE}" + +ENV PATH "${PATH}:/tmp/hive/bin" + +RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - \ + && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debian stretch stable" \ + && apt-get update \ + && apt-get -y install --no-install-recommends docker-ce \ + && apt-get autoremove -yqq --purge \ + && apt-get clean && rm -rf /var/lib/apt/lists/* -ARG APT_DEPS_IMAGE="airflow-apt-deps-ci-slim" -ENV APT_DEPS_IMAGE=${APT_DEPS_IMAGE} ARG KUBERNETES_VERSION="v1.15.0" ENV KUBERNETES_VERSION=${KUBERNETES_VERSION} ARG KIND_VERSION="v0.5.0" ENV 
KIND_VERSION=${KIND_VERSION} -RUN echo "${APT_DEPS_IMAGE}" - -# Note the ifs below might be removed if Buildkit will become usable. It should skip building this -# image automatically if it is not used. For now we still go through all layers below but they are empty -RUN if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \ - # Note missing man directories on debian-stretch - # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 - mkdir -pv /usr/share/man/man1 \ - && mkdir -pv /usr/share/man/man7 \ - && apt-get update \ - && apt-get install --no-install-recommends -y \ - gnupg \ - apt-transport-https \ - ca-certificates \ - software-properties-common \ - krb5-user \ - ldap-utils \ - less \ - lsb-release \ - net-tools \ - openjdk-8-jdk \ - openssh-client \ - openssh-server \ - postgresql-client \ - python-selinux \ - sqlite3 \ - tmux \ - unzip \ - vim \ - && apt-get autoremove -yqq --purge \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - ;\ +RUN curl -Lo kubectl \ + "https://storage.googleapis.com/kubernetes-release/release/${KUBERNETES_VERSION}/bin/linux/amd64/kubectl" \ + && chmod +x kubectl \ + && mv kubectl /usr/local/bin/kubectl + +RUN curl -Lo kind \ + "https://github.com/kubernetes-sigs/kind/releases/download/${KIND_VERSION}/kind-linux-amd64" \ + && chmod +x kind \ + && mv kind /usr/local/bin/kind + +ARG AIRFLOW_REPO=apache/airflow +ENV AIRFLOW_REPO=${AIRFLOW_REPO} + +ARG AIRFLOW_BRANCH=master +ENV AIRFLOW_BRANCH=${AIRFLOW_BRANCH} + +# Airflow Extras installed +ARG AIRFLOW_CI_EXTRAS="all,devel" +ENV AIRFLOW_CI_EXTRAS=${AIRFLOW_CI_EXTRAS} + +RUN echo "Installing with extras: ${AIRFLOW_CI_EXTRAS}." + +# Increase the value here to force reinstalling pip dependencies from the scratch for CI build +# It can also be overwritten manually by setting the build variable. 
+ARG PIP_DEPENDENCIES_EPOCH_NUMBER="1" +ENV PIP_DEPENDENCIES_EPOCH_NUMBER=${PIP_DEPENDENCIES_EPOCH_NUMBER} + +ENV PATH="/root/.local/bin:/root:${PATH}" + +# In case of CI builds we want to pre-install master version of airflow dependencies so that +# We do not have to always reinstall it from the scratch and loose time for that. +# CI build is optimised for build speed +RUN pip install --user \ + "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_CI_EXTRAS}]" \ + && pip uninstall --yes apache-airflow snakebite + +ARG AIRFLOW_SOURCES=/opt/airflow +ENV AIRFLOW_SOURCES=${AIRFLOW_SOURCES} + +# Copy all www files here so that we can run npm building for production +COPY airflow/www/ ${AIRFLOW_SOURCES}/airflow/www/ Review comment: I will take a look at that as the last remaining optimisation. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services