ashb commented on a change in pull request #6266: [AIRFLOW-2439] Production Docker image support including refactoring of build scripts - depends on [AIRFLOW-5704] URL: https://github.com/apache/airflow/pull/6266#discussion_r337073537
########## File path: Dockerfile ########## @@ -77,252 +75,300 @@ RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - \ libssl-dev \ locales \ netcat \ - nodejs \ rsync \ sasl2-bin \ sudo \ + libmariadb-dev-compat \ && apt-get autoremove -yqq --purge \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install graphviz - needed to build docs with diagrams -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - graphviz \ - && apt-get autoremove -yqq --purge \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Install MySQL client from Oracle repositories (Debian installs mariadb) -RUN KEY="A4A9406876FCBD3C456770C88C718D3B5072E1F5" \ - && GNUPGHOME="$(mktemp -d)" \ - && export GNUPGHOME \ - && for KEYSERVER in $(shuf -e \ - ha.pool.sks-keyservers.net \ - hkp://p80.pool.sks-keyservers.net:80 \ - keyserver.ubuntu.com \ - hkp://keyserver.ubuntu.com:80 \ - pgp.mit.edu) ; do \ - gpg --keyserver "${KEYSERVER}" --recv-keys "${KEY}" && break || true ; \ - done \ - && gpg --export "${KEY}" | apt-key add - \ - && gpgconf --kill all \ - rm -rf "${GNUPGHOME}"; \ - apt-key list > /dev/null \ - && echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-5.6" | tee -a /etc/apt/sources.list.d/mysql.list \ - && apt-get update \ - && apt-get install --no-install-recommends -y \ - libmysqlclient-dev \ - mysql-client \ - && apt-get autoremove -yqq --purge \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - RUN adduser airflow \ && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \ && chmod 0440 /etc/sudoers.d/airflow ############################################################################################################ -# This is an image with all APT dependencies needed by CI. It is built on top of the airlfow APT image -# Parameters: -# airflow-apt-deps - this is the base image for CI deps image. 
+# CI airflow image ############################################################################################################ -FROM airflow-apt-deps-ci-slim as airflow-apt-deps-ci +FROM airflow-base as airflow-ci SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] -ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ +# Setting to 1 speeds up building the image. Cassandra driver without CYTHON saves around 10 minutes +# But might not be suitable for production image +ENV CASS_DRIVER_NO_CYTHON="1" +ENV CASS_DRIVER_BUILD_CONCURRENCY=8 + +ENV JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/ + +# By changing the CI build epoch we can force reinstalling apt dependenecies for CI +# It can also be overwritten manually by setting the build variable. +ARG CI_APT_DEPENDENCIES_EPOCH_NUMBER="1" +ENV CI_APT_DEPENDENCIES_EPOCH_NUMBER=${CI_APT_DEPENDENCIES_EPOCH_NUMBER} + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + apt-transport-https ca-certificates wget dirmngr gnupg software-properties-common curl gnupg2 \ + && export APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \ + && curl -sL https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add - \ + && curl -sL https://deb.nodesource.com/setup_10.x | bash - \ + && add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \ + && apt-get update \ + && apt-get install --no-install-recommends -y \ + gnupg \ + graphviz \ + krb5-user \ + ldap-utils \ + less \ + lsb-release \ + nodejs \ + net-tools \ + adoptopenjdk-8-hotspot \ + openssh-client \ + openssh-server \ + postgresql-client \ + python-selinux \ + sqlite3 \ + tmux \ + unzip \ + vim \ + && apt-get autoremove -yqq --purge \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + ; + +ENV HADOOP_DISTRO="cdh" HADOOP_MAJOR="5" HADOOP_DISTRO_VERSION="5.11.0" HADOOP_VERSION="2.6.0" \ + HADOOP_HOME="/tmp/hadoop-cdh" +ENV HIVE_VERSION="1.1.0" HIVE_HOME="/tmp/hive" +ENV 
HADOOP_URL="https://archive.cloudera.com/${HADOOP_DISTRO}${HADOOP_MAJOR}/${HADOOP_DISTRO}/${HADOOP_MAJOR}/" +ENV MINICLUSTER_BASE="https://github.com/bolkedebruin/minicluster/releases/download/" \ + MINICLUSTER_HOME="/tmp/minicluster" \ + MINICLUSTER_VER="1.1" + +RUN mkdir -pv "${HADOOP_HOME}" \ + && mkdir -pv "${HIVE_HOME}" \ + && mkdir -pv "${MINICLUSTER_HOME}" \ + && mkdir -pv "/user/hive/warehouse" \ + && chmod -R 777 "${HIVE_HOME}" \ + &&chmod -R 777 "/user/" + +ENV HADOOP_DOWNLOAD_URL="${HADOOP_URL}hadoop-${HADOOP_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz" \ + HADOOP_TMP_FILE="/tmp/hadoop.tar.gz" + +RUN curl -sL "${HADOOP_DOWNLOAD_URL}" >"${HADOOP_TMP_FILE}" \ + && tar xzf "${HADOOP_TMP_FILE}" --absolute-names --strip-components 1 -C "${HADOOP_HOME}" \ + && rm "${HADOOP_TMP_FILE}" + +ENV HIVE_URL="${HADOOP_URL}hive-${HIVE_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz" \ + HIVE_TMP_FILE="/tmp/hive.tar.gz" + +RUN curl -sL "${HIVE_URL}" >"${HIVE_TMP_FILE}" \ + && tar xzf "${HIVE_TMP_FILE}" --strip-components 1 -C "${HIVE_HOME}" \ + && rm "${HIVE_TMP_FILE}" + +ENV MINICLUSTER_URL="${MINICLUSTER_BASE}${MINICLUSTER_VER}/minicluster-${MINICLUSTER_VER}-SNAPSHOT-bin.zip" \ + MINICLUSTER_TMP_FILE="/tmp/minicluster.zip" + +RUN curl -sL "${MINICLUSTER_URL}" > "${MINICLUSTER_TMP_FILE}" \ + && unzip "${MINICLUSTER_TMP_FILE}" -d "/tmp" \ + && rm "${MINICLUSTER_TMP_FILE}" + +ENV PATH "${PATH}:/tmp/hive/bin" + +RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - \ Review comment: Yeah, this one I'm okay with it staying as it is. (The changes in docker are probably more meaningful than nodejs where we are fine with whatever version is in debian right now i.e. 10.x) ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services