ashb commented on a change in pull request #6266: [AIRFLOW-2439] Production 
Docker image support including refactoring of build scripts - depends on 
[AIRFLOW-5704]
URL: https://github.com/apache/airflow/pull/6266#discussion_r336900127
 
 

 ##########
 File path: Dockerfile
 ##########
 @@ -77,252 +75,300 @@ RUN curl -sL https://deb.nodesource.com/setup_10.x | 
bash - \
            libssl-dev \
            locales  \
            netcat \
-           nodejs \
            rsync \
            sasl2-bin \
            sudo \
+           libmariadb-dev-compat \
     && apt-get autoremove -yqq --purge \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
-# Install graphviz - needed to build docs with diagrams
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-           graphviz \
-    && apt-get autoremove -yqq --purge \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install MySQL client from Oracle repositories (Debian installs mariadb)
-RUN KEY="A4A9406876FCBD3C456770C88C718D3B5072E1F5" \
-    && GNUPGHOME="$(mktemp -d)" \
-    && export GNUPGHOME \
-    && for KEYSERVER in $(shuf -e \
-            ha.pool.sks-keyservers.net \
-            hkp://p80.pool.sks-keyservers.net:80 \
-            keyserver.ubuntu.com \
-            hkp://keyserver.ubuntu.com:80 \
-            pgp.mit.edu) ; do \
-          gpg --keyserver "${KEYSERVER}" --recv-keys "${KEY}" && break || true 
; \
-       done \
-    && gpg --export "${KEY}" | apt-key add - \
-    && gpgconf --kill all \
-    rm -rf "${GNUPGHOME}"; \
-    apt-key list > /dev/null \
-    && echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-5.6" | tee -a 
/etc/apt/sources.list.d/mysql.list \
-    && apt-get update \
-    && apt-get install --no-install-recommends -y \
-        libmysqlclient-dev \
-        mysql-client \
-    && apt-get autoremove -yqq --purge \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-
 RUN adduser airflow \
     && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
     && chmod 0440 /etc/sudoers.d/airflow
 
 
############################################################################################################
-# This is an image with all APT dependencies needed by CI. It is built on top 
of the airlfow APT image
-# Parameters:
-#     airflow-apt-deps - this is the base image for CI deps image.
+# CI airflow image
 
############################################################################################################
-FROM airflow-apt-deps-ci-slim as airflow-apt-deps-ci
+FROM airflow-base as airflow-ci
 
 SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]
 
-ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
+# Setting to 1 speeds up building the image. Cassandra driver without CYTHON 
saves around 10 minutes
+# But might not be suitable for production image
+ENV CASS_DRIVER_NO_CYTHON="1"
+ENV CASS_DRIVER_BUILD_CONCURRENCY=8
+
+ENV JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/
+
+# By changing the CI build epoch we can force reinstalling apt dependenecies 
for CI
+# It can also be overwritten manually by setting the build variable.
+ARG CI_APT_DEPENDENCIES_EPOCH_NUMBER="1"
+ENV CI_APT_DEPENDENCIES_EPOCH_NUMBER=${CI_APT_DEPENDENCIES_EPOCH_NUMBER}
+
+RUN apt-get update \
+    && apt-get install --no-install-recommends -y \
+         apt-transport-https ca-certificates wget dirmngr gnupg 
software-properties-common curl gnupg2 \
+    && export APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
+    && curl -sL https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public 
| apt-key add - \
+    && curl -sL https://deb.nodesource.com/setup_10.x | bash - \
+    && add-apt-repository --yes 
https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \
+    && apt-get update \
+    && apt-get install --no-install-recommends -y \
+      gnupg \
+      graphviz \
+      krb5-user \
+      ldap-utils \
+      less \
+      lsb-release \
+      nodejs \
+      net-tools \
+      adoptopenjdk-8-hotspot \
+      openssh-client \
+      openssh-server \
+      postgresql-client \
+      python-selinux \
+      sqlite3 \
+      tmux \
+      unzip \
+      vim \
+    && apt-get autoremove -yqq --purge \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    ;
+
+ENV HADOOP_DISTRO="cdh" HADOOP_MAJOR="5" HADOOP_DISTRO_VERSION="5.11.0" 
HADOOP_VERSION="2.6.0" \
+    HADOOP_HOME="/tmp/hadoop-cdh"
+ENV HIVE_VERSION="1.1.0" HIVE_HOME="/tmp/hive"
+ENV 
HADOOP_URL="https://archive.cloudera.com/${HADOOP_DISTRO}${HADOOP_MAJOR}/${HADOOP_DISTRO}/${HADOOP_MAJOR}/";
+ENV 
MINICLUSTER_BASE="https://github.com/bolkedebruin/minicluster/releases/download/";
 \
+    MINICLUSTER_HOME="/tmp/minicluster" \
+    MINICLUSTER_VER="1.1"
+
+RUN mkdir -pv "${HADOOP_HOME}" \
+    && mkdir -pv "${HIVE_HOME}" \
+    && mkdir -pv "${MINICLUSTER_HOME}" \
+    && mkdir -pv "/user/hive/warehouse" \
+    && chmod -R 777 "${HIVE_HOME}" \
+    &&chmod -R 777 "/user/"
+
+ENV 
HADOOP_DOWNLOAD_URL="${HADOOP_URL}hadoop-${HADOOP_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz"
 \
+    HADOOP_TMP_FILE="/tmp/hadoop.tar.gz"
+
+RUN curl -sL "${HADOOP_DOWNLOAD_URL}" >"${HADOOP_TMP_FILE}" \
+    && tar xzf "${HADOOP_TMP_FILE}" --absolute-names --strip-components 1 -C 
"${HADOOP_HOME}" \
+    && rm "${HADOOP_TMP_FILE}"
+
+ENV 
HIVE_URL="${HADOOP_URL}hive-${HIVE_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz"
 \
+    HIVE_TMP_FILE="/tmp/hive.tar.gz"
+
+RUN curl -sL "${HIVE_URL}" >"${HIVE_TMP_FILE}" \
+    && tar xzf "${HIVE_TMP_FILE}" --strip-components 1 -C "${HIVE_HOME}" \
+    && rm "${HIVE_TMP_FILE}"
+
+ENV 
MINICLUSTER_URL="${MINICLUSTER_BASE}${MINICLUSTER_VER}/minicluster-${MINICLUSTER_VER}-SNAPSHOT-bin.zip"
 \
+    MINICLUSTER_TMP_FILE="/tmp/minicluster.zip"
+
+RUN curl -sL "${MINICLUSTER_URL}" > "${MINICLUSTER_TMP_FILE}" \
+    && unzip "${MINICLUSTER_TMP_FILE}" -d "/tmp" \
+    && rm "${MINICLUSTER_TMP_FILE}"
+
+ENV PATH "${PATH}:/tmp/hive/bin"
+
+RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - \
 
 Review comment:
   And again, https://packages.debian.org/buster/docker.io might be a 
possibility here.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to