ashb commented on a change in pull request #6266: [AIRFLOW-2439] Production 
Docker image support including refactoring of build scripts - depends on 
[AIRFLOW-5704]
URL: https://github.com/apache/airflow/pull/6266#discussion_r336901053
 
 

 ##########
 File path: Dockerfile
 ##########
 @@ -77,252 +75,300 @@ RUN curl -sL https://deb.nodesource.com/setup_10.x | 
bash - \
            libssl-dev \
            locales  \
            netcat \
-           nodejs \
            rsync \
            sasl2-bin \
            sudo \
+           libmariadb-dev-compat \
     && apt-get autoremove -yqq --purge \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
-# Install graphviz - needed to build docs with diagrams
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-           graphviz \
-    && apt-get autoremove -yqq --purge \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install MySQL client from Oracle repositories (Debian installs mariadb)
-RUN KEY="A4A9406876FCBD3C456770C88C718D3B5072E1F5" \
-    && GNUPGHOME="$(mktemp -d)" \
-    && export GNUPGHOME \
-    && for KEYSERVER in $(shuf -e \
-            ha.pool.sks-keyservers.net \
-            hkp://p80.pool.sks-keyservers.net:80 \
-            keyserver.ubuntu.com \
-            hkp://keyserver.ubuntu.com:80 \
-            pgp.mit.edu) ; do \
-          gpg --keyserver "${KEYSERVER}" --recv-keys "${KEY}" && break || true 
; \
-       done \
-    && gpg --export "${KEY}" | apt-key add - \
-    && gpgconf --kill all \
-    rm -rf "${GNUPGHOME}"; \
-    apt-key list > /dev/null \
-    && echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-5.6" | tee -a 
/etc/apt/sources.list.d/mysql.list \
-    && apt-get update \
-    && apt-get install --no-install-recommends -y \
-        libmysqlclient-dev \
-        mysql-client \
-    && apt-get autoremove -yqq --purge \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-
 RUN adduser airflow \
     && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
     && chmod 0440 /etc/sudoers.d/airflow
 
 
############################################################################################################
-# This is an image with all APT dependencies needed by CI. It is built on top 
of the airlfow APT image
-# Parameters:
-#     airflow-apt-deps - this is the base image for CI deps image.
+# CI airflow image
 
############################################################################################################
-FROM airflow-apt-deps-ci-slim as airflow-apt-deps-ci
+FROM airflow-base as airflow-ci
 
 SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]
 
-ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
+# Setting to 1 speeds up building the image. Cassandra driver without CYTHON 
saves around 10 minutes
+# But might not be suitable for production image
+ENV CASS_DRIVER_NO_CYTHON="1"
+ENV CASS_DRIVER_BUILD_CONCURRENCY=8
+
+ENV JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/
+
+# By changing the CI build epoch we can force reinstalling apt dependencies 
for CI
+# It can also be overwritten manually by setting the build variable.
+ARG CI_APT_DEPENDENCIES_EPOCH_NUMBER="1"
+ENV CI_APT_DEPENDENCIES_EPOCH_NUMBER=${CI_APT_DEPENDENCIES_EPOCH_NUMBER}
+
+RUN apt-get update \
+    && apt-get install --no-install-recommends -y \
+         apt-transport-https ca-certificates wget dirmngr gnupg 
software-properties-common curl gnupg2 \
+    && export APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
+    && curl -sL https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public 
| apt-key add - \
+    && curl -sL https://deb.nodesource.com/setup_10.x | bash - \
+    && add-apt-repository --yes 
https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \
+    && apt-get update \
+    && apt-get install --no-install-recommends -y \
+      gnupg \
+      graphviz \
+      krb5-user \
+      ldap-utils \
+      less \
+      lsb-release \
+      nodejs \
+      net-tools \
+      adoptopenjdk-8-hotspot \
+      openssh-client \
+      openssh-server \
+      postgresql-client \
+      python-selinux \
+      sqlite3 \
+      tmux \
+      unzip \
+      vim \
+    && apt-get autoremove -yqq --purge \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    ;
+
+ENV HADOOP_DISTRO="cdh" HADOOP_MAJOR="5" HADOOP_DISTRO_VERSION="5.11.0" 
HADOOP_VERSION="2.6.0" \
+    HADOOP_HOME="/tmp/hadoop-cdh"
+ENV HIVE_VERSION="1.1.0" HIVE_HOME="/tmp/hive"
+ENV 
HADOOP_URL="https://archive.cloudera.com/${HADOOP_DISTRO}${HADOOP_MAJOR}/${HADOOP_DISTRO}/${HADOOP_MAJOR}/";
+ENV 
MINICLUSTER_BASE="https://github.com/bolkedebruin/minicluster/releases/download/";
 \
+    MINICLUSTER_HOME="/tmp/minicluster" \
+    MINICLUSTER_VER="1.1"
+
+RUN mkdir -pv "${HADOOP_HOME}" \
+    && mkdir -pv "${HIVE_HOME}" \
+    && mkdir -pv "${MINICLUSTER_HOME}" \
+    && mkdir -pv "/user/hive/warehouse" \
+    && chmod -R 777 "${HIVE_HOME}" \
+    &&chmod -R 777 "/user/"
+
+ENV 
HADOOP_DOWNLOAD_URL="${HADOOP_URL}hadoop-${HADOOP_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz"
 \
+    HADOOP_TMP_FILE="/tmp/hadoop.tar.gz"
+
+RUN curl -sL "${HADOOP_DOWNLOAD_URL}" >"${HADOOP_TMP_FILE}" \
+    && tar xzf "${HADOOP_TMP_FILE}" --absolute-names --strip-components 1 -C 
"${HADOOP_HOME}" \
+    && rm "${HADOOP_TMP_FILE}"
+
+ENV 
HIVE_URL="${HADOOP_URL}hive-${HIVE_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz"
 \
+    HIVE_TMP_FILE="/tmp/hive.tar.gz"
+
+RUN curl -sL "${HIVE_URL}" >"${HIVE_TMP_FILE}" \
+    && tar xzf "${HIVE_TMP_FILE}" --strip-components 1 -C "${HIVE_HOME}" \
+    && rm "${HIVE_TMP_FILE}"
+
+ENV 
MINICLUSTER_URL="${MINICLUSTER_BASE}${MINICLUSTER_VER}/minicluster-${MINICLUSTER_VER}-SNAPSHOT-bin.zip"
 \
+    MINICLUSTER_TMP_FILE="/tmp/minicluster.zip"
+
+RUN curl -sL "${MINICLUSTER_URL}" > "${MINICLUSTER_TMP_FILE}" \
+    && unzip "${MINICLUSTER_TMP_FILE}" -d "/tmp" \
+    && rm "${MINICLUSTER_TMP_FILE}"
+
+ENV PATH "${PATH}:/tmp/hive/bin"
+
+RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - \
+    && add-apt-repository "deb [arch=amd64] 
https://download.docker.com/linux/debian stretch stable" \
+    && apt-get update \
+    && apt-get -y install --no-install-recommends docker-ce \
+    && apt-get autoremove -yqq --purge \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
 
-ARG APT_DEPS_IMAGE="airflow-apt-deps-ci-slim"
-ENV APT_DEPS_IMAGE=${APT_DEPS_IMAGE}
 ARG KUBERNETES_VERSION="v1.15.0"
 ENV KUBERNETES_VERSION=${KUBERNETES_VERSION}
 ARG KIND_VERSION="v0.5.0"
 ENV KIND_VERSION=${KIND_VERSION}
 
-RUN echo "${APT_DEPS_IMAGE}"
-
-# Note the ifs below might be removed if Buildkit will become usable. It 
should skip building this
-# image automatically if it is not used. For now we still go through all 
layers below but they are empty
-RUN if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \
-        # Note missing man directories on debian-stretch
-        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199
-        mkdir -pv /usr/share/man/man1 \
-        && mkdir -pv /usr/share/man/man7 \
-        && apt-get update \
-        && apt-get install --no-install-recommends -y \
-          gnupg \
-          apt-transport-https \
-          ca-certificates \
-          software-properties-common \
-          krb5-user \
-          ldap-utils \
-          less \
-          lsb-release \
-          net-tools \
-          openjdk-8-jdk \
-          openssh-client \
-          openssh-server \
-          postgresql-client \
-          python-selinux \
-          sqlite3 \
-          tmux \
-          unzip \
-          vim \
-        && apt-get autoremove -yqq --purge \
-        && apt-get clean \
-        && rm -rf /var/lib/apt/lists/* \
-        ;\
+RUN curl -Lo kubectl \
+  
"https://storage.googleapis.com/kubernetes-release/release/${KUBERNETES_VERSION}/bin/linux/amd64/kubectl";
 \
+  && chmod +x kubectl \
+  && mv kubectl /usr/local/bin/kubectl
+
+RUN curl -Lo kind \
+   
"https://github.com/kubernetes-sigs/kind/releases/download/${KIND_VERSION}/kind-linux-amd64";
 \
+   && chmod +x kind \
+   && mv kind /usr/local/bin/kind
+
+ARG AIRFLOW_REPO=apache/airflow
+ENV AIRFLOW_REPO=${AIRFLOW_REPO}
+
+ARG AIRFLOW_BRANCH=master
+ENV AIRFLOW_BRANCH=${AIRFLOW_BRANCH}
+
+# Airflow Extras installed
+ARG AIRFLOW_CI_EXTRAS="all,devel"
+ENV AIRFLOW_CI_EXTRAS=${AIRFLOW_CI_EXTRAS}
+
+RUN echo "Installing with extras: ${AIRFLOW_CI_EXTRAS}."
+
+# Increase the value here to force reinstalling pip dependencies from 
scratch for CI build
+# It can also be overwritten manually by setting the build variable.
+ARG PIP_DEPENDENCIES_EPOCH_NUMBER="1"
+ENV PIP_DEPENDENCIES_EPOCH_NUMBER=${PIP_DEPENDENCIES_EPOCH_NUMBER}
+
+ENV PATH="/root/.local/bin:/root:${PATH}"
+
+# In case of CI builds we want to pre-install master version of airflow 
dependencies so that
+# we do not have to always reinstall them from scratch and lose time for 
that.
+# CI build is optimised for build speed
+RUN pip install --user \
+        
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_CI_EXTRAS}]";
 \
+        && pip uninstall --yes apache-airflow snakebite
+
+ARG AIRFLOW_SOURCES=/opt/airflow
+ENV AIRFLOW_SOURCES=${AIRFLOW_SOURCES}
+
+# Copy all www files here so that we can run npm building for production
+COPY airflow/www/ ${AIRFLOW_SOURCES}/airflow/www/
 
 Review comment:
   Isn't this COPY a bit broad? It includes views.py and other Python files.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to