ashb commented on a change in pull request #4936: [AIRFLOW-4115] Multi-staging
Airflow Docker image [Step 1/3]
URL: https://github.com/apache/airflow/pull/4936#discussion_r268538752
##########
File path: Dockerfile
##########
@@ -13,46 +13,179 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+#
+# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT.
+#
+# Arguments of the build
+ARG PYTHON_BASE_IMAGE="python:3.6-slim"
+ARG AIRFLOW_VERSION="2.0.0.dev0"
+# Which image is used as dependency for the main image
+ARG APT_DEPS_IMAGE="airflow-apt-deps"
+# Speeds up building the image - cassandra driver without CYTHON saves around 10 minutes
+ARG CASS_DRIVER_NO_CYTHON="1"
+# Build cassandra driver on multiple CPUs
+ARG CASS_DRIVER_BUILD_CONCURRENCY="8"
+# By default PIP install is run without cache to make image smaller
+ARG PIP_NO_CACHE_DIR="true"
+# Additional python deps to install
+ARG ADDITIONAL_PYTHON_DEPS=""
+# PIP version used to install dependencies
+ARG PIP_VERSION="19.0.1"
+# By increasing this number we can do force build of all dependencies
+ARG DEPENDENCIES_EPOCH_NUMBER="1"
+############################################################################################################
+# This is the base image with APT dependencies needed by Airflow. It is based on a python slim image
+# Parameters:
+# PYTHON_BASE_IMAGE - base python image (python:x.y-slim)
+############################################################################################################
+FROM ${PYTHON_BASE_IMAGE} as airflow-apt-deps
+
+SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]
+
+ARG PYTHON_BASE_IMAGE
+ARG AIRFLOW_VERSION
+ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE}
+ENV AIRFLOW_VERSION=$AIRFLOW_VERSION
+
+# Print versions
+RUN echo "Base image: ${PYTHON_BASE_IMAGE}"
+RUN echo "Airflow version: ${AIRFLOW_VERSION}"
+
+# Make sure noninteractive debian install is used and language variables are set
+ENV DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8
LC_ALL=C.UTF-8 \
+ LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8
+
+# Increase the value below to force reinstalling of all dependencies
+ENV DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER}
+
+# Install curl and gnupg2 - needed to download nodejs in next step
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ curl gnupg2 \
+ && apt-get autoremove -yqq --purge \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+
+# Install basic apt dependencies
+RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends \
+ # Packages to install \
+ libsasl2-dev freetds-bin build-essential sasl2-bin \
+ libsasl2-2 libsasl2-dev libsasl2-modules \
+ default-libmysqlclient-dev apt-utils curl rsync netcat locales \
+ freetds-dev libkrb5-dev libssl-dev libffi-dev libpq-dev git \
+ nodejs gosu sudo \
+ && apt-get autoremove -yqq --purge \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+RUN adduser airflow \
+ && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
+ && chmod 0440 /etc/sudoers.d/airflow
+
+############################################################################################################
+# This is the target image - it installs PIP and NPM dependencies including efficient caching
+# mechanisms - it might be used to build the bare airflow build or CI build
+# Parameters:
+# APT_DEPS_IMAGE - image with APT dependencies. It might either be base deps image with airflow
+# dependencies or CI deps image that also contains CI-required dependencies
+############################################################################################################
+FROM airflow-apt-deps as main
+
+SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]
+
+WORKDIR /opt/airflow
+
+RUN echo "Airflow version: ${AIRFLOW_VERSION}"
+
+ARG AIRFLOW_HOME=/opt/airflow
+ENV AIRFLOW_HOME=${AIRFLOW_HOME}
-FROM python:3.6-slim
-SHELL ["/bin/bash", "-xc"]
+RUN mkdir -pv ${AIRFLOW_HOME} \
+ && chown -R airflow.airflow ${AIRFLOW_HOME}
-ENV AIRFLOW_HOME=/usr/local/airflow
-ARG AIRFLOW_DEPS="all"
-ARG PYTHON_DEPS=""
-ARG BUILD_DEPS="freetds-dev libkrb5-dev libssl-dev libffi-dev libpq-dev git"
-ARG APT_DEPS="libsasl2-dev freetds-bin build-essential
default-libmysqlclient-dev apt-utils curl rsync netcat locales"
+# Increase the value here to force reinstalling Apache Airflow pip dependencies
+ENV FORCE_REINSTALL_ALL_PIP_DEPENDENCIES=1
-ENV PATH="$HOME/.npm-packages/bin:$PATH"
+# Optimizing installation of Cassandra driver
+ARG CASS_DRIVER_BUILD_CONCURRENCY
+ARG CASS_DRIVER_NO_CYTHON
+ENV CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY}
+ENV CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON}
-RUN set -euxo pipefail \
- && apt update \
- && if [ -n "${APT_DEPS}" ]; then apt install -y $APT_DEPS; fi \
- && curl -sL https://deb.nodesource.com/setup_10.x | bash - \
- && apt update \
- && apt install -y nodejs \
- && apt autoremove -yqq --purge \
- && apt clean
+# By default PIP install is run without cache to make image smaller
+ARG PIP_NO_CACHE_DIR
+ENV PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR}
+RUN echo "Pip no cache dir: ${PIP_NO_CACHE_DIR}"
-COPY . /opt/airflow/
+ARG PIP_VERSION
+ENV PIP_VERSION=${PIP_VERSION}
+RUN echo "Pip version: ${PIP_VERSION}"
+
+RUN pip install --upgrade pip==${PIP_VERSION}
+
+# Airflow sources change frequently but dependency configuration won't change that often
+# We copy setup.py and other files needed to perform setup of dependencies
+# This way cache here will only be invalidated if any of the
+# version/setup configuration change but not when airflow sources change
+COPY --chown=airflow:airflow setup.py /opt/airflow/setup.py
+COPY --chown=airflow:airflow setup.cfg /opt/airflow/setup.cfg
+
+COPY --chown=airflow:airflow airflow/version.py /opt/airflow/airflow/version.py
+COPY --chown=airflow:airflow airflow/__init__.py
/opt/airflow/airflow/__init__.py
+COPY --chown=airflow:airflow airflow/bin/airflow
/opt/airflow/airflow/bin/airflow
+
+# Airflow Extras installed
+ARG AIRFLOW_EXTRAS="all"
+ENV AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}
+RUN echo "Installing with extras: ${AIRFLOW_EXTRAS}."
+
+# First install only dependencies but no Apache Airflow itself
+# This way regular changes in sources of Airflow will not trigger reinstallation of all dependencies
+# And this Docker layer will be reused between builds.
+RUN pip install --no-use-pep517 -e ".[${AIRFLOW_EXTRAS}]"
+
+COPY --chown=airflow:airflow airflow/www/package.json
/opt/airflow/airflow/www/package.json
+COPY --chown=airflow:airflow airflow/www/package-lock.json
/opt/airflow/airflow/www/package-lock.json
WORKDIR /opt/airflow/airflow/www
-RUN npm install \
- && npm run prod
+
+# Install necessary NPM dependencies (triggered by changes in package-lock.json)
+RUN gosu airflow npm ci
+
+COPY --chown=airflow:airflow airflow/www/ /opt/airflow/airflow/www/
+
+# Package NPM for production
+RUN gosu airflow npm run prod
WORKDIR /opt/airflow
-RUN set -euxo pipefail \
- && apt update \
- && if [ -n "${BUILD_DEPS}" ]; then apt install -y $BUILD_DEPS; fi \
- && if [ -n "${PYTHON_DEPS}" ]; then pip install --no-cache-dir
${PYTHON_DEPS}; fi \
- && pip install --no-cache-dir --upgrade pip==19.0.1 \
- && pip install --no-cache-dir --no-use-pep517 -e .[$AIRFLOW_DEPS] \
- && apt purge --auto-remove -yqq $BUILD_DEPS \
- && apt autoremove -yqq --purge \
- && apt clean
-
-WORKDIR $AIRFLOW_HOME
-RUN mkdir -p $AIRFLOW_HOME
-COPY scripts/docker/entrypoint.sh /entrypoint.sh
-ENTRYPOINT ["/entrypoint.sh"]
+
+# Cache for this line will be automatically invalidated if any
+# of airflow sources change
+COPY --chown=airflow:airflow . /opt/airflow/
+
+# Always apt-get update/upgrade here to get latest dependencies before
+# we redo pip install
+RUN apt-get update \
+ && apt-get upgrade -y --no-install-recommends \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
Review comment:
Having this after copying the source files seems out of place; it is not where
I would have put it.
Is there a reason this is in this image rather than, say, before the first
`COPY` in this image, or in the airflow-apt-deps image?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services