This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-7-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit d03df712a98b8a6bf70400765d1fb5522b8245df Author: Arthur <37664438+v0lan...@users.noreply.github.com> AuthorDate: Tue Oct 31 19:37:21 2023 +0100 Add pip caching for faster build (#35026) --------- Co-authored-by: Arthur Volant <arthur.vol...@adevinta.com> Co-authored-by: Jarek Potiuk <ja...@potiuk.com> (cherry picked from commit 66871a00f39834b60741fe31a51c11704919eb58) --- Dockerfile | 26 ++++++++++++++++++-------- Dockerfile.ci | 4 ++-- docs/docker-stack/build-arg-ref.rst | 3 +++ docs/docker-stack/build.rst | 12 ++++++++++++ docs/docker-stack/changelog.rst | 2 ++ scripts/docker/common.sh | 4 ++-- scripts/docker/entrypoint_prod.sh | 2 +- 7 files changed, 40 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 17f5457064..898c319f25 100644 --- a/Dockerfile +++ b/Dockerfile @@ -546,9 +546,9 @@ function common::install_pip_version() { echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" echo if [[ ${AIRFLOW_PIP_VERSION} =~ .*https.* ]]; then - pip install --disable-pip-version-check --no-cache-dir "pip @ ${AIRFLOW_PIP_VERSION}" + pip install --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}" else - pip install --disable-pip-version-check --no-cache-dir "pip==${AIRFLOW_PIP_VERSION}" + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" fi mkdir -p "${HOME}/.local/bin" } @@ -1114,7 +1114,7 @@ if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then >&2 echo " the container starts, so it is only useful for testing and trying out" >&2 echo " of adding dependencies." 
>&2 echo - pip install --root-user-action ignore --no-cache-dir ${_PIP_ADDITIONAL_REQUIREMENTS} + pip install --root-user-action ignore ${_PIP_ADDITIONAL_REQUIREMENTS} fi @@ -1190,7 +1190,8 @@ SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "n ARG PYTHON_BASE_IMAGE ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ - LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 + LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \ + PIP_CACHE_DIR=/tmp/.cache/pip ARG DEV_APT_DEPS="" ARG ADDITIONAL_DEV_APT_DEPS="" @@ -1375,8 +1376,15 @@ WORKDIR ${AIRFLOW_HOME} COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ install_additional_dependencies.sh /scripts/docker/ -# hadolint ignore=SC2086, SC2010 -RUN if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ +# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from +# an incorrect architecture. +ARG TARGETARCH +# Value to be able to easily change cache id and therefore use a bare new cache +ARG PIP_CACHE_EPOCH="0" + +# hadolint ignore=SC2086, SC2010, DL3042 +RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ + if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ bash /scripts/docker/install_from_docker_context_files.sh; \ fi; \ if ! airflow version 2>/dev/null >/dev/null; then \ @@ -1394,8 +1402,10 @@ RUN if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ # In case there is a requirements.txt file in "docker-context-files" it will be installed # during the build additionally to whatever has been installed so far. 
It is recommended that # the requirements.txt contains only dependencies with == version specification -RUN if [[ -f /docker-context-files/requirements.txt ]]; then \ - pip install --no-cache-dir --user -r /docker-context-files/requirements.txt; \ +# hadolint ignore=DL3042 +RUN --mount=type=cache,id=additional-requirements-$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ + if [[ -f /docker-context-files/requirements.txt ]]; then \ + pip install --user -r /docker-context-files/requirements.txt; \ fi ############################################################################################## diff --git a/Dockerfile.ci b/Dockerfile.ci index 58b5fb059e..0404036fd4 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -506,9 +506,9 @@ function common::install_pip_version() { echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" echo if [[ ${AIRFLOW_PIP_VERSION} =~ .*https.* ]]; then - pip install --disable-pip-version-check --no-cache-dir "pip @ ${AIRFLOW_PIP_VERSION}" + pip install --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}" else - pip install --disable-pip-version-check --no-cache-dir "pip==${AIRFLOW_PIP_VERSION}" + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" fi mkdir -p "${HOME}/.local/bin" } diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst index 671ba6240f..aa3315473c 100644 --- a/docs/docker-stack/build-arg-ref.rst +++ b/docs/docker-stack/build-arg-ref.rst @@ -278,3 +278,6 @@ Docker context files. | | | This allows to optimize iterations for | | | | Image builds and speeds up CI builds. | +------------------------------------------+------------------------------------------+------------------------------------------+ +| ``PIP_CACHE_EPOCH`` | ``"0"`` | Allow to invalidate cache by passing a | +| | | new argument. 
| ++------------------------------------------+------------------------------------------+------------------------------------------+ diff --git a/docs/docker-stack/build.rst b/docs/docker-stack/build.rst index 6ede683014..5bad72c003 100644 --- a/docs/docker-stack/build.rst +++ b/docs/docker-stack/build.rst @@ -972,3 +972,15 @@ The architecture of the images You can read more details about the images - the context, their parameters and internal structure in the `IMAGES.rst <https://github.com/apache/airflow/blob/main/IMAGES.rst>`_ document. + + +Pip packages caching +.................... + +To enable faster iteration when building the image locally (especially if you are testing different combination of +python packages), pip caching has been enabled. The caching id is based on four different parameters: + +1. ``PYTHON_BASE_IMAGE``: Avoid sharing same cache based on python version and target os +2. ``AIRFLOW_PIP_VERSION`` +3. ``TARGETARCH``: Avoid sharing architecture specific cached package +4. ``PIP_CACHE_EPOCH``: Enable changing cache id by passing ``PIP_CACHE_EPOCH`` as ``--build-arg`` diff --git a/docs/docker-stack/changelog.rst b/docs/docker-stack/changelog.rst index 1a52409e38..3ec48be434 100644 --- a/docs/docker-stack/changelog.rst +++ b/docs/docker-stack/changelog.rst @@ -68,6 +68,8 @@ Airflow 2.7 * Docker CLI version in the image is bumped to 24.0.6 version. + * PIP caching for local builds has been enabled to speed up local custom image building + * 2.7.0 * As of now, Python 3.7 is no longer supported by the Python community. 
Therefore, to use Airflow 2.7.0 and above, you must ensure your Python version is diff --git a/scripts/docker/common.sh b/scripts/docker/common.sh index e920cedc34..4b5e3c7633 100644 --- a/scripts/docker/common.sh +++ b/scripts/docker/common.sh @@ -76,9 +76,9 @@ function common::install_pip_version() { echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" echo if [[ ${AIRFLOW_PIP_VERSION} =~ .*https.* ]]; then - pip install --disable-pip-version-check --no-cache-dir "pip @ ${AIRFLOW_PIP_VERSION}" + pip install --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}" else - pip install --disable-pip-version-check --no-cache-dir "pip==${AIRFLOW_PIP_VERSION}" + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" fi mkdir -p "${HOME}/.local/bin" } diff --git a/scripts/docker/entrypoint_prod.sh b/scripts/docker/entrypoint_prod.sh index be2c8c396d..add3ab7ec9 100755 --- a/scripts/docker/entrypoint_prod.sh +++ b/scripts/docker/entrypoint_prod.sh @@ -308,7 +308,7 @@ if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then >&2 echo " the container starts, so it is only useful for testing and trying out" >&2 echo " of adding dependencies." >&2 echo - pip install --root-user-action ignore --no-cache-dir ${_PIP_ADDITIONAL_REQUIREMENTS} + pip install --root-user-action ignore ${_PIP_ADDITIONAL_REQUIREMENTS} fi