This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 90a650d371 Optimize caching installed packages in CI build (#37315)
90a650d371 is described below
commit 90a650d3718d76ec8ae07d9bc666cc278bcaf2d7
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sun Feb 11 14:24:01 2024 +0100
Optimize caching installed packages in CI build (#37315)
Some of the recent changes in handling conflicting dependencies
broke optimization of installing dependencies from branch tip.
The optimisation worked by installing packages first
from branch tip, to make them pre-installed (and cached in a docker
layer) so that the final installation step with pyproject.toml takes
very little time, even if pyproject.toml has changed.
The problem was that when branch tip and constraints conflicted,
the installation failed and no packages were installed in
the "branch tip" layer, effectively removing the cache.
This change fixes it - when we install from branch tip now we are not
using constraints, which means that they will never conflict, and
this also means that cache will never be empty. It can contain other
versions of some of the packages, but vast majority of the packages
should be the same as in constraints, so the following installation
step should reuse vast majority of already installed packages.
---
Dockerfile | 10 ++++++----
Dockerfile.ci | 10 ++++++----
scripts/docker/install_airflow_dependencies_from_branch_tip.sh | 10 ++++++----
3 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index dc1d0d259d..a9bc0c9a00 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -447,14 +447,16 @@ function install_airflow_dependencies_from_branch_tip() {
if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,}
fi
- # Install latest set of dependencies using constraints. In case
constraints were upgraded and there
- # are conflicts, this might fail, but it should be fixed in the following
installation steps
+ # Install latest set of dependencies - without constraints. This is to
download a "base" set of
+ # dependencies that we can cache and reuse when installing airflow using
constraints and latest
+ # pyproject.toml in the next step (when we install regular airflow).
set -x
pip install --root-user-action ignore \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
-
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]"
\
- --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true
+
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]"
common::install_pip_version
+ # Uninstall airflow to keep only the dependencies. In the future when
planned https://github.com/pypa/pip/issues/11440
+ # is implemented in pip we might be able to use this flag and skip the
remove step.
pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes
2>/dev/null || true
set +x
echo
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 2115e70bd3..05f438a8f0 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -407,14 +407,16 @@ function install_airflow_dependencies_from_branch_tip() {
if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,}
fi
- # Install latest set of dependencies using constraints. In case
constraints were upgraded and there
- # are conflicts, this might fail, but it should be fixed in the following
installation steps
+ # Install latest set of dependencies - without constraints. This is to
download a "base" set of
+ # dependencies that we can cache and reuse when installing airflow using
constraints and latest
+ # pyproject.toml in the next step (when we install regular airflow).
set -x
pip install --root-user-action ignore \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
-
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]"
\
- --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true
+
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]"
common::install_pip_version
+ # Uninstall airflow to keep only the dependencies. In the future when
planned https://github.com/pypa/pip/issues/11440
+ # is implemented in pip we might be able to use this flag and skip the
remove step.
pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes
2>/dev/null || true
set +x
echo
diff --git a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh
b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh
index 9c809039c7..fcb30505d9 100644
--- a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh
+++ b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh
@@ -46,14 +46,16 @@ function install_airflow_dependencies_from_branch_tip() {
if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,}
fi
- # Install latest set of dependencies using constraints. In case
constraints were upgraded and there
- # are conflicts, this might fail, but it should be fixed in the following
installation steps
+ # Install latest set of dependencies - without constraints. This is to
download a "base" set of
+ # dependencies that we can cache and reuse when installing airflow using
constraints and latest
+ # pyproject.toml in the next step (when we install regular airflow).
set -x
pip install --root-user-action ignore \
${ADDITIONAL_PIP_INSTALL_FLAGS} \
-
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]"
\
- --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true
+
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]"
common::install_pip_version
+ # Uninstall airflow to keep only the dependencies. In the future when
planned https://github.com/pypa/pip/issues/11440
+ # is implemented in pip we might be able to use this flag and skip the
remove step.
pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes
2>/dev/null || true
set +x
echo