This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch v1-10-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 9253a8f65f6d25bed13c0f92c6c01d5b55950a06 Author: Jarek Potiuk <jarek.pot...@polidea.com> AuthorDate: Wed Jul 1 14:50:30 2020 +0200 Switches to Helm Chart for Kubernetes tests (#9468) The Kubernetes tests are now run using Helm chart rather than the custom templates we used to have. The Helm Chart uses locally build production image so the tests are testing not only Airflow but also Helm Chart and a Production image - all at the same time. Later on we will add more tests covering more functionalities of both Helm Chart and Production Image. This is the first step to get all of those bundle together and become testable. This change introduces also 'shell' sub-command for Breeze's kind-cluster command and EMBEDDED_DAGS build args for production image - both of them useful to run the Kubernetes tests more easily - without building two images and with an easy-to-iterate-over-tests shell command - which works without any other development environment. Co-authored-by: Jarek Potiuk <ja...@potiuk.com> Co-authored-by: Daniel Imberman <dan...@astronomer.io> (cherry picked from commit 8bd15ef634cca40f3cf6ca3442262f3e05144512) --- .github/workflows/ci.yml | 23 +- BREEZE.rst | 81 +++-- CI.rst | 2 +- Dockerfile | 4 + IMAGES.rst | 3 + TESTING.rst | 67 ++-- airflow/kubernetes/pod_launcher.py | 2 +- breeze | 51 ++- breeze-complete | 14 +- chart/README.md | 5 +- chart/charts/postgresql-6.3.12.tgz | Bin 22754 -> 0 bytes chart/requirements.lock | 4 +- chart/templates/configmap.yaml | 2 + chart/templates/rbac/pod-launcher-role.yaml | 2 +- chart/templates/rbac/pod-launcher-rolebinding.yaml | 4 +- kubernetes_tests/test_kubernetes_executor.py | 40 ++- scripts/ci/ci_build_production_images.sh | 25 -- scripts/ci/ci_count_changed_files.sh | 2 +- scripts/ci/ci_deploy_app_to_kubernetes.sh | 16 +- scripts/ci/ci_docs.sh | 2 +- scripts/ci/ci_flake8.sh | 2 +- scripts/ci/ci_generate_requirements.sh | 2 +- scripts/ci/ci_load_image_to_kind.sh | 7 +- scripts/ci/ci_mypy.sh | 2 +- 
scripts/ci/ci_perform_kind_cluster_operation.sh | 6 +- scripts/ci/ci_run_airflow_testing.sh | 2 +- scripts/ci/ci_run_kubernetes_tests.sh | 6 +- scripts/ci/ci_run_static_checks.sh | 2 +- scripts/ci/kubernetes/app/postgres.yaml | 94 ----- .../kubernetes/app/templates/airflow.template.yaml | 207 ----------- .../app/templates/configmaps.template.yaml | 395 --------------------- .../app/templates/init_git_sync.template.yaml | 36 -- scripts/ci/kubernetes/app/volumes.yaml | 87 ----- .../docker/airflow-test-env-init-dags.sh | 36 -- .../kubernetes/docker/airflow-test-env-init-db.sh | 46 --- scripts/ci/kubernetes/docker/bootstrap.sh | 74 ---- scripts/ci/kubernetes/kind-cluster-conf.yaml | 3 - .../kubernetes/{app/secrets.yaml => volumes.yaml} | 29 +- scripts/ci/libraries/_build_images.sh | 11 +- scripts/ci/libraries/_initialization.sh | 27 +- scripts/ci/libraries/_kind.sh | 380 +++++++------------- scripts/ci/libraries/_verbosity.sh | 31 ++ 42 files changed, 424 insertions(+), 1410 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d091d2e..195f7f7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,7 +100,7 @@ jobs: steps: - uses: actions/checkout@master - name: "Build PROD image ${{ matrix.python-version }}" - run: ./scripts/ci/ci_build_production_images.sh + run: ./scripts/ci/ci_prepare_prod_image_on_ci.sh tests-kubernetes: timeout-minutes: 80 @@ -113,7 +113,11 @@ jobs: kube-mode: - image kubernetes-version: - - "v1.15.3" + - "v1.18.2" + kind-version: + - "v0.8.0" + helm-version: + - "v3.2.4" fail-fast: false env: BACKEND: postgres @@ -126,6 +130,8 @@ jobs: PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}" KUBERNETES_MODE: "${{ matrix.kube-mode }}" KUBERNETES_VERSION: "${{ matrix.kubernetes-version }}" + KIND_VERSION: "${{ matrix.kind-version }}" + HELM_VERSION: "${{ matrix.helm-version }}" # For pull requests only run tests when python files changed if: needs.pyfiles.outputs.count != '0' || 
github.event_name != 'pull_request' steps: @@ -135,10 +141,12 @@ jobs: python-version: '3.6' - name: "Free space" run: ./scripts/ci/ci_free_space_on_ci.sh - - name: "Build PROD image ${{ matrix.python-version }}" - run: ./scripts/ci/ci_build_production_images.sh - - name: "Setup KinD cluster" - run: ./scripts/ci/ci_perform_kind_cluster_operation.sh start + - uses: engineerd/setup-kind@v0.4.0 + name: Setup Kind Cluster + with: + version: "${{ matrix.kind-version }}" + name: airflow-python-${{matrix.python-version}}-${{matrix.kubernetes-version}} + config: "scripts/ci/kubernetes/kind-cluster-conf.yaml" - name: "Deploy app to cluster" run: ./scripts/ci/ci_deploy_app_to_kubernetes.sh - name: Cache virtualenv for kubernetes testing @@ -152,6 +160,7 @@ ${{ hashFiles('requirements/requirements-python${{matrix.python-version}}.txt') - name: "Tests" run: ./scripts/ci/ci_run_kubernetes_tests.sh - uses: actions/upload-artifact@v2 + name: Upload KinD logs # Always run this, even if one of th previous steps failed. if: always() with: @@ -343,7 +352,7 @@ ${{ hashFiles('requirements/requirements-python${{matrix.python-version}}.txt') - name: "Free space" run: ./scripts/ci/ci_free_space_on_ci.sh - name: "Build PROD images ${{ matrix.python-version }}" - run: ./scripts/ci/ci_build_production_images.sh + run: ./scripts/ci/ci_prepare_prod_image_on_ci.sh - name: "Push PROD images ${{ matrix.python-version }}" run: ./scripts/ci/ci_push_production_images.sh diff --git a/BREEZE.rst b/BREEZE.rst index 9d2d719..9b318e2 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -270,7 +270,6 @@ Also - in case you run several different Breeze containers in parallel (from dif with different versions) - they docker images for CLI Cloud Providers tools are shared so if you update it for one Breeze container, they will also get updated for all the other containers. - Using the Airflow Breeze Environment ===================================== @@ -280,22 +279,42 @@ environment. 
Breeze script allows performing the following tasks: -Manage environments - CI (default) or Production - if ``--production-image`` flag is specified: +Managing CI environment: - * Build docker images with ``breeze build-image`` command - * Enter interactive shell when no command are specified (default behaviour) + * Build CI docker image with ``breeze build-image`` command + * Enter interactive shell in CI container when ``shell`` (or no command) is specified * Join running interactive shell with ``breeze exec`` command - * Start/stops/restarts Kind Kubernetes cluster with ``kind-cluster`` command * Stop running interactive environment with ``breeze stop`` command * Restart running interactive environment with ``breeze restart`` command - * Optionally reset database if specified as extra ``--db-reset`` flag - * Optionally start integrations (separate images) if specified as extra ``--integration`` flags (only CI) + * Run test specified with ``breeze tests`` command + * Execute arbitrary command in the test environment with ``breeze shell`` command + * Execute arbitrary docker-compose command with ``breeze docker-compose`` command + * Push docker images with ``breeze push-image`` command (requires committer's rights to push images) -Interact with CI environment: +You can optionally reset database if specified as extra ``--db-reset`` flag and for CI image you can also +start integrations (separate Docker images) if specified as extra ``--integration`` flags. You can also +choose which backend database should be used with ``--backend`` flag and python version with ``--python`` flag. 
- * Run test target specified with ``breeze tests`` command - * Execute arbitrary command in the test environment with ``breeze execute-command`` command +Managing Prod environment (with ``--production-image`` flag): + + * Build PROD docker image with ``breeze build-image`` command + * Enter interactive shell in PROD container when ``shell`` (or no command) is specified + * Join running interactive shell with ``breeze exec`` command + * Stop running interactive environment with ``breeze stop`` command + * Restart running interactive environment with ``breeze restart`` command + * Execute arbitrary command in the test environment with ``breeze shell`` command * Execute arbitrary docker-compose command with ``breeze docker-compose`` command + * Push docker images with ``breeze push-image`` command (requires committer's rights to push images) + +You can optionally reset database if specified as extra ``--db-reset`` flag. You can also +choose which backend database should be used with ``--backend`` flag and python version with ``--python`` flag. + + +Manage and Interact with Kubernetes tests environment: + + * Manage KinD Kubernetes cluster and deploy Airflow to KinD cluster ``breeze kind-cluster`` commands + * Run Kubernetes tests specified with ``breeze kind-cluster tests`` command + * Enter the interactive kubernetes test environment with ``breeze kind-cluster shell`` command Run static checks: @@ -312,12 +331,6 @@ Set up local development environment: * Setup autocomplete for itself with ``breeze setup-autocomplete`` command -Note that the below environment interaction is by default with the CI image. If you want to use production -image for those commands you need to add ``--production-image`` flag. - -Note that you also should not run both (CI and production) environments simultaneously, as they are using -the same docker-compose configuration which for example contain the link to the database, port mapping, etc. 
- Entering Breeze CI environment ------------------------------ @@ -617,20 +630,6 @@ If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``quit``, the whole script is aborted If more than one variable is set, ``yes`` takes precedence over ``no``, which takes precedence over ``quit``. -Building the Documentation --------------------------- - -To build documentation in Breeze, use the ``build-docs`` command: - -.. code-block:: bash - - ./breeze build-docs - -Results of the build can be found in the ``docs/_build`` folder. - -Often errors during documentation generation come from the docstrings of auto-api generated classes. -During the docs building auto-api generated files are stored in the ``docs/_api`` folder. This helps you -easily identify the location the problems with documentation originated from. Using Your Host IDE =================== @@ -1177,7 +1176,7 @@ This is the current syntax for `./breeze <./breeze>`_: to the cluster so you can also pass appropriate build image flags that will influence rebuilding the production image. Operation is one of: - start stop restart status deploy test + start stop restart status deploy test shell Flags: @@ -1416,9 +1415,25 @@ This is the current syntax for `./breeze <./breeze>`_: Kubernetes version - only used in case one of --kind-cluster-* commands is used. One of: - v1.15.3 + v1.18.2 + + Default: v1.18.2 + + --kind-version <KIND_VERSION> + Kind version - only used in case one of --kind-cluster-* commands is used. + One of: + + v0.8.0 + + Default: v0.8.0 + + --helm-version <HELM_VERSION> + Helm version - only used in case one of --kind-cluster-* commands is used. 
+ One of: + + v3.2.4 - Default: v1.15.3 + Default: v3.2.4 **************************************************************************************************** Manage mounting local files diff --git a/CI.rst b/CI.rst index 145a3ef..c8986cc 100644 --- a/CI.rst +++ b/CI.rst @@ -34,7 +34,7 @@ the CI jobs and we are mapping all the CI-specific environment variables to gene The only two places where CI-specific code might be are: - CI-specific declaration file (for example it is `<.github/workflow/ci.yml>`_ for GitHub Actions -- The ``get_ci_environment`` function in `<scripts/ci/libraries/_build_images.sh>`_ where mapping is +- The ``get_environment_for_builds_on_ci`` function in `<scripts/ci/libraries/_build_images.sh>`_ where mapping is performed from the CI-environment specific to generic values. Example for that is CI_EVENT_TYPE variable which determines whether we are running a ``push``. ``schedule`` or ``pull_request`` kind of CI job. For GitHub Action those are values mapped from ``GITHUB_EVENT_NAME`` variable, for Travis several other diff --git a/Dockerfile b/Dockerfile index bd21d06..89225b8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -356,6 +356,10 @@ COPY --chown=airflow:root --from=airflow-build-image /root/.local "${AIRFLOW_USE COPY scripts/prod/entrypoint_prod.sh /entrypoint COPY scripts/prod/clean-logs.sh /clean-logs +ARG EMBEDDED_DAGS="empty" + +COPY --chown=airflow:airflow ${EMBEDDED_DAGS}/ ${AIRFLOW_HOME}/dags/ + RUN chmod a+x /entrypoint /clean-logs # Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift diff --git a/IMAGES.rst b/IMAGES.rst index c829176..aca31d5 100644 --- a/IMAGES.rst +++ b/IMAGES.rst @@ -414,6 +414,9 @@ The following build arguments (``--build-arg`` in docker build command) can be u | ``ADDITIONAL_RUNTIME_DEPS`` | | additional apt runtime dependencies to | | | | install | 
+------------------------------------------+------------------------------------------+------------------------------------------+ +| ``EMBEDDED_DAGS`` | ``empty`` | Folder containing dags embedded into the | +| | | image in the ${AIRFLOW_HOME}/dags dir | ++------------------------------------------+------------------------------------------+------------------------------------------+ | ``AIRFLOW_HOME`` | ``/opt/airflow`` | Airflow’s HOME (that’s where logs and | | | | sqlite databases are stored) | +------------------------------------------+------------------------------------------+------------------------------------------+ diff --git a/TESTING.rst b/TESTING.rst index 36c1427..5cc681e 100644 --- a/TESTING.rst +++ b/TESTING.rst @@ -334,11 +334,10 @@ For your testing you manage Kind cluster with ``kind-cluster`` breeze command: .. code-block:: bash - ./breeze kind-cluster [ start | stop | recreate | status ] + ./breeze kind-cluster [ start | stop | recreate | status | deploy | test | shell ] - -The command allows you to start/stop/recreate/status Kind Kubernetes cluster -in your docker engine as well as deploy airflow to use it for testing (with ``deploy`` command). +The command allows you to start/stop/recreate/status Kind Kubernetes cluster, deploy Airflow via Helm +chart as well as interact with the cluster (via test and shell commands). Setting up the Kind Kubernetes cluster takes some time so once you started it, the cluster continues running until it is stopped with the ``kind-cluster stop`` command or until ``kind-cluster recreate`` @@ -348,49 +347,33 @@ The cluster name follows the pattern ``airflow-python-X.Y-vA.B.C`` where X.Y is and A.B.C is a Kubernetes version. This way you can have multiple clusters set up and running at the same time for different Python versions and different Kubernetes versions. 
-The Control Plane is available from inside the Docker image via ``<KIND_CLUSTER_NAME>-control-plane:6443`` -host:port, the worker of the Kind Cluster is available at <KIND_CLUSTER_NAME>-worker -and the webserver port for the worker is 30809. Deploying Airflow to Kubernetes Cluster --------------------------------------- -Deploying Airflow to the Kubernetes cluster created is also done via ``kind-cluster`` breeze command: +Deploying Airflow to the Kubernetes cluster created is also done via ``kind-cluster deploy`` breeze command: -.. code-block:: bash +.. code-block:: bash ./breeze kind-cluster deploy - The deploy command performs these steps: -1. If needed, it rebuilds the latest ``apache/airflow:master-pythonX.Y`` production images using the - latest sources. You can also force the build with ``--force-build-image`` flag. -2. Builds a new Kubernetes image based on the ``apache/airflow:master-pythonX.Y`` using - necessary scripts added to run in Kubernetes. The image is tagged as - ``apache/airflow:master-pythonX.Y-kubernetes``. -3. Loads the image to the Kind Cluster using the ``kind load`` command. -4. Prepares Kubernetes resources by processing a template from the ``template`` directory and replacing - variables with the right images and locations: - - configmaps.yaml - - airflow.yaml -5. Uses the existing resources without replacing any variables inside: - - secrets.yaml - - postgres.yaml - - volumes.yaml -6. Applies all the resources to the Kind Cluster. -7. Waits for all the applications to be ready and reachable. - +1. It rebuilds the latest ``apache/airflow:master-pythonX.Y`` production images using the + latest sources using local caching. It also adds example DAGs to the image, so that they do not + have to be mounted inside. +2. Loads the image to the Kind Cluster using the ``kind load`` command. +3. Starts airflow in the cluster using the official helm chart (in ``airflow`` namespace) +4. 
Forwards Local 8080 port to the webserver running in the cluster +5. Applies the volumes.yaml to get the volumes deployed to ``default`` namespace - this is where + KubernetesExecutor starts its pods. Running tests with Kubernetes Cluster ------------------------------------- -After the deployment is finished, you can run Kubernetes tests via ``scripts/ci/ci_run_kubernetes_tests.sh``. - You can either run all tests or you can select which tests to run. You can also enter interactive virtualenv to run the tests manually one by one. - .. code-block:: bash Running kubernetes tests @@ -401,6 +384,11 @@ to run the tests manually one by one. ./scripts/ci/ci_run_kubernetes_tests.sh [--help] - Prints this help message +You can also run the same tests command with Breeze, using ``kind-cluster test`` command (to run all +kubernetes tests) and with ``kind-cluster shell`` command you can enter interactive shell when you can +run tests. + + Typical testing pattern for Kubernetes tests -------------------------------------------- @@ -484,24 +472,23 @@ communicate with the Kubernetes-run Airflow deployed via the production image. Those Kubernetes tests require virtualenv to be created locally with airflow installed. The virtualenv required will be created automatically when the scripts are run. - -Either run all the tests: - +4a) You can run all the tests .. code-block:: bash ./breeze kind-cluster test -Or enter the interactive virtualenv (the environment is in ``.build/.kubernetes_venv`` folder: +4b) You can enter an interactive shell to run tests one-by-one +This prepares and enters the virtualenv in ``.build/.kubernetes_venv`` folder: .. code-block:: bash - ./scripts/ci/ci_run_kubernetes_tests.sh -i + ./breeze kind-cluster shell -Once you enter the environment you get this information: +Once you enter the environment you receive this information: .. 
code-block:: bash @@ -513,12 +500,14 @@ Once you enter the environment you get this information: The webserver is available at http://localhost:30809/ - User/password: airflow/airflow + User/password: admin/admin You are entering the virtualenv now. Type exit to exit back to the original shell -You can iterate with tests while you are in the virtualenv: +You can iterate with tests while you are in the virtualenv. All the tests requiring kubernetes cluster +are in "kubernetes_tests" folder. You can add extra ``pytest`` parameters then (for example ``-s`` will +print output generated by tests and print statements to the terminal immediately). .. code-block:: bash @@ -529,7 +518,7 @@ You can iterate with tests while you are in the virtualenv: You can modify the tests or KubernetesPodOperator and re-run them without re-deploying airflow to KinD cluster. -However when you change the Airflow Kubernetes executor implementation you need to redeploy +However when you change Airflow Kubernetes executor implementation you need to redeploy Airflow to the cluster. .. 
code-block:: bash diff --git a/airflow/kubernetes/pod_launcher.py b/airflow/kubernetes/pod_launcher.py index 5fd675d..d27a647 100644 --- a/airflow/kubernetes/pod_launcher.py +++ b/airflow/kubernetes/pod_launcher.py @@ -104,11 +104,11 @@ class PodLauncher(LoggingMixin): curr_time = dt.now() if resp.status.start_time is None: while self.pod_not_started(pod): + self.log.warning("Pod not yet started: %s", pod.metadata.name) delta = dt.now() - curr_time if delta.total_seconds() >= startup_timeout: raise AirflowException("Pod took too long to start") time.sleep(1) - self.log.debug('Pod not yet started') def monitor_pod(self, pod, get_logs): """ diff --git a/breeze b/breeze index 4321c49..01927e5 100755 --- a/breeze +++ b/breeze @@ -146,7 +146,9 @@ function setup_default_breeze_variables() { _BREEZE_DEFAULT_BACKEND="sqlite" _BREEZE_DEFAULT_KUBERNETES_MODE="image" - _BREEZE_DEFAULT_KUBERNETES_VERSION="v1.15.3" + _BREEZE_DEFAULT_KUBERNETES_VERSION="v1.18.2" + _BREEZE_DEFAULT_KIND_VERSION="v0.8.0" + _BREEZE_DEFAULT_HELM_VERSION="v3.2.4" _BREEZE_DEFAULT_POSTGRES_VERSION="9.6" _BREEZE_DEFAULT_POSTGRES_VERSION="9.6" _BREEZE_DEFAULT_MYSQL_VERSION="5.7" @@ -608,6 +610,16 @@ function parse_arguments() { echo "Kubernetes version: ${KUBERNETES_VERSION}" echo shift 2 ;; + --kind-version) + export KIND_VERSION="${2}"; + echo "Kind version: ${KIND_VERSION}" + echo + shift 2 ;; + --helm-version) + export HELM_VERSION="${2}"; + echo "Helm version: ${HELM_VERSION}" + echo + shift 2 ;; --postgres-version) export POSTGRES_VERSION="${2}"; echo "Postgres version: ${POSTGRES_VERSION}" @@ -833,6 +845,9 @@ function parse_arguments() { shift ;; kind-cluster) LAST_SUBCOMMAND="${1}" + # Force local cache strategy for all kind-cluster operations + # this helps to iterate with production images + DOCKER_CACHE="local" COMMAND_TO_RUN="manage_kind_cluster" export KIND_CLUSTER_OPERATION="${2:-}" if [[ ${KIND_CLUSTER_OPERATION} != "" ]]; then @@ -969,6 +984,10 @@ function prepare_formatted_versions() { 
fold -w "${WIDTH}" -s | sed "s/^/${LIST_PREFIX}/") FORMATTED_KUBERNETES_VERSIONS=$(echo "${_BREEZE_ALLOWED_KUBERNETES_VERSIONS=""}" | tr '\n' ' ' | \ fold -w "${WIDTH}" -s | sed "s/^/${LIST_PREFIX}/") + FORMATTED_KIND_VERSIONS=$(echo "${_BREEZE_ALLOWED_KIND_VERSIONS=""}" | tr '\n' ' ' | \ + fold -w "${WIDTH}" -s | sed "s/^/${LIST_PREFIX}/") + FORMATTED_HELM_VERSIONS=$(echo "${_BREEZE_ALLOWED_HELM_VERSIONS=""}" | tr '\n' ' ' | \ + fold -w "${WIDTH}" -s | sed "s/^/${LIST_PREFIX}/") FORMATTED_KIND_OPERATIONS=$(echo "${_BREEZE_ALLOWED_KIND_OPERATIONS=""}" | tr '\n' ' ' | \ fold -w "${WIDTH}" -s | sed "s/^/${LIST_PREFIX}/") FORMATTED_INSTALL_AIRFLOW_VERSIONS=$(echo "${_BREEZE_ALLOWED_INSTALL_AIRFLOW_VERSIONS=""}" | \ @@ -1423,6 +1442,23 @@ ${FORMATTED_KUBERNETES_MODES} ${FORMATTED_KUBERNETES_VERSIONS} Default: ${_BREEZE_DEFAULT_KUBERNETES_VERSION:=} + +--kind-version <KIND_VERSION> + Kind version - only used in case one of --kind-cluster-* commands is used. + One of: + +${FORMATTED_KIND_VERSIONS} + + Default: ${_BREEZE_DEFAULT_KIND_VERSION:=} + +--helm-version <HELM_VERSION> + Helm version - only used in case one of --kind-cluster-* commands is used. 
+ One of: + +${FORMATTED_HELM_VERSIONS} + + Default: ${_BREEZE_DEFAULT_HELM_VERSION:=} + " } @@ -1678,6 +1714,12 @@ function read_saved_environment_variables { export KUBERNETES_VERSION="${KUBERNETES_VERSION:=$(read_from_file KUBERNETES_VERSION)}" export KUBERNETES_VERSION=${KUBERNETES_VERSION:=${_BREEZE_DEFAULT_KUBERNETES_VERSION}} + export KIND_VERSION="${KIND_VERSION:=$(read_from_file KIND_VERSION)}" + export KIND_VERSION=${KIND_VERSION:=${_BREEZE_DEFAULT_KIND_VERSION}} + + export HELM_VERSION="${HELM_VERSION:=$(read_from_file HELM_VERSION)}" + export HELM_VERSION=${HELM_VERSION:=${_BREEZE_DEFAULT_HELM_VERSION}} + export POSTGRES_VERSION="${POSTGRES_VERSION:=$(read_from_file POSTGRES_VERSION)}" export POSTGRES_VERSION=${POSTGRES_VERSION:=${_BREEZE_DEFAULT_POSTGRES_VERSION}} @@ -1704,6 +1746,8 @@ function check_and_save_all_params() { check_and_save_allowed_param "BACKEND" "backend" "--backend" check_and_save_allowed_param "KUBERNETES_MODE" "Kubernetes mode" "--kubernetes-mode" check_and_save_allowed_param "KUBERNETES_VERSION" "Kubernetes version" "--kubernetes-version" + check_and_save_allowed_param "KIND_VERSION" "KinD version" "--kind-version" + check_and_save_allowed_param "HELM_VERSION" "Helm version" "--helm-version" check_and_save_allowed_param "POSTGRES_VERSION" "Postgres version" "--postgres-version" check_and_save_allowed_param "MYSQL_VERSION" "Mysql version" "--mysql-version" @@ -1886,6 +1930,8 @@ function run_build_command { echo "Deploys Airflow to KinD cluster" elif [[ ${KIND_CLUSTER_OPERATION} == "test" ]] ; then echo "Run Kubernetes tests with the KinD cluster " + elif [[ ${KIND_CLUSTER_OPERATION} == "shell" ]] ; then + echo "Enter an interactive shell for kubernetes testing" else echo "ERROR: Unknown Kind Kubernetes cluster operation: '${KIND_CLUSTER_OPERATION}'" echo @@ -1971,7 +2017,8 @@ function run_breeze_command { setup_autocomplete ;; manage_kind_cluster) - check_kind_and_kubectl_are_installed + make_sure_kubernetes_tools_are_installed + 
initialize_kind_variables perform_kind_cluster_operation "${KIND_CLUSTER_OPERATION}" ;; build_docs) diff --git a/breeze-complete b/breeze-complete index d9227a5..583fcce 100644 --- a/breeze-complete +++ b/breeze-complete @@ -21,10 +21,12 @@ _BREEZE_ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS="2.7 3.5 3.6 3.7 3.8" _BREEZE_ALLOWED_BACKENDS="sqlite mysql postgres" _BREEZE_ALLOWED_INTEGRATIONS="cassandra kerberos mongo openldap rabbitmq redis" _BREEZE_ALLOWED_KUBERNETES_MODES="image git" -_BREEZE_ALLOWED_KUBERNETES_VERSIONS="v1.15.3" +_BREEZE_ALLOWED_KUBERNETES_VERSIONS="v1.18.2" +_BREEZE_ALLOWED_HELM_VERSIONS="v3.2.4" +_BREEZE_ALLOWED_KIND_VERSIONS="v0.8.0" _BREEZE_ALLOWED_MYSQL_VERSIONS="5.6 5.7" _BREEZE_ALLOWED_POSTGRES_VERSIONS="9.6 10" -_BREEZE_ALLOWED_KIND_OPERATIONS="start stop restart status deploy test" +_BREEZE_ALLOWED_KIND_OPERATIONS="start stop restart status deploy test shell" _BREEZE_ALLOWED_INSTALL_AIRFLOW_VERSIONS=$(cat <<-EOF 1.10.10 @@ -99,7 +101,7 @@ D: R: c g: G: _BREEZE_LONG_OPTIONS=" help python: backend: integration: -kubernetes-mode: kubernetes-version: +kubernetes-mode: kubernetes-version: helm-version: kind-version: skip-mounting-local-sources install-airflow-version: install-airflow-reference: db-reset verbose assume-yes assume-no assume-quit forward-credentials force-build-images force-pull-images production-image extras: force-clean-images @@ -158,6 +160,12 @@ function _get_known_values_breeze() { -V | --kubernetes-version) _BREEZE_KNOWN_VALUES=${_BREEZE_ALLOWED_KUBERNETES_VERSIONS} ;; + --kind-version) + _BREEZE_KNOWN_VALUES=${_BREEZE_ALLOWED_KIND_VERSIONS} + ;; + --helm-version) + _BREEZE_KNOWN_VALUES=${_BREEZE_ALLOWED_HELM_VERSIONS} + ;; static-check) _BREEZE_KNOWN_VALUES=${_BREEZE_ALLOWED_STATIC_CHECKS} ;; diff --git a/chart/README.md b/chart/README.md index 76d14b4..6cc361e 100644 --- a/chart/README.md +++ b/chart/README.md @@ -28,7 +28,7 @@ cluster using the [Helm](https://helm.sh) package manager. 
## Prerequisites -- Kubernetes 1.12+ +- Kubernetes 1.12+ cluster - Helm 2.11+ or Helm 3.0+ - PV provisioner support in the underlying infrastructure @@ -74,6 +74,9 @@ helm upgrade airflow . \ --set images.airflow.tag=8a0da78 ``` +For local development purposes you can also +build the image locally and use it via the deployment method described by Breeze. + ## Parameters The following table lists the configurable parameters of the Airflow chart and their default values. diff --git a/chart/charts/postgresql-6.3.12.tgz b/chart/charts/postgresql-6.3.12.tgz deleted file mode 100644 index 51751d7..0000000 Binary files a/chart/charts/postgresql-6.3.12.tgz and /dev/null differ diff --git a/chart/requirements.lock b/chart/requirements.lock index 2eff289..f86e696 100644 --- a/chart/requirements.lock +++ b/chart/requirements.lock @@ -2,5 +2,5 @@ dependencies: - name: postgresql repository: https://kubernetes-charts.storage.googleapis.com version: 6.3.12 -digest: sha256:1750ddcc948f15716d157d6a854b59e37571473b2a3390a3673b224b71a56308 -generated: "2019-10-24T16:03:26.569269284-04:00" +digest: sha256:58d88cf56e78b2380091e9e16cc6ccf58b88b3abe4a1886dd47cd9faef5309af +generated: "2020-06-21T19:11:53.498134738+02:00" diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index c0718e7..9a0c000 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -42,6 +42,8 @@ data: remote_logging = True {{- end }} + [logging] + logging_level = DEBUG [webserver] enable_proxy_fix = True expose_config = True diff --git a/chart/templates/rbac/pod-launcher-role.yaml b/chart/templates/rbac/pod-launcher-role.yaml index b3d6818..132c839 100644 --- a/chart/templates/rbac/pod-launcher-role.yaml +++ b/chart/templates/rbac/pod-launcher-role.yaml @@ -19,7 +19,7 @@ ## Airflow Pod Launcher Role ################################# {{- if and .Values.rbacEnabled .Values.allowPodLaunching }} -kind: Role +kind: ClusterRole apiVersion: 
rbac.authorization.k8s.io/v1 metadata: name: {{ .Release.Name }}-pod-launcher-role diff --git a/chart/templates/rbac/pod-launcher-rolebinding.yaml b/chart/templates/rbac/pod-launcher-rolebinding.yaml index 9b8aadf..4dba494 100644 --- a/chart/templates/rbac/pod-launcher-rolebinding.yaml +++ b/chart/templates/rbac/pod-launcher-rolebinding.yaml @@ -21,7 +21,7 @@ {{- if and .Values.rbacEnabled .Values.allowPodLaunching }} {{- $grantScheduler := or (eq .Values.executor "LocalExecutor") (eq .Values.executor "SequentialExecutor") (eq .Values.executor "KubernetesExecutor") }} {{- $grantWorker := or (eq .Values.executor "CeleryExecutor") (eq .Values.executor "KubernetesExecutor") }} -kind: RoleBinding +kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: name: {{ .Release.Name }}-pod-launcher-rolebinding @@ -35,7 +35,7 @@ metadata: {{- end }} roleRef: apiGroup: rbac.authorization.k8s.io - kind: Role + kind: ClusterRole name: {{ .Release.Name }}-pod-launcher-role subjects: {{- if $grantScheduler }} diff --git a/kubernetes_tests/test_kubernetes_executor.py b/kubernetes_tests/test_kubernetes_executor.py index 3b86c7e..e6db822 100644 --- a/kubernetes_tests/test_kubernetes_executor.py +++ b/kubernetes_tests/test_kubernetes_executor.py @@ -16,8 +16,10 @@ # under the License. 
import os import re +import subprocess import time import unittest +from datetime import datetime from subprocess import check_call, check_output import requests @@ -25,7 +27,7 @@ import requests.exceptions from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry -KUBERNETES_HOST_PORT = (os.environ.get('CLUSTER_HOST') or "localhost") + ":30809" +KUBERNETES_HOST_PORT = (os.environ.get('CLUSTER_HOST') or "localhost") + ":8080" print() print("Cluster host/port used: ${KUBERNETES_HOST_PORT}".format(KUBERNETES_HOST_PORT=KUBERNETES_HOST_PORT)) @@ -35,6 +37,25 @@ print() class TestKubernetesExecutor(unittest.TestCase): @staticmethod + def _describe_resources(namespace): + print("=" * 80) + print("Describe resources for namespace {}".format(namespace)) + print("Datetime: {}".format(datetime.utcnow())) + print("=" * 80) + print("Describing pods") + print("-" * 80) + subprocess.call(["kubectl", "describe", "pod", "--namespace", namespace]) + print("=" * 80) + print("Describing persistent volumes") + print("-" * 80) + subprocess.call(["kubectl", "describe", "pv", "--namespace", namespace]) + print("=" * 80) + print("Describing persistent volume claims") + print("-" * 80) + subprocess.call(["kubectl", "describe", "pvc", "--namespace", namespace]) + print("=" * 80) + + @staticmethod def _num_pods_in_namespace(namespace): air_pod = check_output(['kubectl', 'get', 'pods', '-n', namespace]).decode() air_pod = air_pod.split('\n') @@ -79,7 +100,6 @@ class TestKubernetesExecutor(unittest.TestCase): # Wait some time for the operator to complete while tries < max_tries: time.sleep(5) - # Trigger a new dagrun try: get_string = \ @@ -96,6 +116,8 @@ class TestKubernetesExecutor(unittest.TestCase): if state == expected_final_state: break + self._describe_resources(namespace="airflow") + self._describe_resources(namespace="default") tries += 1 except requests.exceptions.ConnectionError as e: check_call(["echo", "api call failed. trying again. 
error {}".format(e)]) @@ -112,7 +134,6 @@ class TestKubernetesExecutor(unittest.TestCase): # Wait some time for the operator to complete while tries < max_tries: time.sleep(5) - get_string = \ 'http://{host}/api/experimental/dags/{dag_id}/' \ 'dag_runs/{execution_date}'.format(host=host, @@ -130,8 +151,9 @@ class TestKubernetesExecutor(unittest.TestCase): if state == expected_final_state: break + self._describe_resources("airflow") + self._describe_resources("default") tries += 1 - self.assertEqual(state, expected_final_state) # Maybe check if we can retrieve the logs, but then we need to extend the API @@ -197,12 +219,12 @@ class TestKubernetesExecutor(unittest.TestCase): execution_date=execution_date, dag_id=dag_id, task_id='start_task', - expected_final_state='success', timeout=100) + expected_final_state='success', timeout=300) self.ensure_dag_expected_state(host=host, execution_date=execution_date, dag_id=dag_id, - expected_final_state='success', timeout=200) + expected_final_state='success', timeout=300) def test_integration_run_dag_with_scheduler_failure(self): host = KUBERNETES_HOST_PORT @@ -219,18 +241,18 @@ class TestKubernetesExecutor(unittest.TestCase): execution_date=execution_date, dag_id=dag_id, task_id='start_task', - expected_final_state='success', timeout=200) + expected_final_state='success', timeout=300) self.monitor_task(host=host, execution_date=execution_date, dag_id=dag_id, task_id='other_namespace_task', - expected_final_state='success', timeout=200) + expected_final_state='success', timeout=300) self.ensure_dag_expected_state(host=host, execution_date=execution_date, dag_id=dag_id, - expected_final_state='success', timeout=200) + expected_final_state='success', timeout=300) self.assertEqual(self._num_pods_in_namespace('test-namespace'), 0, diff --git a/scripts/ci/ci_build_production_images.sh b/scripts/ci/ci_build_production_images.sh deleted file mode 100755 index 6087502..0000000 --- a/scripts/ci/ci_build_production_images.sh +++ 
/dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -export PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:-3.5} - -# shellcheck source=scripts/ci/_script_init.sh -. "$( dirname "${BASH_SOURCE[0]}" )/_script_init.sh" - -prepare_prod_build - -build_prod_image diff --git a/scripts/ci/ci_count_changed_files.sh b/scripts/ci/ci_count_changed_files.sh index 0e72f72..d1ccd4b 100755 --- a/scripts/ci/ci_count_changed_files.sh +++ b/scripts/ci/ci_count_changed_files.sh @@ -25,7 +25,7 @@ # shellcheck source=scripts/ci/_script_init.sh . 
"$( dirname "${BASH_SOURCE[0]}" )/_script_init.sh" -get_ci_environment +get_environment_for_builds_on_ci git remote add target "https://github.com/${CI_TARGET_REPO}" diff --git a/scripts/ci/ci_deploy_app_to_kubernetes.sh b/scripts/ci/ci_deploy_app_to_kubernetes.sh index 5b4a2a4..307bf00 100755 --- a/scripts/ci/ci_deploy_app_to_kubernetes.sh +++ b/scripts/ci/ci_deploy_app_to_kubernetes.sh @@ -20,7 +20,6 @@ set -euo pipefail -export KUBERNETES_VERSION=${KUBERNETES_VERSION:="v1.15.3"} export PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:="3.6"} export KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:="airflow-python-${PYTHON_MAJOR_MINOR_VERSION}-${KUBERNETES_VERSION}"} export KUBERNETES_MODE=${KUBERNETES_MODE:="image"} @@ -30,12 +29,11 @@ HANDLERS="$( trap -p EXIT | cut -f2 -d \' )" # shellcheck disable=SC2064 trap "${HANDLERS}${HANDLERS:+;}dump_kind_logs" EXIT -get_ci_environment -check_kind_and_kubectl_are_installed -build_kubernetes_image +get_environment_for_builds_on_ci +initialize_kind_variables +make_sure_kubernetes_tools_are_installed +build_prod_image_for_kubernetes_tests load_image_to_kind_cluster -prepare_kubernetes_app_variables -prepare_kubernetes_resources -apply_kubernetes_resources -wait_for_airflow_pods_up_and_running -wait_for_airflow_webserver_up_and_running +deploy_airflow_with_helm +forward_port_to_kind_webserver +deploy_test_kubernetes_resources diff --git a/scripts/ci/ci_docs.sh b/scripts/ci/ci_docs.sh index 417f6a6..761e9a0 100755 --- a/scripts/ci/ci_docs.sh +++ b/scripts/ci/ci_docs.sh @@ -37,7 +37,7 @@ function run_docs() { | tee -a "${OUTPUT_LOG}" } -get_ci_environment +get_environment_for_builds_on_ci prepare_ci_build diff --git a/scripts/ci/ci_flake8.sh b/scripts/ci/ci_flake8.sh index fac9be4..15a7ccb 100755 --- a/scripts/ci/ci_flake8.sh +++ b/scripts/ci/ci_flake8.sh @@ -56,7 +56,7 @@ function run_flake8() { fi } -get_ci_environment +get_environment_for_builds_on_ci prepare_ci_build diff --git a/scripts/ci/ci_generate_requirements.sh 
b/scripts/ci/ci_generate_requirements.sh index 0e20c60..689695c 100755 --- a/scripts/ci/ci_generate_requirements.sh +++ b/scripts/ci/ci_generate_requirements.sh @@ -20,7 +20,7 @@ export PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:-3.5} # shellcheck source=scripts/ci/_script_init.sh . "$( dirname "${BASH_SOURCE[0]}" )/_script_init.sh" -get_ci_environment +get_environment_for_builds_on_ci prepare_ci_build diff --git a/scripts/ci/ci_load_image_to_kind.sh b/scripts/ci/ci_load_image_to_kind.sh index 2fc2570..dda1e38 100755 --- a/scripts/ci/ci_load_image_to_kind.sh +++ b/scripts/ci/ci_load_image_to_kind.sh @@ -20,15 +20,14 @@ cd "${AIRFLOW_SOURCES}" || exit 1 -export KUBERNETES_VERSION=${KUBERNETES_VERSION:="v1.15.3"} export PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:="3.6"} export KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:="airflow-python-${PYTHON_MAJOR_MINOR_VERSION}-${KUBERNETES_VERSION}"} prepare_prod_build echo -echo "Loading the ${AIRFLOW_KUBERNETES_IMAGE} to cluster ${KIND_CLUSTER_NAME} from docker" +echo "Loading the ${AIRFLOW_PROD_IMAGE} to cluster ${KIND_CLUSTER_NAME} from docker" echo -"${AIRFLOW_SOURCES}/.build/bin/kind" load docker-image --name "${KIND_CLUSTER_NAME}" "${AIRFLOW_KUBERNETES_IMAGE}" +"${AIRFLOW_SOURCES}/.build/bin/kind" load docker-image --name "${KIND_CLUSTER_NAME}" "${AIRFLOW_PROD_IMAGE}" echo -echo "Loaded the ${AIRFLOW_KUBERNETES_IMAGE} to cluster ${KIND_CLUSTER_NAME}" +echo "Loaded the ${AIRFLOW_PROD_IMAGE} to cluster ${KIND_CLUSTER_NAME}" echo diff --git a/scripts/ci/ci_mypy.sh b/scripts/ci/ci_mypy.sh index 962d2c0..dee25c8 100755 --- a/scripts/ci/ci_mypy.sh +++ b/scripts/ci/ci_mypy.sh @@ -43,7 +43,7 @@ function run_mypy() { | tee -a "${OUTPUT_LOG}" } -get_ci_environment +get_environment_for_builds_on_ci prepare_ci_build diff --git a/scripts/ci/ci_perform_kind_cluster_operation.sh b/scripts/ci/ci_perform_kind_cluster_operation.sh index 419b7af..4d3ddd2 100755 --- a/scripts/ci/ci_perform_kind_cluster_operation.sh +++ 
b/scripts/ci/ci_perform_kind_cluster_operation.sh @@ -24,9 +24,9 @@ HANDLERS="$( trap -p EXIT | cut -f2 -d \' )" # shellcheck disable=SC2064 trap "${HANDLERS}${HANDLERS:+;}dump_kind_logs" EXIT -get_ci_environment -check_kind_and_kubectl_are_installed - +get_environment_for_builds_on_ci +make_sure_kubernetes_tools_are_installed +initialize_kind_variables perform_kind_cluster_operation "${@}" check_cluster_ready_for_airflow diff --git a/scripts/ci/ci_run_airflow_testing.sh b/scripts/ci/ci_run_airflow_testing.sh index 7b69a36..09dfb7d 100755 --- a/scripts/ci/ci_run_airflow_testing.sh +++ b/scripts/ci/ci_run_airflow_testing.sh @@ -39,7 +39,7 @@ if [[ -f ${BUILD_CACHE_DIR}/.skip_tests ]]; then exit fi -get_ci_environment +get_environment_for_builds_on_ci prepare_ci_build diff --git a/scripts/ci/ci_run_kubernetes_tests.sh b/scripts/ci/ci_run_kubernetes_tests.sh index 0e2a87f..4d49e9e 100755 --- a/scripts/ci/ci_run_kubernetes_tests.sh +++ b/scripts/ci/ci_run_kubernetes_tests.sh @@ -70,7 +70,8 @@ else fi -get_ci_environment +get_environment_for_builds_on_ci +initialize_kind_variables cd "${AIRFLOW_SOURCES}" || exit 1 @@ -100,10 +101,11 @@ if [[ ${INTERACTIVE} == "true" ]]; then echo echo "The webserver is available at http://localhost:30809/" echo - echo "User/password: airflow/airflow" + echo "User/password: admin/admin" echo echo "You are entering the virtualenv now. 
Type exit to exit back to the original shell" echo + kubectl config set-context --current --namespace=airflow exec "${SHELL}" else pytest "${PYTEST_ARGS[@]}" "${TESTS[@]}" diff --git a/scripts/ci/ci_run_static_checks.sh b/scripts/ci/ci_run_static_checks.sh index 81cd2e6..234ae61 100755 --- a/scripts/ci/ci_run_static_checks.sh +++ b/scripts/ci/ci_run_static_checks.sh @@ -27,7 +27,7 @@ if [[ -f ${BUILD_CACHE_DIR}/.skip_tests ]]; then exit fi -get_ci_environment +get_environment_for_builds_on_ci prepare_ci_build diff --git a/scripts/ci/kubernetes/app/postgres.yaml b/scripts/ci/kubernetes/app/postgres.yaml deleted file mode 100644 index c6a4db7..0000000 --- a/scripts/ci/kubernetes/app/postgres.yaml +++ /dev/null @@ -1,94 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
---- -kind: Deployment -apiVersion: extensions/v1beta1 -metadata: - name: postgres-airflow -spec: - replicas: 1 - template: - metadata: - labels: - name: postgres-airflow - spec: - restartPolicy: Always - containers: - - name: postgres - image: postgres - imagePullPolicy: IfNotPresent - ports: - - containerPort: 5432 - protocol: TCP - volumeMounts: - - name: dbvol - mountPath: /var/lib/postgresql/data/pgdata - subPath: pgdata - env: - - name: POSTGRES_USER - value: root - - name: POSTGRES_PASSWORD - value: root - - name: POSTGRES_DB - value: airflow - - name: PGDATA - value: /var/lib/postgresql/data/pgdata - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - livenessProbe: - initialDelaySeconds: 60 - timeoutSeconds: 5 - failureThreshold: 5 - exec: - command: - - /bin/sh - - -c - - > - exec pg_isready --host $POD_IP || - if [[ $(psql -qtAc --host $POD_IP 'SELECT pg_is_in_recovery') != "f" ]]; - then exit 0 else; - exit 1; fi - readinessProbe: - initialDelaySeconds: 5 - timeoutSeconds: 5 - periodSeconds: 5 - exec: - command: - - /bin/sh - - -c - - exec pg_isready --host $POD_IP - resources: - requests: - memory: .5Gi - cpu: .5 - volumes: - - name: dbvol - emptyDir: {} ---- -apiVersion: v1 -kind: Service -metadata: - name: postgres-airflow -spec: - clusterIP: None - ports: - - port: 5432 - targetPort: 5432 - selector: - name: postgres-airflow diff --git a/scripts/ci/kubernetes/app/templates/airflow.template.yaml b/scripts/ci/kubernetes/app/templates/airflow.template.yaml deleted file mode 100644 index f3704d6..0000000 --- a/scripts/ci/kubernetes/app/templates/airflow.template.yaml +++ /dev/null @@ -1,207 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: admin-rbac -subjects: - - kind: ServiceAccount - # Reference to upper's `metadata.name` - name: default - # Reference to upper's `metadata.namespace` - namespace: default -roleRef: - kind: ClusterRole - name: cluster-admin - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: airflow -spec: - replicas: 1 - template: - metadata: - labels: - name: airflow - spec: - securityContext: - runAsUser: 50000 - runAsGroup: 50000 - fsGroup: 50000 - initContainers: - - name: "init-dags" - image: {{AIRFLOW_KUBERNETES_IMAGE}} - imagePullPolicy: Never - securityContext: - runAsUser: 0 - volumeMounts: - - name: airflow-configmap - mountPath: /opt/airflow/airflow.cfg - subPath: airflow.cfg - - name: {{INIT_DAGS_VOLUME_NAME}} - mountPath: /opt/airflow/dags - env: - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: airflow-secrets - key: sql_alchemy_conn - command: - - "bash" - args: - - "-cx" - - "/tmp/airflow-test-env-init-dags.sh" - - name: "init-db" - image: {{AIRFLOW_KUBERNETES_IMAGE}} - imagePullPolicy: Never - volumeMounts: - - name: airflow-configmap - mountPath: /opt/airflow/airflow.cfg - subPath: airflow.cfg - - name: {{INIT_DAGS_VOLUME_NAME}} - mountPath: /opt/airflow/dags - env: - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: 
airflow-secrets - key: sql_alchemy_conn - command: - - "bash" - args: - - "-cx" - - "/tmp/airflow-test-env-init-db.sh" -{{INIT_GIT_SYNC}} - containers: - - name: webserver - image: {{AIRFLOW_KUBERNETES_IMAGE}} - imagePullPolicy: Never - ports: - - name: webserver - containerPort: 8080 - args: ["webserver"] - env: - - name: AIRFLOW__KUBERNETES__NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: airflow-secrets - key: sql_alchemy_conn - volumeMounts: - - name: airflow-configmap - mountPath: /opt/airflow/airflow.cfg - subPath: airflow.cfg - - name: {{POD_AIRFLOW_DAGS_VOLUME_NAME}} - mountPath: /opt/airflow/dags - - name: airflow-logs - mountPath: /opt/airflow/logs - - name: scheduler - image: {{AIRFLOW_KUBERNETES_IMAGE}} - imagePullPolicy: Never - args: ["scheduler"] - env: - - name: AIRFLOW__KUBERNETES__NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: airflow-secrets - key: sql_alchemy_conn - volumeMounts: - - name: airflow-configmap - mountPath: /opt/airflow/airflow.cfg - subPath: airflow.cfg - - name: {{POD_AIRFLOW_DAGS_VOLUME_NAME}} - mountPath: /opt/airflow/dags - - name: airflow-logs - mountPath: /opt/airflow/logs - volumes: - - name: airflow-dags - persistentVolumeClaim: - claimName: airflow-dags - - name: airflow-dags-fake - emptyDir: {} - - name: airflow-dags-git - emptyDir: {} - - name: airflow-logs - emptyDir: {} - - name: airflow-configmap - configMap: - name: airflow-configmap ---- -apiVersion: v1 -kind: Service -metadata: - name: airflow -spec: - type: NodePort - ports: - - port: 8080 - nodePort: 30809 - selector: - name: airflow ---- -apiVersion: v1 -kind: Pod -metadata: - name: init-dags - namespace: test-namespace -spec: - containers: - - name: "init-dags-test-namespace" - image: {{AIRFLOW_KUBERNETES_IMAGE}} - imagePullPolicy: Never - securityContext: - runAsUser: 0 - volumeMounts: - - name: 
airflow-configmap - mountPath: /opt/airflow/airflow.cfg - subPath: airflow.cfg - - name: {{INIT_DAGS_VOLUME_NAME}} - mountPath: /opt/airflow/dags - env: - - name: SQL_ALCHEMY_CONN - valueFrom: - secretKeyRef: - name: airflow-secrets - key: sql_alchemy_conn - command: - - "bash" - args: - - "-cx" - - "/tmp/airflow-test-env-init-dags.sh" - volumes: - - name: airflow-dags - persistentVolumeClaim: - claimName: airflow-dags - - name: airflow-dags-fake - emptyDir: {} - - name: airflow-dags-git - emptyDir: {} - - name: airflow-logs - emptyDir: {} - - name: airflow-configmap - configMap: - name: airflow-configmap diff --git a/scripts/ci/kubernetes/app/templates/configmaps.template.yaml b/scripts/ci/kubernetes/app/templates/configmaps.template.yaml deleted file mode 100644 index bb942bc..0000000 --- a/scripts/ci/kubernetes/app/templates/configmaps.template.yaml +++ /dev/null @@ -1,395 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: airflow-configmap -data: - # yamllint disable rule:line-length - airflow.cfg: | - [core] - dags_folder = {{CONFIGMAP_DAGS_FOLDER}} - base_log_folder = /opt/airflow/logs - logging_level = INFO - executor = KubernetesExecutor - parallelism = 32 - load_examples = False - load_default_connections = True - plugins_folder = /opt/airflow/plugins - sql_alchemy_conn = $SQL_ALCHEMY_CONN - - [scheduler] - dag_dir_list_interval = 300 - child_process_log_directory = /opt/airflow/logs/scheduler - # Task instances listen for external kill signal (when you clear tasks - # from the CLI or the UI), this defines the frequency at which they should - # listen (in seconds). - job_heartbeat_sec = 5 - max_threads = 2 - - # The scheduler constantly tries to trigger new tasks (look at the - # scheduler section in the docs for more information). This defines - # how often the scheduler should run (in seconds). - scheduler_heartbeat_sec = 5 - - # after how much time should the scheduler terminate in seconds - # -1 indicates to run continuously (see also num_runs) - run_duration = -1 - - # after how much time a new DAGs should be picked up from the filesystem - min_file_process_interval = 0 - - statsd_on = False - statsd_host = localhost - statsd_port = 8125 - statsd_prefix = airflow - - # How many seconds to wait between file-parsing loops to prevent the logs from being spammed. - min_file_parsing_loop_time = 1 - - print_stats_interval = 30 - scheduler_zombie_task_threshold = 300 - max_tis_per_query = 0 - authenticate = False - - # Turn off scheduler catchup by setting this to False. 
- # Default behavior is unchanged and - # Command Line Backfills still work, but the scheduler - # will not do scheduler catchup if this is False, - # however it can be set on a per DAG basis in the - # DAG definition (catchup) - catchup_by_default = True - - [webserver] - # The base url of your website as airflow cannot guess what domain or - # cname you are using. This is used in automated emails that - # airflow sends to point links to the right web server - base_url = http://localhost:8080 - - # The ip specified when starting the web server - web_server_host = 0.0.0.0 - - # The port on which to run the web server - web_server_port = 8080 - - # Paths to the SSL certificate and key for the web server. When both are - # provided SSL will be enabled. This does not change the web server port. - web_server_ssl_cert = - web_server_ssl_key = - - # Number of seconds the webserver waits before killing gunicorn master that doesn't respond - web_server_master_timeout = 120 - - # Number of seconds the gunicorn webserver waits before timing out on a worker - web_server_worker_timeout = 120 - - # Number of workers to refresh at a time. When set to 0, worker refresh is - # disabled. When nonzero, airflow periodically refreshes webserver workers by - # bringing up new ones and killing old ones. - worker_refresh_batch_size = 1 - - # Number of seconds to wait before refreshing a batch of workers. - worker_refresh_interval = 30 - - # Secret key used to run your flask app - secret_key = temporary_key - - # Number of workers to run the Gunicorn web server - workers = 4 - - # The worker class gunicorn should use. Choices include - # sync (default), eventlet, gevent - worker_class = sync - - # Log files for the gunicorn webserver. '-' means log to stderr. 
- access_logfile = - - error_logfile = - - - # Expose the configuration file in the web server - expose_config = False - - # Set to true to turn on authentication: - # https://airflow.apache.org/security.html#web-authentication - authenticate = False - - # Filter the list of dags by owner name (requires authentication to be enabled) - filter_by_owner = False - - # Filtering mode. Choices include user (default) and ldapgroup. - # Ldap group filtering requires using the ldap backend - # - # Note that the ldap server needs the "memberOf" overlay to be set up - # in order to user the ldapgroup mode. - owner_mode = user - - # Default DAG view. Valid values are: - # tree, graph, duration, gantt, landing_times - dag_default_view = tree - - # Default DAG orientation. Valid values are: - # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top) - dag_orientation = LR - - # Puts the webserver in demonstration mode; blurs the names of Operators for - # privacy. - demo_mode = False - - # The amount of time (in secs) webserver will wait for initial handshake - # while fetching logs from other worker machine - log_fetch_timeout_sec = 5 - - # By default, the webserver shows paused DAGs. 
Flip this to hide paused - # DAGs by default - hide_paused_dags_by_default = False - - # Consistent page size across all listing views in the UI - page_size = 100 - - # Use FAB-based webserver with RBAC feature - rbac = True - - [smtp] - # If you want airflow to send emails on retries, failure, and you want to use - # the airflow.utils.email.send_email_smtp function, you have to configure an - # smtp server here - smtp_host = localhost - smtp_starttls = True - smtp_ssl = False - # Uncomment and set the user/pass settings if you want to use SMTP AUTH - # smtp_user = airflow - # smtp_password = airflow - smtp_port = 25 - smtp_mail_from = airf...@example.com - - [kubernetes] - airflow_configmap = airflow-configmap - worker_container_repository = {{AIRFLOW_KUBERNETES_IMAGE_NAME}} - worker_container_tag = {{AIRFLOW_KUBERNETES_IMAGE_TAG}} - worker_container_image_pull_policy = IfNotPresent - delete_worker_pods = True - dags_in_image = False - git_repo = https://github.com/{{CONFIGMAP_GIT_REPO}}.git - git_branch = {{CONFIGMAP_BRANCH}} - git_sync_depth = "1" - git_subpath = airflow/contrib/example_dags/ - git_user = - git_password = - git_sync_root = /git - git_sync_path = repo - git_dags_folder_mount_point = {{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}} - dags_volume_claim = {{CONFIGMAP_DAGS_VOLUME_CLAIM}} - dags_volume_subpath = - logs_volume_claim = - logs_volume_subpath = - dags_volume_host = - logs_volume_host = - in_cluster = True - namespace = default - run_as_user = 50000 - gcp_service_account_keys = - - # Example affinity and toleration definitions. 
- affinity = {"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/hostname","operator":"NotIn","values":["4e5e6a99-e28a-450b-bba9-e0124853de9b"]}]}]}}} - tolerations = [{ "key": "dedicated", "operator": "Equal", "value": "airflow", "effect": "NoSchedule" }, { "key": "prod", "operator": "Exists" }] - - # For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync - git_sync_container_repository = gcr.io/google-containers/git-sync-amd64 - git_sync_container_tag = v2.0.5 - git_sync_init_container_name = git-sync-clone - - [kubernetes_node_selectors] - # The Key-value pairs to be given to worker pods. - # The worker pods will be scheduled to the nodes of the specified key-value pairs. - # Should be supplied in the format: key = value - - [kubernetes_annotations] - # The Key-value annotations pairs to be given to worker pods. - # Should be supplied in the format: key = value - - [kubernetes_secrets] - SQL_ALCHEMY_CONN = airflow-secrets=sql_alchemy_conn - - [hive] - # Default mapreduce queue for HiveOperator tasks - default_hive_mapred_queue = - - [celery] - # This section only applies if you are using the CeleryExecutor in - # [core] section above - - # The app name that will be used by celery - celery_app_name = airflow.executors.celery_executor - - # The concurrency that will be used when starting workers with the - # "airflow worker" command. This defines the number of task instances that - # a worker will take, so size up your workers based on the resources on - # your worker box and the nature of your tasks - worker_concurrency = 16 - - # When you start an airflow worker, airflow starts a tiny web server - # subprocess to serve the workers local log files to the airflow main - # web server, who then builds pages and sends them to users. This defines - # the port on which the logs are served. 
It needs to be unused, and open - # visible from the main web server to connect into the workers. - worker_log_server_port = 8793 - - # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally - # a sqlalchemy database. Refer to the Celery documentation for more - # information. - # http://docs.celeryproject.org/en/latest/userguide/configuration.html#broker-settings - broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow - - # The Celery result_backend. When a job finishes, it needs to update the - # metadata of the job. Therefore it will post a message on a message bus, - # or insert it into a database (depending of the backend) - # This status is used by the scheduler to update the state of the task - # The use of a database is highly recommended - # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-result-backend-settings - result_backend = db+mysql://airflow:airflow@localhost:3306/airflow - - # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start - # it `airflow flower`. This defines the IP that Celery Flower runs on - flower_host = 0.0.0.0 - - # The root URL for Flower - # Ex: flower_url_prefix = /flower - flower_url_prefix = - - # This defines the port that Celery Flower runs on - flower_port = 5555 - - # Securing Flower with Basic Authentication - # Accepts user:password pairs separated by a comma - # Example: flower_basic_auth = user1:password1,user2:password2 - flower_basic_auth = - - # Default queue that tasks get assigned to and that worker listen on. - default_queue = default - - # How many processes CeleryExecutor uses to sync task state. - # 0 means to use max(1, number of cores - 1) processes. 
- sync_parallelism = 0 - - # Import path for celery configuration options - celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG - - [celery_broker_transport_options] - # The visibility timeout defines the number of seconds to wait for the worker - # to acknowledge the task before the message is redelivered to another worker. - # Make sure to increase the visibility timeout to match the time of the longest - # ETA you're planning to use. Especially important in case of using Redis or SQS - visibility_timeout = 21600 - - # In case of using SSL - ssl_active = False - ssl_key = - ssl_cert = - ssl_cacert = - - [dask] - # This section only applies if you are using the DaskExecutor in - # [core] section above - - # The IP address and port of the Dask cluster's scheduler. - cluster_address = 127.0.0.1:8786 - # TLS/ SSL settings to access a secured Dask scheduler. - tls_ca = - tls_cert = - tls_key = - - [ldap] - # set this to ldaps://<your.ldap.server>:<port> - uri = - user_filter = objectClass=* - user_name_attr = uid - group_member_attr = memberOf - superuser_filter = - data_profiler_filter = - bind_user = cn=Manager,dc=example,dc=com - bind_password = insecure - basedn = dc=example,dc=com - cacert = /etc/ca/ldap_ca.crt - search_scope = LEVEL - - [mesos] - # Mesos master address which MesosExecutor will connect to. 
- master = localhost:5050 - - # The framework name which Airflow scheduler will register itself as on mesos - framework_name = Airflow - - # Number of cpu cores required for running one task instance using - # 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>' - # command on a mesos slave - task_cpu = 1 - - # Memory in MB required for running one task instance using - # 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>' - # command on a mesos slave - task_memory = 256 - - # Enable framework checkpointing for mesos - # See http://mesos.apache.org/documentation/latest/slave-recovery/ - checkpoint = False - - # Failover timeout in milliseconds. - # When checkpointing is enabled and this option is set, Mesos waits - # until the configured timeout for - # the MesosExecutor framework to re-register after a failover. Mesos - # shuts down running tasks if the - # MesosExecutor framework fails to re-register within this timeframe. - # failover_timeout = 604800 - - # Enable framework authentication for mesos - # See http://mesos.apache.org/documentation/latest/configuration/ - authenticate = False - - # Mesos credentials, if authentication is enabled - # default_principal = admin - # default_secret = admin - - # Optional Docker Image to run on slave before running the command - # This image should be accessible from mesos slave i.e mesos slave - # should be able to pull this docker image before executing the command. 
- # docker_image_slave = puckel/docker-airflow - - [kerberos] - ccache = /tmp/airflow_krb5_ccache - # gets augmented with fqdn - principal = airflow - reinit_frequency = 3600 - kinit_path = kinit - keytab = airflow.keytab - - [cli] - api_client = airflow.api.client.json_client - endpoint_url = http://localhost:8080 - - [api] - auth_backend = airflow.api.auth.backend.default - - [github_enterprise] - api_rev = v3 - - [admin] - # UI to hide sensitive variable fields when set to True - hide_sensitive_variable_fields = True - - [elasticsearch] - host = - # yamllint enable rule:line-length diff --git a/scripts/ci/kubernetes/app/templates/init_git_sync.template.yaml b/scripts/ci/kubernetes/app/templates/init_git_sync.template.yaml deleted file mode 100644 index 940b6ec..0000000 --- a/scripts/ci/kubernetes/app/templates/init_git_sync.template.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - - name: "git-sync-clone" - env: - - name: GIT_SYNC_REPO - value: https://github.com/{{CONFIGMAP_GIT_REPO}}.git - - name: GIT_SYNC_BRANCH - value: {{CONFIGMAP_BRANCH}} - - name: GIT_SYNC_ROOT - value: /git - - name: GIT_SYNC_DEST - value: repo - - name: GIT_SYNC_ONE_TIME - value: "true" - image: gcr.io/google-containers/git-sync-amd64:v2.0.5 - imagePullPolicy: IfNotPresent - securityContext: - runAsUser: 0 - volumeMounts: - - mountPath: /git - name: airflow-dags-git diff --git a/scripts/ci/kubernetes/app/volumes.yaml b/scripts/ci/kubernetes/app/volumes.yaml deleted file mode 100644 index 0e3af06..0000000 --- a/scripts/ci/kubernetes/app/volumes.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
---- -kind: PersistentVolume -apiVersion: v1 -metadata: - name: airflow-dags -spec: - accessModes: - - ReadWriteMany - capacity: - storage: 2Gi - hostPath: - path: /airflow-dags/ ---- -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: airflow-dags -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 2G ---- -kind: PersistentVolume -apiVersion: v1 -metadata: - name: airflow-logs - annotations: - pv.beta.kubernetes.io/gid: "50000" -spec: - accessModes: - - ReadWriteMany - capacity: - storage: 2Gi - hostPath: - path: /airflow-logs/ ---- -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: airflow-logs -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 2Gi ---- -kind: PersistentVolume -apiVersion: v1 -metadata: - name: test-volume -spec: - accessModes: - - ReadWriteMany - capacity: - storage: 2Gi - hostPath: - path: /airflow-dags/ ---- -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: test-volume -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 2Gi diff --git a/scripts/ci/kubernetes/docker/airflow-test-env-init-dags.sh b/scripts/ci/kubernetes/docker/airflow-test-env-init-dags.sh deleted file mode 100755 index 9722958..0000000 --- a/scripts/ci/kubernetes/docker/airflow-test-env-init-dags.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -euo pipefail - -echo -echo "Copying airflow dags" -echo - - -# Create DAGS folder if it does not exist -mkdir -pv "${AIRFLOW_HOME}/dags" -ls -la "${AIRFLOW_HOME}/dags/" -rm -rvf "${AIRFLOW_HOME}/dags/*" - -# Copy DAGS from current sources -cp -Rv "${AIRFLOW_SOURCES}"/airflow/example_dags/* "${AIRFLOW_HOME}/dags/" - -echo -echo "Copied airflow dags" -echo diff --git a/scripts/ci/kubernetes/docker/airflow-test-env-init-db.sh b/scripts/ci/kubernetes/docker/airflow-test-env-init-db.sh deleted file mode 100755 index 70c710e..0000000 --- a/scripts/ci/kubernetes/docker/airflow-test-env-init-db.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -euo pipefail - -echo -echo "Initializing the Airflow db" -echo - - -# Init and upgrade the database to latest heads -cd "${AIRFLOW_SOURCES}"/airflow || exit 1 - -airflow initdb -alembic upgrade heads - -echo -echo "Initialized the database" -echo - -# Create Airflow User if it does not exist -airflow create_user \ - --username airflow \ - --lastname airflow \ - --firstname jon \ - --email airf...@apache.org \ - --role Admin --password airflow || true - -echo -echo "Created airflow user" -echo diff --git a/scripts/ci/kubernetes/docker/bootstrap.sh b/scripts/ci/kubernetes/docker/bootstrap.sh deleted file mode 100755 index 7f9ca10..0000000 --- a/scripts/ci/kubernetes/docker/bootstrap.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -set -euo pipefail - -# This part is to allow fast iteration with the kubernetes tests -# Pre-installed Airflow from the production image is removed and Airflow is re-installed from the -# sources added during preparing the kubernetes image. 
This way when we deploy the image -# to KinD we only add latest sources and only that most recent layer is sent to kind -# and airflow always runs with compiled dist web files from pre-compiled dist installed in prod image - - -echo -echo "Save minimised web files" -echo - -mv "$(python -m site | grep ^USER_SITE | awk '{print $2}' | tr -d "'")/airflow/www_rbac/static/dist/" \ - "/tmp" - -echo -echo "Uninstalling pre-installed airflow" -echo - -# Uninstall preinstalled Apache Airlfow -pip uninstall -y apache-airflow - - -echo -echo "Installing airflow from the sources" -echo - -# Installing airflow from the sources copied to the Kubernetes image -pip install --user "${AIRFLOW_SOURCES}" - -echo -echo "Restore minimised web files" -echo - -mv "/tmp/dist" "$(python -m site | grep ^USER_SITE | awk '{print $2}' | tr -d "'")/airflow/www/static/" - -echo -echo "Airflow prepared. Running ${1}" -echo - - -if [[ "$1" = "webserver" ]] -then - exec airflow webserver -fi - -if [[ "$1" = "scheduler" ]] -then - exec airflow scheduler -fi - -echo -echo "Entering bash" -echo - -exec /bin/bash diff --git a/scripts/ci/kubernetes/kind-cluster-conf.yaml b/scripts/ci/kubernetes/kind-cluster-conf.yaml index c7a4433..348fb68 100644 --- a/scripts/ci/kubernetes/kind-cluster-conf.yaml +++ b/scripts/ci/kubernetes/kind-cluster-conf.yaml @@ -23,9 +23,6 @@ networking: nodes: - role: control-plane - role: worker - extraPortMappings: - - containerPort: 30809 - hostPort: 30809 kubeadmConfigPatchesJson6902: - group: kubeadm.k8s.io version: v1beta2 diff --git a/scripts/ci/kubernetes/app/secrets.yaml b/scripts/ci/kubernetes/volumes.yaml similarity index 66% rename from scripts/ci/kubernetes/app/secrets.yaml rename to scripts/ci/kubernetes/volumes.yaml index 34571ed..51ed2e4 100644 --- a/scripts/ci/kubernetes/app/secrets.yaml +++ b/scripts/ci/kubernetes/volumes.yaml @@ -15,12 +15,27 @@ # specific language governing permissions and limitations # under the License. 
--- +kind: PersistentVolume apiVersion: v1 -kind: Secret metadata: - name: airflow-secrets -type: Opaque -data: - # The sql_alchemy_conn value is a base64 encoded representation of this connection string: - # postgresql+psycopg2://root:r...@postgres-airflow.default.svc.cluster.local:5432/airflow - sql_alchemy_conn: cG9zdGdyZXNxbCtwc3ljb3BnMjovL3Jvb3Q6cm9vdEBwb3N0Z3Jlcy1haXJmbG93LmRlZmF1bHQuc3ZjLmNsdXN0ZXIubG9jYWw6NTQzMi9haXJmbG93 # yamllint disable-line + name: test-volume +spec: + accessModes: + - ReadWriteOnce + capacity: + storage: 2Gi + hostPath: + path: /opt/airflow/dags/ + storageClassName: standard +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: test-volume +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi + storageClassName: standard diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh index 7195b12..26ab1c4 100644 --- a/scripts/ci/libraries/_build_images.sh +++ b/scripts/ci/libraries/_build_images.sh @@ -509,7 +509,7 @@ function determine_cache_strategy() { function build_ci_image_on_ci() { export SKIP_CI_IMAGE_CHECK="false" - get_ci_environment + get_environment_for_builds_on_ci determine_cache_strategy prepare_ci_build @@ -652,14 +652,8 @@ function prepare_prod_build() { export CACHED_AIRFLOW_PROD_BUILD_IMAGE="" export CACHED_PYTHON_BASE_IMAGE="" fi - export AIRFLOW_KUBERNETES_IMAGE=${AIRFLOW_PROD_IMAGE}-kubernetes - AIRFLOW_KUBERNETES_IMAGE_NAME=$(echo "${AIRFLOW_KUBERNETES_IMAGE}" | cut -f 1 -d ":") - export AIRFLOW_KUBERNETES_IMAGE_NAME - AIRFLOW_KUBERNETES_IMAGE_TAG=$(echo "${AIRFLOW_KUBERNETES_IMAGE}" | cut -f 2 -d ":") - export AIRFLOW_KUBERNETES_IMAGE_TAG AIRFLOW_BRANCH_FOR_PYPI_PRELOADING="${BRANCH_NAME}" - go_to_airflow_sources } @@ -668,7 +662,7 @@ function prepare_prod_build() { # Depending on the type of build (push/pr/scheduled) it will either build it incrementally or # from the scratch without cache (the latter for scheduled builds only) function 
build_prod_image_on_ci() { - get_prod_environment + get_environment_for_builds_on_ci determine_cache_strategy @@ -741,6 +735,7 @@ function build_prod_image() { --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \ --build-arg AIRFLOW_BRANCH="${AIRFLOW_BRANCH_FOR_PYPI_PRELOADING}" \ --build-arg AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS}" \ + --build-arg EMBEDDED_DAGS="${EMBEDDED_DAGS}" \ "${DOCKER_CACHE_PROD_DIRECTIVE[@]}" \ -t "${AIRFLOW_PROD_IMAGE}" \ --target "main" \ diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh index c0c171a..5f2a742 100644 --- a/scripts/ci/libraries/_initialization.sh +++ b/scripts/ci/libraries/_initialization.sh @@ -212,14 +212,29 @@ function initialize_common_environment { # Determines if airflow should be installed from a specified reference in GitHub export INSTALL_AIRFLOW_REFERENCE="" + # Version suffix for PyPI packaging + export VERSION_SUFFIX_FOR_PYPI="" + + # Artifact name suffix for SVN packaging + export VERSION_SUFFIX_FOR_SVN="" + + # Default Kubernetes version + export DEFAULT_KUBERNETES_VERSION="v1.18.2" + + # Default KinD version + export DEFAULT_KIND_VERSION="v0.8.0" + + # Default Helm version + export DEFAULT_HELM_VERSION="v3.2.4" + # Version of Kubernetes to run - export KUBERNETES_VERSION="${KUBERNETES_VERSION:="v1.15.3"}" + export KUBERNETES_VERSION="${KUBERNETES_VERSION:=${DEFAULT_KUBERNETES_VERSION}}" - # Name of the KinD cluster to connect to - export KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:="airflow-python-${PYTHON_MAJOR_MINOR_VERSION}-${KUBERNETES_VERSION}"} + # folder with DAGs to embed into production image + export EMBEDDED_DAGS=${EMBEDDED_DAGS:="empty"} - # Name of the KinD cluster to connect to when referred to via kubectl - export KUBECTL_CLUSTER_NAME=kind-${KIND_CLUSTER_NAME} + # Namespace where airflow is installed via helm + export HELM_AIRFLOW_NAMESPACE="airflow" } @@ -240,7 +255,7 @@ function set_mysql_encoding() { # (This makes it easy to move between different CI systems) 
# This function maps CI-specific variables into a generic ones (prefixed with CI_) that # we used in other scripts -function get_ci_environment() { +function get_environment_for_builds_on_ci() { export CI_EVENT_TYPE="manual" export CI_TARGET_REPO="apache/airflow" export CI_TARGET_BRANCH="master" diff --git a/scripts/ci/libraries/_kind.sh b/scripts/ci/libraries/_kind.sh index 1d8b4ad..4f5ebe5 100644 --- a/scripts/ci/libraries/_kind.sh +++ b/scripts/ci/libraries/_kind.sh @@ -16,6 +16,14 @@ # specific language governing permissions and limitations # under the License. + +function initialize_kind_variables(){ + # Name of the KinD cluster to connect to + export KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:="airflow-python-${PYTHON_MAJOR_MINOR_VERSION}-${KUBERNETES_VERSION}"} + # Name of the KinD cluster to connect to when referred to via kubectl + export KUBECTL_CLUSTER_NAME=kind-${KIND_CLUSTER_NAME} +} + function dump_kind_logs() { echo "###########################################################################################" echo " Dumping logs from KIND" @@ -26,31 +34,60 @@ function dump_kind_logs() { local DUMP_DIR_NAME DUMP_DIR DUMP_DIR_NAME=kind_logs_$(date "+%Y-%m-%d")_${CI_BUILD_ID:="default"}_${CI_JOB_ID:="default"} DUMP_DIR="/tmp/${DUMP_DIR_NAME}" - kind --name "${KIND_CLUSTER_NAME}" export logs "${DUMP_DIR}" + verbose_kind --name "${KIND_CLUSTER_NAME}" export logs "${DUMP_DIR}" } -function check_kind_and_kubectl_are_installed() { +function make_sure_kubernetes_tools_are_installed() { SYSTEM=$(uname -s| tr '[:upper:]' '[:lower:]') - KIND_VERSION="v0.7.0" + KIND_VERSION=${KIND_VERSION:=${DEFAULT_KIND_VERSION}} KIND_URL="https://github.com/kubernetes-sigs/kind/releases/download/${KIND_VERSION}/kind-${SYSTEM}-amd64" KIND_PATH="${BUILD_CACHE_DIR}/bin/kind" - KUBECTL_VERSION="v1.15.3" + HELM_VERSION=${HELM_VERSION:=${DEFAULT_HELM_VERSION}} + HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-amd64.tar.gz" + HELM_PATH="${BUILD_CACHE_DIR}/bin/helm" + 
KUBECTL_VERSION=${KUBERNETES_VERSION:=${DEFAULT_KUBERNETES_VERSION}} KUBECTL_URL="https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/${SYSTEM}/amd64/kubectl" KUBECTL_PATH="${BUILD_CACHE_DIR}/bin/kubectl" mkdir -pv "${BUILD_CACHE_DIR}/bin" - if [[ ! -f "${KIND_PATH}" ]]; then + if [[ -f "${KIND_PATH}" ]]; then + DOWNLOADED_KIND_VERSION=v"$(${KIND_PATH} --version | awk '{ print $3 }')" + echo "Currently downloaded kind version = ${DOWNLOADED_KIND_VERSION}" + fi + if [[ ! -f "${KIND_PATH}" || ${DOWNLOADED_KIND_VERSION} != "${KIND_VERSION}" ]]; then echo echo "Downloading Kind version ${KIND_VERSION}" - echo curl --fail --location "${KIND_URL}" --output "${KIND_PATH}" - chmod +x "${KIND_PATH}" + chmod a+x "${KIND_PATH}" + else + echo "Kind version ok" + echo fi - if [[ ! -f "${KUBECTL_PATH}" ]]; then + if [[ -f "${KUBECTL_PATH}" ]]; then + DOWNLOADED_KUBECTL_VERSION="$(${KUBECTL_PATH} version --client=true --short | awk '{ print $3 }')" + echo "Currently downloaded kubectl version = ${DOWNLOADED_KUBECTL_VERSION}" + fi + if [[ ! -f "${KUBECTL_PATH}" || ${DOWNLOADED_KUBECTL_VERSION} != "${KUBECTL_VERSION}" ]]; then echo echo "Downloading Kubectl version ${KUBECTL_VERSION}" - echo curl --fail --location "${KUBECTL_URL}" --output "${KUBECTL_PATH}" - chmod +x "${KUBECTL_PATH}" + chmod a+x "${KUBECTL_PATH}" + else + echo "Kubectl version ok" + echo + fi + if [[ -f "${HELM_PATH}" ]]; then + DOWNLOADED_HELM_VERSION="$(${HELM_PATH} version --template '{{.Version}}')" + echo "Currently downloaded helm version = ${DOWNLOADED_HELM_VERSION}" + fi + if [[ ! 
-f "${HELM_PATH}" || ${DOWNLOADED_HELM_VERSION} != "${HELM_VERSION}" ]]; then + echo + echo "Downloading Helm version ${HELM_VERSION}" + curl --location "${HELM_URL}" | + tar -xvz -O "${SYSTEM}-amd64/helm" >"${HELM_PATH}" + chmod a+x "${HELM_PATH}" + else + echo "Helm version ok" + echo fi PATH=${PATH}:${BUILD_CACHE_DIR}/bin } @@ -69,7 +106,7 @@ function create_cluster() { fi stop_output_heartbeat else - kind create cluster \ + verbose_kind create cluster \ --name "${KIND_CLUSTER_NAME}" \ --config "${AIRFLOW_SOURCES}/scripts/ci/kubernetes/kind-cluster-conf.yaml" \ --image "kindest/node:${KUBERNETES_VERSION}" @@ -77,36 +114,10 @@ function create_cluster() { echo echo "Created cluster ${KIND_CLUSTER_NAME}" echo - - echo - echo "Patching CoreDNS to avoid loop and to use 8.8.8.8 DNS as forward address." - echo - echo "============================================================================" - echo " Original coredns configmap:" - echo "============================================================================" - kubectl --cluster "${KUBECTL_CLUSTER_NAME}" get configmaps --namespace=kube-system coredns -o yaml - kubectl --cluster "${KUBECTL_CLUSTER_NAME}" get configmaps --namespace=kube-system coredns -o yaml | \ - sed 's/forward \. .*$/forward . 
8.8.8.8/' | kubectl --cluster "${KUBECTL_CLUSTER_NAME}" apply -f - - - echo - echo "============================================================================" - echo " Updated coredns configmap with new forward directive:" - echo "============================================================================" - kubectl --cluster "${KUBECTL_CLUSTER_NAME}" get configmaps --namespace=kube-system coredns -o yaml - - - echo - echo "Restarting CoreDNS" - echo - kubectl --cluster "${KUBECTL_CLUSTER_NAME}" scale deployment --namespace=kube-system coredns --replicas=0 - kubectl --cluster "${KUBECTL_CLUSTER_NAME}" scale deployment --namespace=kube-system coredns --replicas=2 - echo - echo "Restarted CoreDNS" - echo } function delete_cluster() { - kind delete cluster --name "${KIND_CLUSTER_NAME}" + verbose_kind delete cluster --name "${KIND_CLUSTER_NAME}" echo echo "Deleted cluster ${KIND_CLUSTER_NAME}" echo @@ -126,7 +137,7 @@ function perform_kind_cluster_operation() { echo echo "Cluster name: ${KIND_CLUSTER_NAME}" echo - kind get nodes --name "${KIND_CLUSTER_NAME}" + verbose_kind get nodes --name "${KIND_CLUSTER_NAME}" echo exit else @@ -158,20 +169,24 @@ function perform_kind_cluster_operation() { echo echo "Deploying Airflow to KinD" echo - get_ci_environment - check_kind_and_kubectl_are_installed - build_kubernetes_image + get_environment_for_builds_on_ci + make_sure_kubernetes_tools_are_installed + initialize_kind_variables + build_prod_image_for_kubernetes_tests load_image_to_kind_cluster - prepare_kubernetes_app_variables - prepare_kubernetes_resources - apply_kubernetes_resources - wait_for_airflow_pods_up_and_running - wait_for_airflow_webserver_up_and_running + deploy_airflow_with_helm + forward_port_to_kind_webserver + deploy_test_kubernetes_resources elif [[ ${OPERATION} == "test" ]]; then echo - echo "Testing with kind to KinD" + echo "Testing with KinD" echo "${AIRFLOW_SOURCES}/scripts/ci/ci_run_kubernetes_tests.sh" + elif [[ ${OPERATION} == "shell" ]]; then 
+ echo + echo "Entering an interactive shell for kubernetes testing" + echo + "${AIRFLOW_SOURCES}/scripts/ci/ci_run_kubernetes_tests.sh" "-i" else echo echo "Wrong cluster operation: ${OPERATION}. Should be one of:" @@ -191,7 +206,8 @@ function perform_kind_cluster_operation() { echo "Creating cluster" echo create_cluster - elif [[ ${OPERATION} == "stop" || ${OEPRATON} == "deploy" || ${OPERATION} == "test" ]]; then + elif [[ ${OPERATION} == "stop" || ${OPERATION} == "deploy" || \ + ${OPERATION} == "test" || ${OPERATION} == "shell" ]]; then echo echo "Cluster ${KIND_CLUSTER_NAME} does not exist. It should exist for ${OPERATION} operation" echo @@ -207,175 +223,88 @@ function perform_kind_cluster_operation() { } function check_cluster_ready_for_airflow() { - kubectl cluster-info --cluster "${KUBECTL_CLUSTER_NAME}" - kubectl get nodes --cluster "${KUBECTL_CLUSTER_NAME}" + verbose_kubectl cluster-info --cluster "${KUBECTL_CLUSTER_NAME}" + verbose_kubectl get nodes --cluster "${KUBECTL_CLUSTER_NAME}" echo echo "Showing storageClass" echo - kubectl get storageclass --cluster "${KUBECTL_CLUSTER_NAME}" + verbose_kubectl get storageclass --cluster "${KUBECTL_CLUSTER_NAME}" echo echo "Showing kube-system pods" echo - kubectl get -n kube-system pods --cluster "${KUBECTL_CLUSTER_NAME}" + verbose_kubectl get -n kube-system pods --cluster "${KUBECTL_CLUSTER_NAME}" echo echo "Airflow environment on kubernetes is good to go!" 
echo - kubectl create namespace test-namespace --cluster "${KUBECTL_CLUSTER_NAME}" + verbose_kubectl create namespace test-namespace --cluster "${KUBECTL_CLUSTER_NAME}" } -function build_kubernetes_image() { +function build_prod_image_for_kubernetes_tests() { cd "${AIRFLOW_SOURCES}" || exit 1 + export EMBEDDED_DAGS="airflow/example_dags" + export DOCKER_CACHE=${DOCKER_CACHE:="pulled"} prepare_prod_build - if [[ $(docker images -q "${AIRFLOW_PROD_IMAGE}") == "" || - ${FORCE_BUILD_IMAGES:="false"} == "true" ]]; then - build_prod_image - else - echo - echo "Skip rebuilding prod image. Use --force-build-images to rebuild prod image." - echo - fi - echo - echo "Adding kubernetes-specific scripts to prod image." - echo "Building ${AIRFLOW_KUBERNETES_IMAGE} from ${AIRFLOW_PROD_IMAGE} with latest sources." - echo - docker build \ - --build-arg AIRFLOW_PROD_IMAGE="${AIRFLOW_PROD_IMAGE}" \ - --cache-from "${AIRFLOW_PROD_IMAGE}" \ - --tag="${AIRFLOW_KUBERNETES_IMAGE}" \ - -f- . << 'EOF' - ARG AIRFLOW_PROD_IMAGE - FROM ${AIRFLOW_PROD_IMAGE} - - ARG AIRFLOW_SOURCES=/home/airflow/airflow_sources/ - ENV AIRFLOW_SOURCES=${AIRFLOW_SOURCES} - - USER root - - COPY --chown=airflow:airflow . ${AIRFLOW_SOURCES} - - COPY scripts/ci/kubernetes/docker/airflow-test-env-init-db.sh /tmp/airflow-test-env-init-db.sh - COPY scripts/ci/kubernetes/docker/airflow-test-env-init-dags.sh /tmp/airflow-test-env-init-dags.sh - COPY scripts/ci/kubernetes/docker/bootstrap.sh /bootstrap.sh - - RUN chmod +x /bootstrap.sh - - - USER airflow - - - ENTRYPOINT ["/bootstrap.sh"] -EOF - - echo "The ${AIRFLOW_KUBERNETES_IMAGE} is prepared for deployment." + build_prod_image + echo "The ${AIRFLOW_PROD_IMAGE} is prepared for test kubernetes deployment." 
+ rm "${OUTPUT_LOG}" } function load_image_to_kind_cluster() { echo - echo "Loading ${AIRFLOW_KUBERNETES_IMAGE} to ${KIND_CLUSTER_NAME}" + echo "Loading ${AIRFLOW_PROD_IMAGE} to ${KIND_CLUSTER_NAME}" echo - kind load docker-image --name "${KIND_CLUSTER_NAME}" "${AIRFLOW_KUBERNETES_IMAGE}" + verbose_kind load docker-image --name "${KIND_CLUSTER_NAME}" "${AIRFLOW_PROD_IMAGE}" } -function prepare_kubernetes_app_variables() { - echo - echo "Preparing kubernetes variables" - echo - KUBERNETES_APP_DIR="${AIRFLOW_SOURCES}/scripts/ci/kubernetes/app" - TEMPLATE_DIRNAME="${KUBERNETES_APP_DIR}/templates" - BUILD_DIRNAME="${KUBERNETES_APP_DIR}/build" - - # shellcheck source=common/_image_variables.sh - . "${AIRFLOW_SOURCES}/common/_image_variables.sh" - - # Source branch will be set in DockerHub - SOURCE_BRANCH=${SOURCE_BRANCH:=${DEFAULT_BRANCH}} - BRANCH_NAME=${BRANCH_NAME:=${SOURCE_BRANCH}} - - if [[ ! -d "${BUILD_DIRNAME}" ]]; then - mkdir -p "${BUILD_DIRNAME}" - fi - - rm -f "${BUILD_DIRNAME}"/* - rm -f "${BUILD_DIRNAME}"/* - - if [[ "${KUBERNETES_MODE}" == "image" ]]; then - INIT_DAGS_VOLUME_NAME=airflow-dags - POD_AIRFLOW_DAGS_VOLUME_NAME=airflow-dags - CONFIGMAP_DAGS_FOLDER=/opt/airflow/dags - CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT= - CONFIGMAP_DAGS_VOLUME_CLAIM=airflow-dags - else - INIT_DAGS_VOLUME_NAME=airflow-dags-fake - POD_AIRFLOW_DAGS_VOLUME_NAME=airflow-dags-git - CONFIGMAP_DAGS_FOLDER=/opt/airflow/dags/repo/airflow/example_dags - CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT=/opt/airflow/dags - CONFIGMAP_DAGS_VOLUME_CLAIM= - fi - - - CONFIGMAP_GIT_REPO=${CI_SOURCE_REPO} - CONFIGMAP_BRANCH=${CI_SOURCE_BRANCH} +function forward_port_to_kind_webserver() { + num_tries=0 + set +e + while ! curl http://localhost:8080/health -s | grep -q healthy; do + if [[ ${num_tries} == 6 ]]; then + echo + echo "ERROR! Could not setup a forward port to Airflow's webserver after ${num_tries}! Exiting." 
+ echo + exit 1 + fi + echo + echo "Trying to establish port forwarding to 'airflow webserver'" + echo + kubectl port-forward svc/airflow-webserver 8080:8080 --namespace airflow >/dev/null & + sleep 10 + num_tries=$(( num_tries + 1)) + done + echo "Connection to 'airflow webserver' established" + set -e } -function prepare_kubernetes_resources() { +function deploy_airflow_with_helm() { echo - echo "Preparing kubernetes resources" + echo "Deploying Airflow with Helm" echo - if [[ "${KUBERNETES_MODE}" == "image" ]]; then - sed -e "s/{{INIT_GIT_SYNC}}//g" \ - "${TEMPLATE_DIRNAME}/airflow.template.yaml" >"${BUILD_DIRNAME}/airflow.yaml" - else - sed -e "/{{INIT_GIT_SYNC}}/{r ${TEMPLATE_DIRNAME}/init_git_sync.template.yaml" -e 'd}' \ - "${TEMPLATE_DIRNAME}/airflow.template.yaml" >"${BUILD_DIRNAME}/airflow.yaml" - fi - sed -i "s|{{AIRFLOW_KUBERNETES_IMAGE}}|${AIRFLOW_KUBERNETES_IMAGE}|g" "${BUILD_DIRNAME}/airflow.yaml" - - sed -i "s|{{CONFIGMAP_GIT_REPO}}|${CONFIGMAP_GIT_REPO}|g" "${BUILD_DIRNAME}/airflow.yaml" - sed -i "s|{{CONFIGMAP_BRANCH}}|${CONFIGMAP_BRANCH}|g" "${BUILD_DIRNAME}/airflow.yaml" - sed -i "s|{{INIT_DAGS_VOLUME_NAME}}|${INIT_DAGS_VOLUME_NAME}|g" "${BUILD_DIRNAME}/airflow.yaml" - sed -i "s|{{POD_AIRFLOW_DAGS_VOLUME_NAME}}|${POD_AIRFLOW_DAGS_VOLUME_NAME}|g" \ - "${BUILD_DIRNAME}/airflow.yaml" - - sed "s|{{CONFIGMAP_DAGS_FOLDER}}|${CONFIGMAP_DAGS_FOLDER}|g" \ - "${TEMPLATE_DIRNAME}/configmaps.template.yaml" >"${BUILD_DIRNAME}/configmaps.yaml" - sed -i "s|{{CONFIGMAP_GIT_REPO}}|${CONFIGMAP_GIT_REPO}|g" "${BUILD_DIRNAME}/configmaps.yaml" - sed -i "s|{{CONFIGMAP_BRANCH}}|${CONFIGMAP_BRANCH}|g" "${BUILD_DIRNAME}/configmaps.yaml" - sed -i "s|{{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}}|${CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}|g" \ - "${BUILD_DIRNAME}/configmaps.yaml" - sed -i "s|{{CONFIGMAP_DAGS_VOLUME_CLAIM}}|${CONFIGMAP_DAGS_VOLUME_CLAIM}|g" \ - "${BUILD_DIRNAME}/configmaps.yaml" - sed -i "s|{{AIRFLOW_KUBERNETES_IMAGE_NAME}}|${AIRFLOW_KUBERNETES_IMAGE_NAME}|g" \ - 
"${BUILD_DIRNAME}/configmaps.yaml" - sed -i "s|{{AIRFLOW_KUBERNETES_IMAGE_TAG}}|${AIRFLOW_KUBERNETES_IMAGE_TAG}|g" \ - "${BUILD_DIRNAME}/configmaps.yaml" + echo "Deleting namespace ${HELM_AIRFLOW_NAMESPACE}" + verbose_kubectl delete namespace "${HELM_AIRFLOW_NAMESPACE}" >/dev/null 2>&1 || true + verbose_kubectl delete namespace "test-namespace" >/dev/null 2>&1 || true + verbose_kubectl create namespace "${HELM_AIRFLOW_NAMESPACE}" + verbose_kubectl create namespace "test-namespace" + pushd "${AIRFLOW_SOURCES}/chart" || exit 1 + verbose_helm repo add stable https://kubernetes-charts.storage.googleapis.com + verbose_helm dep update + verbose_helm install airflow . --namespace "${HELM_AIRFLOW_NAMESPACE}" \ + --set "defaultAirflowRepository=${DOCKERHUB_USER}/${DOCKERHUB_REPO}" \ + --set "images.airflow.repository=${DOCKERHUB_USER}/${DOCKERHUB_REPO}" \ + --set "images.airflow.tag=${AIRFLOW_PROD_BASE_TAG}" -v 1 \ + --set "defaultAirflowTag=${AIRFLOW_PROD_BASE_TAG}" -v 1 + echo + popd || exit 1 } -function apply_kubernetes_resources() { + +function deploy_test_kubernetes_resources() { echo - echo "Apply kubernetes resources." 
+ echo "Deploying Custom kubernetes resources" echo - - - kubectl delete -f "${KUBERNETES_APP_DIR}/postgres.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" \ - 2>&1 | grep -v "NotFound" || true - kubectl delete -f "${BUILD_DIRNAME}/airflow.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" \ - 2>&1 | grep -v "NotFound" || true - kubectl delete -f "${KUBERNETES_APP_DIR}/secrets.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" \ - 2>&1 | grep -v "NotFound" || true - - set -e - - kubectl apply -f "${KUBERNETES_APP_DIR}/secrets.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" - kubectl apply -f "${BUILD_DIRNAME}/configmaps.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" - kubectl apply -f "${KUBERNETES_APP_DIR}/volumes.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" - - kubectl apply -f "${KUBERNETES_APP_DIR}/secrets.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" -n "test-namespace" - kubectl apply -f "${BUILD_DIRNAME}/configmaps.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" -n "test-namespace" - kubectl apply -f "${KUBERNETES_APP_DIR}/volumes.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" -n "test-namespace" - - kubectl apply -f "${KUBERNETES_APP_DIR}/postgres.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" - kubectl apply -f "${BUILD_DIRNAME}/airflow.yaml" --cluster "${KUBECTL_CLUSTER_NAME}" + verbose_kubectl apply -f "scripts/ci/kubernetes/volumes.yaml" --namespace default } @@ -383,77 +312,8 @@ function dump_kubernetes_logs() { POD=$(kubectl get pods -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' \ --cluster "${KUBECTL_CLUSTER_NAME}" | grep airflow | head -1) echo "------- pod description -------" - kubectl describe pod "${POD}" --cluster "${KUBECTL_CLUSTER_NAME}" + verbose_kubectl describe pod "${POD}" --cluster "${KUBECTL_CLUSTER_NAME}" echo "------- airflow pod logs -------" - kubectl logs "${POD}" --all-containers=true || true + verbose_kubectl logs "${POD}" --all-containers=true || true echo "--------------" } - -function wait_for_airflow_pods_up_and_running() { - set +o pipefail - # wait 
for up to 10 minutes for everything to be deployed - PODS_ARE_READY="0" - for i in {1..150}; do - echo "------- Running kubectl get pods: $i -------" - PODS=$(kubectl get pods --cluster "${KUBECTL_CLUSTER_NAME}" | awk 'NR>1 {print $0}') - echo "$PODS" - NUM_AIRFLOW_READY=$(echo "${PODS}" | grep airflow | awk '{print $2}' | grep -cE '([0-9])\/(\1)' \ - | xargs) - NUM_POSTGRES_READY=$(echo "${PODS}" | grep postgres | awk '{print $2}' | grep -cE '([0-9])\/(\1)' \ - | xargs) - if [[ "${NUM_AIRFLOW_READY}" == "1" && "${NUM_POSTGRES_READY}" == "1" ]]; then - PODS_ARE_READY="1" - break - fi - sleep 4 - done - POD=$(kubectl get pods -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' \ - --cluster "${KUBECTL_CLUSTER_NAME}" | grep airflow | head -1) - - if [[ "${PODS_ARE_READY}" == "1" ]]; then - echo "PODS are ready." - set -o pipefail - else - echo >&2 "PODS are not ready after waiting for a long time. Exiting..." - dump_kubernetes_logs - exit 1 - fi -} - - -function wait_for_airflow_webserver_up_and_running() { - set +o pipefail - # Wait until Airflow webserver is up - KUBERNETES_HOST=localhost - AIRFLOW_WEBSERVER_IS_READY="0" - CONSECUTIVE_SUCCESS_CALLS=0 - for i in {1..30}; do - echo "------- Wait until webserver is up: $i -------" - PODS=$(kubectl get pods --cluster "${KUBECTL_CLUSTER_NAME}" | awk 'NR>1 {print $0}') - echo "$PODS" - HTTP_CODE=$(curl -LI "http://${KUBERNETES_HOST}:30809/health" -o /dev/null -w '%{http_code}\n' -sS) \ - || true - if [[ "${HTTP_CODE}" == 200 ]]; then - ((CONSECUTIVE_SUCCESS_CALLS += 1)) - else - CONSECUTIVE_SUCCESS_CALLS="0" - fi - if [[ "${CONSECUTIVE_SUCCESS_CALLS}" == 3 ]]; then - AIRFLOW_WEBSERVER_IS_READY="1" - break - fi - sleep 10 - done - set -o pipefail - if [[ "${AIRFLOW_WEBSERVER_IS_READY}" == "1" ]]; then - echo - echo "Airflow webserver is ready." - echo - else - echo >&2 - echo >&2 "Airflow webserver is not ready after waiting for a long time. Exiting..." 
- echo >&2 - dump_kubernetes_logs - exit 1 - fi -} diff --git a/scripts/ci/libraries/_verbosity.sh b/scripts/ci/libraries/_verbosity.sh index b7a4c0a..af78fea 100644 --- a/scripts/ci/libraries/_verbosity.sh +++ b/scripts/ci/libraries/_verbosity.sh @@ -35,6 +35,37 @@ function verbose_docker { docker "${@}" } +# In case "VERBOSE" is set to "true" (--verbose flag in Breeze) all helm commands run will be +# printed before execution +function verbose_helm { + if [[ ${VERBOSE:="false"} == "true" && ${VERBOSE_COMMANDS:=} != "true" ]]; then + # do not print echo if VERBOSE_COMMAND is set (set -x does it already) + echo "helm" "${@}" + fi + helm "${@}" +} + +# In case "VERBOSE" is set to "true" (--verbose flag in Breeze) all kubectl commands run will be +# printed before execution +function verbose_kubectl { + if [[ ${VERBOSE:="false"} == "true" && ${VERBOSE_COMMANDS:=} != "true" ]]; then + # do not print echo if VERBOSE_COMMAND is set (set -x does it already) + echo "kubectl" "${@}" + fi + kubectl "${@}" +} + +# In case "VERBOSE" is set to "true" (--verbose flag in Breeze) all kind commands run will be +# printed before execution +function verbose_kind { + if [[ ${VERBOSE:="false"} == "true" && ${VERBOSE_COMMANDS:=} != "true" ]]; then + # do not print echo if VERBOSE_COMMAND is set (set -x does it already) + echo "kind" "${@}" + fi + kind "${@}" +} + + # In case "VERBOSE" is set to "true" (--verbose flag in Breeze) all docker commands run will be # printed before execution function verbose_docker_hide_output_on_success {