This is an automated email from the ASF dual-hosted git repository. eladkal pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new 8e383e87e1 Fix hardcoded container name in remote logging option for Azure Blob Storage (#32779) 8e383e87e1 is described below commit 8e383e87e1e4d6076f92fd3bce298e32607d43c5 Author: Akash Sharma <35839624+adave...@users.noreply.github.com> AuthorDate: Fri Oct 13 20:57:58 2023 +0530 Fix hardcoded container name in remote logging option for Azure Blob Storage (#32779) * added config for container in azure blob remote logs --------- Co-authored-by: adaverse <adaverse@LAPTOP-JD3LRTNF> Co-authored-by: Elad Kalif <45845474+elad...@users.noreply.github.com> Co-authored-by: adaverse <adaverse@Akash> --- airflow/config_templates/airflow_local_settings.py | 5 ++- .../microsoft/azure/log/wasb_task_handler.py | 10 ++--- airflow/providers/microsoft/azure/provider.yaml | 14 ++++++ .../configurations-ref.rst | 18 ++++++++ .../index.rst | 1 + .../logging/index.rst | 51 ++++++++++++---------- .../microsoft/azure/log/test_wasb_task_handler.py | 3 +- 7 files changed, 70 insertions(+), 32 deletions(-) diff --git a/airflow/config_templates/airflow_local_settings.py b/airflow/config_templates/airflow_local_settings.py index 7349819d98..71ad054184 100644 --- a/airflow/config_templates/airflow_local_settings.py +++ b/airflow/config_templates/airflow_local_settings.py @@ -246,13 +246,16 @@ if REMOTE_LOGGING: DEFAULT_LOGGING_CONFIG["handlers"].update(GCS_REMOTE_HANDLERS) elif REMOTE_BASE_LOG_FOLDER.startswith("wasb"): + wasb_log_container = conf.get_mandatory_value( + "azure_remote_logging", "remote_wasb_log_container", fallback="airflow-logs" + ) WASB_REMOTE_HANDLERS: dict[str, dict[str, str | bool | None]] = { "task": { "class": "airflow.providers.microsoft.azure.log.wasb_task_handler.WasbTaskHandler", "formatter": "airflow", "base_log_folder": str(os.path.expanduser(BASE_LOG_FOLDER)), "wasb_log_folder": REMOTE_BASE_LOG_FOLDER, - "wasb_container": "airflow-logs", + "wasb_container": wasb_log_container,
"filename_template": FILENAME_TEMPLATE, }, } diff --git a/airflow/providers/microsoft/azure/log/wasb_task_handler.py b/airflow/providers/microsoft/azure/log/wasb_task_handler.py index 941462c2da..ac45fb6c42 100644 --- a/airflow/providers/microsoft/azure/log/wasb_task_handler.py +++ b/airflow/providers/microsoft/azure/log/wasb_task_handler.py @@ -136,11 +136,9 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin): messages = [] logs = [] worker_log_relative_path = self._render_filename(ti, try_number) - # todo: fix this - # for some reason this handler was designed such that (1) container name is not configurable - # (i.e. it's hardcoded in airflow_local_settings.py) and (2) the "relative path" is actually... - # whatever you put in REMOTE_BASE_LOG_FOLDER i.e. it includes the "wasb://" in the blob - # name. it's very screwed up but to change it we have to be careful not to break backcompat. + # TODO: fix this - "relative path" i.e. currently REMOTE_BASE_LOG_FOLDER should start with "wasb" + # unlike others with scheme in URL itself to identify the correct handler. + # This puts limitations on ways users can name the base_path.
prefix = os.path.join(self.remote_base, worker_log_relative_path) blob_names = [] try: @@ -151,7 +149,7 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin): self.log.exception("can't list blobs") if blob_names: - uris = [f"wasb://{self.wasb_container}/{b}" for b in blob_names] + uris = [f"https://{self.wasb_container}.blob.core.windows.net/{b}" for b in blob_names] messages.extend(["Found remote logs:", *[f" * {x}" for x in sorted(uris)]]) else: messages.append(f"No logs found in WASB; ti=%s {ti}") diff --git a/airflow/providers/microsoft/azure/provider.yaml b/airflow/providers/microsoft/azure/provider.yaml index 14a9d36ea0..91d65f5fdc 100644 --- a/airflow/providers/microsoft/azure/provider.yaml +++ b/airflow/providers/microsoft/azure/provider.yaml @@ -298,3 +298,17 @@ logging: extra-links: - airflow.providers.microsoft.azure.operators.data_factory.AzureDataFactoryPipelineRunLink + +config: + azure_remote_logging: + description: | + Configuration that needs to be set to enable remote logging in Azure Blob Storage + options: + remote_wasb_log_container: + description: | + WASB storage container where the remote logs will be stored. + The container should exist. + version_added: 8.0.0 + type: string + example: ~ + default: "airflow-logs" diff --git a/docs/apache-airflow-providers-microsoft-azure/configurations-ref.rst b/docs/apache-airflow-providers-microsoft-azure/configurations-ref.rst new file mode 100644 index 0000000000..5885c9d91b --- /dev/null +++ b/docs/apache-airflow-providers-microsoft-azure/configurations-ref.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + ..
http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../exts/includes/providers-configurations-ref.rst diff --git a/docs/apache-airflow-providers-microsoft-azure/index.rst b/docs/apache-airflow-providers-microsoft-azure/index.rst index 0bd36c9877..c9a07d1288 100644 --- a/docs/apache-airflow-providers-microsoft-azure/index.rst +++ b/docs/apache-airflow-providers-microsoft-azure/index.rst @@ -46,6 +46,7 @@ :maxdepth: 1 :caption: References + Configuration <configurations-ref> Python API <_api/airflow/providers/microsoft/azure/index> .. toctree:: diff --git a/docs/apache-airflow-providers-microsoft-azure/logging/index.rst b/docs/apache-airflow-providers-microsoft-azure/logging/index.rst index d0f176d01f..ddfc909992 100644 --- a/docs/apache-airflow-providers-microsoft-azure/logging/index.rst +++ b/docs/apache-airflow-providers-microsoft-azure/logging/index.rst @@ -26,30 +26,33 @@ this process will fail. Follow the steps below to enable Azure Blob Storage logging: -#. Airflow's logging system requires a custom ``.py`` file to be located in the :envvar:`PYTHONPATH`, so that it's importable from Airflow. Start by creating a directory to store the config file, ``$AIRFLOW_HOME/config`` is recommended. -#. Create empty files called ``$AIRFLOW_HOME/config/log_config.py`` and ``$AIRFLOW_HOME/config/__init__.py``. -#. Copy the contents of ``airflow/config_templates/airflow_local_settings.py`` into the ``log_config.py`` file created in ``Step 2``. -#. Customize the following portions of the template: - - .. 
code-block:: ini - - # wasb buckets should start with "wasb" just to help Airflow select correct handler - REMOTE_BASE_LOG_FOLDER = 'wasb://<container_name>@<storage_account>.blob.core.windows.net' - - # Rename DEFAULT_LOGGING_CONFIG to LOGGING CONFIG - LOGGING_CONFIG = ... - - -#. Make sure a Azure Blob Storage (Wasb) connection hook has been defined in Airflow. The hook should have read and write access to the Azure Blob Storage bucket defined above in ``REMOTE_BASE_LOG_FOLDER``. - -#. Update ``$AIRFLOW_HOME/airflow.cfg`` to contain: +To enable this feature, ``airflow.cfg`` must be configured as in this +example: + +.. code-block:: ini + + [logging] + # Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search. + # Users must supply an Airflow connection id that provides access to the storage + # location. If remote_logging is set to true, see UPDATING.md for additional + # configuration requirements. + remote_logging = True + remote_base_log_folder = wasb-base-folder/path/to/logs + + [azure_remote_logging] + remote_wasb_log_container = my-container + +#. Install the provider package with ``pip install apache-airflow-providers-microsoft-azure`` +#. Ensure :ref:`connection <howto/connection:wasb>` is already set up with read and write access to Azure Blob Storage in the ``remote_wasb_log_container`` container and path ``remote_base_log_folder``. +#. Set up the above configuration values. Please note that the ``remote_base_log_folder`` should start with ``wasb`` to select the correct handler as shown above and the container should already exist. +#. Restart the Airflow webserver and scheduler, and trigger (or wait for) a new task execution. +#. Verify that logs are showing up for newly executed tasks in the container at the specified base path you have defined. +#. Verify that the Azure Blob Storage viewer is working in the UI. Pull up a newly executed task, and verify that you see something like: - .. code-block:: ini +..
code-block:: none - [logging] - remote_logging = True - logging_config_class = log_config.LOGGING_CONFIG - remote_log_conn_id = <name of the Azure Blob Storage connection> + *** Found remote logs: + *** * https://my-container.blob.core.windows.net/wasb-base-folder/path/to/logs/dag_id=tutorial_dag/run_id=manual__2023-07-22T22:22:25.891267+00:00/task_id=load/attempt=1.log + [2023-07-23, 03:52:47] {taskinstance.py:1144} INFO - Dependencies all met for dep_context=non-requeueable deps ti=<TaskInstance: tutorial_dag.load manual__2023-07-22T22:22:25.891267+00:00 [queued]> + [2023-07-23, 03:52:47] {taskinstance.py:1144} INFO - Dependencies all met for dep_context=requeueable deps ti=<TaskInstance: tutorial_dag.load manual__2023-07-22T22:22:25.891267+00:00 [queued]> + [2023-07-23, 03:52:47] {taskinstance.py:1346} INFO - Starting attempt 1 of 3 -#. Restart the Airflow webserver and scheduler, and trigger (or wait for) a new task execution. -#. Verify that logs are showing up for newly executed tasks in the bucket you have defined. +**Note** that the path to the remote log file is listed in the second line. diff --git a/tests/providers/microsoft/azure/log/test_wasb_task_handler.py b/tests/providers/microsoft/azure/log/test_wasb_task_handler.py index 6178293e0d..fe554bd161 100644 --- a/tests/providers/microsoft/azure/log/test_wasb_task_handler.py +++ b/tests/providers/microsoft/azure/log/test_wasb_task_handler.py @@ -113,7 +113,8 @@ class TestWasbTaskHandler: [ ( "localhost", - "*** Found remote logs:\n*** * wasb://wasb-container/abc/hello.log\nLog line", + "*** Found remote logs:\n" + "*** * https://wasb-container.blob.core.windows.net/abc/hello.log\nLog line", ) ] ],