This is an automated email from the ASF dual-hosted git repository.

eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 8e383e87e1 Fix hardcoded container name in remote logging option for 
Azure Blob Storage (#32779)
8e383e87e1 is described below

commit 8e383e87e1e4d6076f92fd3bce298e32607d43c5
Author: Akash Sharma <35839624+adave...@users.noreply.github.com>
AuthorDate: Fri Oct 13 20:57:58 2023 +0530

    Fix hardcoded container name in remote logging option for Azure Blob 
Storage (#32779)
    
    * added config for container in azure blob remote logs
    ---------
    
    Co-authored-by: adaverse <adaverse@LAPTOP-JD3LRTNF>
    Co-authored-by: Elad Kalif <45845474+elad...@users.noreply.github.com>
    Co-authored-by: adaverse <adaverse@Akash>
---
 airflow/config_templates/airflow_local_settings.py |  5 ++-
 .../microsoft/azure/log/wasb_task_handler.py       | 10 ++---
 airflow/providers/microsoft/azure/provider.yaml    | 14 ++++++
 .../configurations-ref.rst                         | 18 ++++++++
 .../index.rst                                      |  1 +
 .../logging/index.rst                              | 51 ++++++++++++----------
 .../microsoft/azure/log/test_wasb_task_handler.py  |  3 +-
 7 files changed, 70 insertions(+), 32 deletions(-)

diff --git a/airflow/config_templates/airflow_local_settings.py 
b/airflow/config_templates/airflow_local_settings.py
index 7349819d98..71ad054184 100644
--- a/airflow/config_templates/airflow_local_settings.py
+++ b/airflow/config_templates/airflow_local_settings.py
@@ -246,13 +246,16 @@ if REMOTE_LOGGING:
 
         DEFAULT_LOGGING_CONFIG["handlers"].update(GCS_REMOTE_HANDLERS)
     elif REMOTE_BASE_LOG_FOLDER.startswith("wasb"):
+        wasb_log_container = conf.get_mandatory_value(
+            "azure_remote_logging", "remote_wasb_log_container", 
fallback="airflow-logs"
+        )
         WASB_REMOTE_HANDLERS: dict[str, dict[str, str | bool | None]] = {
             "task": {
                 "class": 
"airflow.providers.microsoft.azure.log.wasb_task_handler.WasbTaskHandler",
                 "formatter": "airflow",
                 "base_log_folder": str(os.path.expanduser(BASE_LOG_FOLDER)),
                 "wasb_log_folder": REMOTE_BASE_LOG_FOLDER,
-                "wasb_container": "airflow-logs",
+                "wasb_container": wasb_log_container,
                 "filename_template": FILENAME_TEMPLATE,
             },
         }
diff --git a/airflow/providers/microsoft/azure/log/wasb_task_handler.py 
b/airflow/providers/microsoft/azure/log/wasb_task_handler.py
index 941462c2da..ac45fb6c42 100644
--- a/airflow/providers/microsoft/azure/log/wasb_task_handler.py
+++ b/airflow/providers/microsoft/azure/log/wasb_task_handler.py
@@ -136,11 +136,9 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
         messages = []
         logs = []
         worker_log_relative_path = self._render_filename(ti, try_number)
-        # todo: fix this
-        # for some reason this handler was designed such that (1) container 
name is not configurable
-        # (i.e. it's hardcoded in airflow_local_settings.py) and (2) the 
"relative path" is actually...
-        # whatever you put in REMOTE_BASE_LOG_FOLDER i.e. it includes the 
"wasb://" in the blob
-        # name. it's very screwed up but to change it we have to be careful 
not to break backcompat.
+        # TODO: fix this - the "relative path" (i.e. currently 
REMOTE_BASE_LOG_FOLDER) should start with "wasb",
+        # unlike the other handlers, where the scheme in the URL itself 
identifies the correct handler.
+        # This puts limitations on the ways users can name the base_path.
         prefix = os.path.join(self.remote_base, worker_log_relative_path)
         blob_names = []
         try:
@@ -151,7 +149,7 @@ class WasbTaskHandler(FileTaskHandler, LoggingMixin):
             self.log.exception("can't list blobs")
 
         if blob_names:
-            uris = [f"wasb://{self.wasb_container}/{b}" for b in blob_names]
+            uris = [f"https://{self.wasb_container}.blob.core.windows.net/{b}" 
for b in blob_names]
             messages.extend(["Found remote logs:", *[f"  * {x}" for x in 
sorted(uris)]])
         else:
             messages.append(f"No logs found in WASB; ti=%s {ti}")
diff --git a/airflow/providers/microsoft/azure/provider.yaml 
b/airflow/providers/microsoft/azure/provider.yaml
index 14a9d36ea0..91d65f5fdc 100644
--- a/airflow/providers/microsoft/azure/provider.yaml
+++ b/airflow/providers/microsoft/azure/provider.yaml
@@ -298,3 +298,17 @@ logging:
 
 extra-links:
   - 
airflow.providers.microsoft.azure.operators.data_factory.AzureDataFactoryPipelineRunLink
+
+config:
+  azure_remote_logging:
+    description: |
+      Configuration that needs to be set to enable remote logging in Azure 
Blob Storage
+    options:
+      remote_wasb_log_container:
+        description: |
+          WASB storage container where the remote logs will be stored.
+          The container should exist.
+        version_added: 8.0.0
+        type: string
+        example: ~
+        default: "airflow-logs"
diff --git 
a/docs/apache-airflow-providers-microsoft-azure/configurations-ref.rst 
b/docs/apache-airflow-providers-microsoft-azure/configurations-ref.rst
new file mode 100644
index 0000000000..5885c9d91b
--- /dev/null
+++ b/docs/apache-airflow-providers-microsoft-azure/configurations-ref.rst
@@ -0,0 +1,18 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+.. include:: ../exts/includes/providers-configurations-ref.rst
diff --git a/docs/apache-airflow-providers-microsoft-azure/index.rst 
b/docs/apache-airflow-providers-microsoft-azure/index.rst
index 0bd36c9877..c9a07d1288 100644
--- a/docs/apache-airflow-providers-microsoft-azure/index.rst
+++ b/docs/apache-airflow-providers-microsoft-azure/index.rst
@@ -46,6 +46,7 @@
     :maxdepth: 1
     :caption: References
 
+    Configuration <configurations-ref>
     Python API <_api/airflow/providers/microsoft/azure/index>
 
 .. toctree::
diff --git a/docs/apache-airflow-providers-microsoft-azure/logging/index.rst 
b/docs/apache-airflow-providers-microsoft-azure/logging/index.rst
index d0f176d01f..ddfc909992 100644
--- a/docs/apache-airflow-providers-microsoft-azure/logging/index.rst
+++ b/docs/apache-airflow-providers-microsoft-azure/logging/index.rst
@@ -26,30 +26,33 @@ this process will fail.
 
 Follow the steps below to enable Azure Blob Storage logging:
 
-#. Airflow's logging system requires a custom ``.py`` file to be located in 
the :envvar:`PYTHONPATH`, so that it's importable from Airflow. Start by 
creating a directory to store the config file, ``$AIRFLOW_HOME/config`` is 
recommended.
-#. Create empty files called ``$AIRFLOW_HOME/config/log_config.py`` and 
``$AIRFLOW_HOME/config/__init__.py``.
-#. Copy the contents of ``airflow/config_templates/airflow_local_settings.py`` 
into the ``log_config.py`` file created in ``Step 2``.
-#. Customize the following portions of the template:
-
-    .. code-block:: ini
-
-        # wasb buckets should start with "wasb" just to help Airflow select 
correct handler
-        REMOTE_BASE_LOG_FOLDER = 
'wasb://<container_name>@<storage_account>.blob.core.windows.net'
-
-        # Rename DEFAULT_LOGGING_CONFIG to LOGGING CONFIG
-        LOGGING_CONFIG = ...
-
-
-#. Make sure a Azure Blob Storage (Wasb) connection hook has been defined in 
Airflow. The hook should have read and write access to the Azure Blob Storage 
bucket defined above in ``REMOTE_BASE_LOG_FOLDER``.
-
-#. Update ``$AIRFLOW_HOME/airflow.cfg`` to contain:
+To enable this feature, ``airflow.cfg`` must be configured as in this
+example:
+
+.. code-block:: ini
+
+    [logging]
+    # Airflow can store logs remotely in AWS S3, Google Cloud Storage or 
Elastic Search.
+    # Users must supply an Airflow connection id that provides access to the 
storage
+    # location. If remote_logging is set to true, see UPDATING.md for 
additional
+    # configuration requirements.
+    remote_logging = True
+    remote_base_log_folder = wasb-base-folder/path/to/logs
+    remote_wasb_log_container = my-container
+
+#. Install the provider package with ``pip install 
apache-airflow-providers-microsoft-azure``
+#. Ensure the :ref:`connection <howto/connection:wasb>` is already set up with 
read and write access to Azure Blob Storage in the 
``remote_wasb_log_container`` container and path ``remote_base_log_folder``.
+#. Set up the above configuration values. Please note that 
``remote_base_log_folder`` should start with ``wasb`` to select the correct 
handler as shown above, and that the container should already exist.
+#. Restart the Airflow webserver and scheduler, and trigger (or wait for) a 
new task execution.
+#. Verify that logs are showing up for newly executed tasks in the container 
at the specified base path you have defined.
+#. Verify that the Azure Blob Storage viewer is working in the UI. Pull up a 
newly executed task, and verify that you see something like:
 
-    .. code-block:: ini
+.. code-block:: none
 
-        [logging]
-        remote_logging = True
-        logging_config_class = log_config.LOGGING_CONFIG
-        remote_log_conn_id = <name of the Azure Blob Storage connection>
+    *** Found remote logs:
+    ***   * 
https://my-container.blob.core.windows.net/wasb-base-folder/path/to/logs/dag_id=tutorial_dag/run_id=manual__2023-07-22T22:22:25.891267+00:00/task_id=load/attempt=1.log
+    [2023-07-23, 03:52:47] {taskinstance.py:1144} INFO - Dependencies all met 
for dep_context=non-requeueable deps ti=<TaskInstance: tutorial_dag.load 
manual__2023-07-22T22:22:25.891267+00:00 [queued]>
+    [2023-07-23, 03:52:47] {taskinstance.py:1144} INFO - Dependencies all met 
for dep_context=requeueable deps ti=<TaskInstance: tutorial_dag.load 
manual__2023-07-22T22:22:25.891267+00:00 [queued]>
+    [2023-07-23, 03:52:47] {taskinstance.py:1346} INFO - Starting attempt 1 of 
3
 
-#. Restart the Airflow webserver and scheduler, and trigger (or wait for) a 
new task execution.
-#. Verify that logs are showing up for newly executed tasks in the bucket you 
have defined.
+**Note** that the path to the remote log file is listed in the second line.
diff --git a/tests/providers/microsoft/azure/log/test_wasb_task_handler.py 
b/tests/providers/microsoft/azure/log/test_wasb_task_handler.py
index 6178293e0d..fe554bd161 100644
--- a/tests/providers/microsoft/azure/log/test_wasb_task_handler.py
+++ b/tests/providers/microsoft/azure/log/test_wasb_task_handler.py
@@ -113,7 +113,8 @@ class TestWasbTaskHandler:
                 [
                     (
                         "localhost",
-                        "*** Found remote logs:\n***   * 
wasb://wasb-container/abc/hello.log\nLog line",
+                        "*** Found remote logs:\n"
+                        "***   * 
https://wasb-container.blob.core.windows.net/abc/hello.log\nLog line",
                     )
                 ]
             ],

Reply via email to