This is an automated email from the ASF dual-hosted git repository. ephraimanierobi pushed a commit to branch v2-4-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 14e55b64ce803858fde5ff9369e11e79bbcdd668 Author: Ephraim Anierobi <splendidzig...@gmail.com> AuthorDate: Mon Sep 26 23:01:14 2022 +0100 Add a note against use of top level code in timetable (#26649) Accessing Variables, Connections at top level of code is bad practice Add a section in best practices ref for timetable (cherry picked from commit 37c0cb6d3240062106388449cf8eed9c948fb539) --- airflow/serialization/serialized_objects.py | 6 ++++- docs/apache-airflow/best-practices.rst | 35 +++++++++++++++++++++++++++ docs/apache-airflow/concepts/timetable.rst | 6 +++++ tests/serialization/test_dag_serialization.py | 8 ++++-- 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index 542573fbcc..06608ba865 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -165,7 +165,11 @@ class _TimetableNotRegistered(ValueError): self.type_string = type_string def __str__(self) -> str: - return f"Timetable class {self.type_string!r} is not registered" + return ( + f"Timetable class {self.type_string!r} is not registered or " + "you have a top level database access that disrupted the session. " + "Please check the airflow best practices documentation." + ) def _encode_timetable(var: Timetable) -> dict[str, Any]: diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst index 33f31654cc..2beb777eca 100644 --- a/docs/apache-airflow/best-practices.rst +++ b/docs/apache-airflow/best-practices.rst @@ -216,6 +216,41 @@ or if you need to deserialize a json object from the variable : For security purpose, you're recommended to use the :ref:`Secrets Backend<secrets_backend_configuration>` for any variable that contains sensitive data. +.. _best_practices/timetables: + +Timetables +---------- +Avoid using Airflow Variables/Connections or accessing airflow database at the top level of your timetable code. +Database access should be delayed until the execution time of the DAG. This means that you should not have variables/connections retrieval +as argument to your timetable class initialization or have Variable/connection at the top level of your custom timetable module. + +Bad example: + +.. code-block:: python + + from airflow.models.variable import Variable + from airflow.timetables.interval import CronDataIntervalTimetable + + + class CustomTimetable(CronDataIntervalTimetable): + def __init__(self, *args, something=Variable.get('something'), **kwargs): + self._something = something + super().__init__(*args, **kwargs) + +Good example: + +.. code-block:: python + + from airflow.models.variable import Variable + from airflow.timetables.interval import CronDataIntervalTimetable + + + class CustomTimetable(CronDataIntervalTimetable): + def __init__(self, *args, something='something', **kwargs): + self._something = Variable.get(something) + super().__init__(*args, **kwargs) + + Triggering DAGs after changes ----------------------------- diff --git a/docs/apache-airflow/concepts/timetable.rst b/docs/apache-airflow/concepts/timetable.rst index 9e1a9e0bf3..c76d63ea48 100644 --- a/docs/apache-airflow/concepts/timetable.rst +++ b/docs/apache-airflow/concepts/timetable.rst @@ -51,6 +51,12 @@ As such, Airflow allows for custom timetables to be written in plugins and used DAGs. An example demonstrating a custom timetable can be found in the :doc:`/howto/timetable` how-to guide. +.. note:: + + As a general rule, always access Variables, Connections etc or anything that would access + the database as late as possible in your code. See :ref:`best_practices/timetables` + for more best practices to follow. + Built-in Timetables ------------------- diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index 6b485f7465..7a6b9a1ec1 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -411,7 +411,9 @@ class TestStringifiedDAGs: message = ( "Failed to serialize DAG 'simple_dag': Timetable class " "'tests.test_utils.timetables.CustomSerializationTimetable' " - "is not registered" + "is not registered or " + "you have a top level database access that disrupted the session. " + "Please check the airflow best practices documentation." ) assert str(ctx.value) == message @@ -721,7 +723,9 @@ class TestStringifiedDAGs: message = ( "Timetable class " "'tests.test_utils.timetables.CustomSerializationTimetable' " - "is not registered" + "is not registered or " + "you have a top level database access that disrupted the session. " + "Please check the airflow best practices documentation." ) assert str(ctx.value) == message