This is an automated email from the ASF dual-hosted git repository. ephraimanierobi pushed a commit to branch v2-7-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 9ecdff04471508345e6e23a5f99e5b4c8c97aee6 Author: Hussein Awala <[email protected]> AuthorDate: Thu Sep 28 09:31:06 2023 +0200 Fix non-deterministic datetime deserialization (#34492) tzname() does not return full timezone names, and the shorthand abbreviations it returns are not deterministic. This changes the serialization to be deterministic and adds logic to deserialize previously serialized shorthand US timezones (EDT, CDT, MDT, PDT) and CEST. --------- Co-authored-by: bolkedebruin <[email protected]> (cherry picked from commit a3c06c02e31cc77b2c19554892b72ed91b8387de) --- airflow/serialization/serializers/datetime.py | 32 +++++++++-- .../serialization/serializers/test_serializers.py | 62 +++++++++++++++++++++- 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/airflow/serialization/serializers/datetime.py b/airflow/serialization/serializers/datetime.py index bdb9a6cb6c..49f0899a59 100644 --- a/airflow/serialization/serializers/datetime.py +++ b/airflow/serialization/serializers/datetime.py @@ -19,6 +19,10 @@ from __future__ import annotations from typing import TYPE_CHECKING +from airflow.serialization.serializers.timezone import ( + deserialize as deserialize_timezone, + serialize as serialize_timezone, +) from airflow.utils.module_loading import qualname from airflow.utils.timezone import convert_to_utc, is_naive @@ -27,7 +31,7 @@ if TYPE_CHECKING: from airflow.serialization.serde import U -__version__ = 1 +__version__ = 2 serializers = ["datetime.date", "datetime.datetime", "datetime.timedelta", "pendulum.datetime.DateTime"] deserializers = serializers @@ -44,7 +48,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]: if is_naive(o): o = convert_to_utc(o) - tz = o.tzname() + tz = serialize_timezone(o.tzinfo) return {TIMESTAMP: o.timestamp(), TIMEZONE: tz}, qn, __version__, True @@ -61,13 +65,31 @@ def deserialize(classname: str, version: int, data: dict | str) -> datetime.date import datetime from pendulum import DateTime - from pendulum.tz import timezone + from pendulum.tz 
import fixed_timezone, timezone + + tz: datetime.tzinfo | None = None + if isinstance(data, dict) and TIMEZONE in data: + if version == 1: + # try to deserialize unsupported timezones + timezone_mapping = { + "EDT": fixed_timezone(-4 * 3600), + "CDT": fixed_timezone(-5 * 3600), + "MDT": fixed_timezone(-6 * 3600), + "PDT": fixed_timezone(-7 * 3600), + "CEST": timezone("CET"), + } + if data[TIMEZONE] in timezone_mapping: + tz = timezone_mapping[data[TIMEZONE]] + else: + tz = timezone(data[TIMEZONE]) + else: + tz = deserialize_timezone(data[TIMEZONE][1], data[TIMEZONE][2], data[TIMEZONE][0]) if classname == qualname(datetime.datetime) and isinstance(data, dict): - return datetime.datetime.fromtimestamp(float(data[TIMESTAMP]), tz=timezone(data[TIMEZONE])) + return datetime.datetime.fromtimestamp(float(data[TIMESTAMP]), tz=tz) if classname == qualname(DateTime) and isinstance(data, dict): - return DateTime.fromtimestamp(float(data[TIMESTAMP]), tz=timezone(data[TIMEZONE])) + return DateTime.fromtimestamp(float(data[TIMESTAMP]), tz=tz) if classname == qualname(datetime.timedelta) and isinstance(data, (str, float)): return datetime.timedelta(seconds=float(data)) diff --git a/tests/serialization/serializers/test_serializers.py b/tests/serialization/serializers/test_serializers.py index e9805d4d77..1d19760417 100644 --- a/tests/serialization/serializers/test_serializers.py +++ b/tests/serialization/serializers/test_serializers.py @@ -32,7 +32,6 @@ from airflow.serialization.serde import DATA, deserialize, serialize class TestSerializers: def test_datetime(self): i = datetime.datetime(2022, 7, 10, 22, 10, 43, microsecond=0, tzinfo=pendulum.tz.UTC) - s = serialize(i) d = deserialize(s) assert i.timestamp() == d.timestamp() @@ -52,6 +51,67 @@ class TestSerializers: d = deserialize(s) assert i == d + i = datetime.datetime( + 2022, 7, 10, 22, 10, 43, microsecond=0, tzinfo=pendulum.timezone("America/New_York") + ) + s = serialize(i) + d = deserialize(s) + assert i.timestamp() == 
d.timestamp() + + i = DateTime(2022, 7, 10, tzinfo=pendulum.timezone("America/New_York")) + s = serialize(i) + d = deserialize(s) + assert i.timestamp() == d.timestamp() + + def test_deserialize_datetime_v1(self): + + s = { + "__classname__": "pendulum.datetime.DateTime", + "__version__": 1, + "__data__": {"timestamp": 1657505443.0, "tz": "UTC"}, + } + d = deserialize(s) + assert d.timestamp() == 1657505443.0 + assert d.tzinfo.name == "UTC" + + s["__data__"]["tz"] = "Europe/Paris" + d = deserialize(s) + assert d.timestamp() == 1657505443.0 + assert d.tzinfo.name == "Europe/Paris" + + s["__data__"]["tz"] = "America/New_York" + d = deserialize(s) + assert d.timestamp() == 1657505443.0 + assert d.tzinfo.name == "America/New_York" + + s["__data__"]["tz"] = "EDT" + d = deserialize(s) + assert d.timestamp() == 1657505443.0 + assert d.tzinfo.name == "-04:00" + # assert that it's serializable with the new format + assert deserialize(serialize(d)) == d + + s["__data__"]["tz"] = "CDT" + d = deserialize(s) + assert d.timestamp() == 1657505443.0 + assert d.tzinfo.name == "-05:00" + # assert that it's serializable with the new format + assert deserialize(serialize(d)) == d + + s["__data__"]["tz"] = "MDT" + d = deserialize(s) + assert d.timestamp() == 1657505443.0 + assert d.tzinfo.name == "-06:00" + # assert that it's serializable with the new format + assert deserialize(serialize(d)) == d + + s["__data__"]["tz"] = "PDT" + d = deserialize(s) + assert d.timestamp() == 1657505443.0 + assert d.tzinfo.name == "-07:00" + # assert that it's serializable with the new format + assert deserialize(serialize(d)) == d + @pytest.mark.parametrize( "expr, expected", [("1", "1"), ("52e4", "520000"), ("2e0", "2"), ("12e-2", "0.12"), ("12.34", "12.34")],
