This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4373fdf929 GH-31318: [Python] Add fixed-offset timezones to Hypothesis
test strategy (#49844)
4373fdf929 is described below
commit 4373fdf929a852ea8d7f9f20f1232daff90e0f86
Author: Alexandros Anastasiou <[email protected]>
AuthorDate: Tue May 5 08:22:17 2026 +0100
GH-31318: [Python] Add fixed-offset timezones to Hypothesis test strategy
(#49844)
### Rationale for this change
The Hypothesis test strategy for timezones
(`pyarrow.tests.strategies.timezones`) only generates named IANA timezones (via
pytz and zoneinfo). It does not generate fixed-offset timezones like `+05:30`
or `-03:00`. As noted in #31318, this is not supported out of the box by
Hypothesis, so a custom strategy is needed. Fixed-offset timezones are already
used in manual tests and handled correctly by Arrow, but are never exercised
via property-based testing.
### What changes are included in this PR?
Adds a `fixed_offset_timezones` strategy to
`python/pyarrow/tests/strategies.py` that generates `datetime.timezone` objects
with offsets ranging from UTC-12:00 to UTC+14:00. Each offset is a whole hour
plus 0, 30, or 45 minutes (covering all real-world UTC offsets); because the
minutes are added to the signed hour, negative offsets render with minute
components of 0, 30, or 15 (e.g. `-03:30`, `-03:15`). This strategy is included
in the existing `timezones` strategy across all dependency configurations.
### Are these changes tested?
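This change improves test infrastructure itself -- it extends the
`timezones` strategy used by existing Hypothesis-based tests. The generated
fixed-offset timezones were verified to work correctly with `pa.timestamp()`.
The patch also adds `test_timestamp_array_fixed_offset_timezones` to
`python/pyarrow/tests/test_strategies.py`, which draws timestamp arrays typed
with the `+01:30` and `-00:30` offsets.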
The test command used was:
```sh
export MAMBA_ROOT_PREFIX=$PWD/.micromamba &&
eval "$(/tmp/bin/micromamba shell hook -s bash)" &&
micromamba activate pyarrow-dev &&
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH &&
python -m pytest pyarrow/tests/test_scalars.py pyarrow/tests/test_array.py -v -k test_dunders
```
### Are there any user-facing changes?
No. This only affects the internal test suite.
### AI-generated code disclosure
This PR was developed with assistance from an AI coding tool (Claude,
Anthropic). All changes have been reviewed, understood, and verified.
* GitHub Issue: #31318
Authored-by: Alexandros Anastasiou <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
python/pyarrow/tests/strategies.py | 36 +++++++++++++++++++++++++--------
python/pyarrow/tests/test_strategies.py | 9 +++++++++
2 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/python/pyarrow/tests/strategies.py
b/python/pyarrow/tests/strategies.py
index 3c31650ddf..cb96f71e26 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -143,14 +143,33 @@ time_types = st.sampled_from([
pa.time64('ns')
])
+# UTC-12 to UTC+14, minute offsets 0/30/45 cover all real-world IANA offsets
+_fixed_offset_timezone_list = [
+ datetime.timezone(datetime.timedelta(hours=h, minutes=m))
+ for h in range(-12, 15)
+ for m in [0, 30, 45]
+ if datetime.timedelta(hours=-12)
+ <= datetime.timedelta(hours=h, minutes=m)
+ <= datetime.timedelta(hours=14)
+]
+
+
[email protected]
+def _draw_fixed_offset_timezone(draw):
+ return draw(st.sampled_from(_fixed_offset_timezone_list))
+
+
+fixed_offset_timezones = _draw_fixed_offset_timezone()
+
if tzst and zoneinfo:
- timezones = st.one_of(st.none(), tzst.timezones(), st.timezones())
+ timezones = st.one_of(
+ st.none(), tzst.timezones(), st.timezones(), fixed_offset_timezones)
elif tzst:
- timezones = st.one_of(st.none(), tzst.timezones())
+ timezones = st.one_of(st.none(), tzst.timezones(), fixed_offset_timezones)
elif zoneinfo:
- timezones = st.one_of(st.none(), st.timezones())
+ timezones = st.one_of(st.none(), st.timezones(), fixed_offset_timezones)
else:
- timezones = st.none()
+ timezones = st.one_of(st.none(), fixed_offset_timezones)
timestamp_types = st.builds(
pa.timestamp,
unit=st.sampled_from(['s', 'ms', 'us', 'ns']),
@@ -337,10 +356,11 @@ def arrays(draw, type, size=None, nullable=True):
max_datetime = datetime.datetime.fromtimestamp(
max_int64 // 10**9) - datetime.timedelta(hours=12)
try:
- offset = ty.tz.split(":")
- offset_hours = int(offset[0])
- offset_min = int(offset[1])
- tz = datetime.timedelta(hours=offset_hours, minutes=offset_min)
+ offset_hours, offset_min = ty.tz.split(":")
+ sign = -1 if offset_hours.startswith("-") else 1
+ offset = datetime.timedelta(
+ hours=abs(int(offset_hours)), minutes=int(offset_min))
+ tz = datetime.timezone(sign * offset)
except ValueError:
tz = zoneinfo.ZoneInfo(ty.tz)
value = st.datetimes(timezones=st.just(tz), min_value=min_datetime,
diff --git a/python/pyarrow/tests/test_strategies.py
b/python/pyarrow/tests/test_strategies.py
index babb839b53..bd9ea140bd 100644
--- a/python/pyarrow/tests/test_strategies.py
+++ b/python/pyarrow/tests/test_strategies.py
@@ -16,6 +16,7 @@
# under the License.
import hypothesis as h
+import hypothesis.strategies as st
import pytest
@@ -44,6 +45,14 @@ def test_arrays(array):
assert isinstance(array, pa.lib.Array)
[email protected](st.sampled_from(['+01:30', '-00:30']), st.data())
+def test_timestamp_array_fixed_offset_timezones(timezone, data):
+ array = data.draw(
+ past.arrays(st.just(pa.timestamp('s', timezone)), size=1))
+ assert isinstance(array, pa.lib.Array)
+ assert array.type.tz == timezone
+
+
@pytest.mark.numpy
@h.given(past.arrays(past.primitive_types, nullable=False))
def test_array_nullability(array):