This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 41705f9eca GH-35600: [Python] Allow setting path to timezone db
through python API (#37436)
41705f9eca is described below
commit 41705f9eca36e667fe66607a470b31e3c5c03ffe
Author: Alenka Frim <[email protected]>
AuthorDate: Thu Oct 5 10:00:48 2023 +0200
GH-35600: [Python] Allow setting path to timezone db through python API
(#37436)
### Rationale for this change
Add a function to set the path where the timezone database is looked up,
as a small wrapper around the `timezone_db_path` field of the C++ `GlobalOptions`.
### What changes are included in this PR?
New function `set_timezone_db_path`.
### Are these changes tested?
### Are there any user-facing changes?
No.
* Closes: #35600
Lead-authored-by: AlenkaF <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
ci/appveyor-cpp-build.bat | 13 +++++++++++++
python/pyarrow/__init__.py | 8 ++++----
python/pyarrow/config.pxi | 19 +++++++++++++++++++
python/pyarrow/includes/libarrow.pxd | 5 +++++
python/pyarrow/tests/conftest.py | 7 +++++++
python/pyarrow/tests/test_misc.py | 12 ++++++++++++
python/pyarrow/tests/util.py | 10 ++++++++--
7 files changed, 68 insertions(+), 6 deletions(-)
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index e69c7bf251..5e561a0461 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -132,6 +132,19 @@ set ARROW_HOME=%CONDA_PREFIX%\Library
@rem ARROW-3075; pkgconfig is broken for Parquet for now
set PARQUET_HOME=%CONDA_PREFIX%\Library
+@rem Download IANA Timezone Database to a non-standard location to
+@rem test the configurability of the timezone database path
+curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output tzdata.tar.gz || exit /B
+mkdir %USERPROFILE%\Downloads\test\tzdata
+tar --extract --file tzdata.tar.gz --directory %USERPROFILE%\Downloads\test\tzdata
+curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^
+ --output %USERPROFILE%\Downloads\test\tzdata\windowsZones.xml || exit /B
+@rem Remove the database from the default location
+rmdir /s /q %USERPROFILE%\Downloads\tzdata
+@rem Set the env var for the non-standard location of the database
+@rem (only needed for testing purposes)
+set PYARROW_TZDATA_PATH=%USERPROFILE%\Downloads\test\tzdata
+
python setup.py develop -q || exit /B
set PYTHONDEVMODE=1
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index ee0d07bb2c..cd66abcb44 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -66,10 +66,10 @@ import pyarrow.lib as _lib
if _gc_enabled:
_gc.enable()
-from pyarrow.lib import (BuildInfo, RuntimeInfo, MonthDayNano,
- VersionInfo, cpp_build_info, cpp_version,
- cpp_version_info, runtime_info, cpu_count,
- set_cpu_count, enable_signal_handlers,
+from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
+ MonthDayNano, VersionInfo, cpp_build_info,
+ cpp_version, cpp_version_info, runtime_info,
+ cpu_count, set_cpu_count, enable_signal_handlers,
io_thread_count, set_io_thread_count)
diff --git a/python/pyarrow/config.pxi b/python/pyarrow/config.pxi
index fb9526ba89..cf751b810c 100644
--- a/python/pyarrow/config.pxi
+++ b/python/pyarrow/config.pxi
@@ -18,6 +18,7 @@
from pyarrow.includes.libarrow cimport GetBuildInfo
from collections import namedtuple
+import os
VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'patch'))
@@ -74,3 +75,21 @@ def runtime_info():
return RuntimeInfo(
simd_level=frombytes(c_info.simd_level),
detected_simd_level=frombytes(c_info.detected_simd_level))
+
+
+def set_timezone_db_path(path):
+ """
+ Configure the path to text timezone database on Windows.
+
+ Parameters
+ ----------
+ path : str
+ Path to text timezone database.
+ """
+ cdef:
+ CGlobalOptions options
+
+ if path is not None:
+ options.timezone_db_path = <c_string>tobytes(path)
+
+ check_status(Initialize(options))
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index f4d6541fa7..482a6e91ba 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -80,6 +80,11 @@ cdef extern from "arrow/config.h" namespace "arrow" nogil:
CRuntimeInfo GetRuntimeInfo()
+ cdef cppclass CGlobalOptions" arrow::GlobalOptions":
+ optional[c_string] timezone_db_path
+
+ CStatus Initialize(const CGlobalOptions& options)
+
cdef extern from "arrow/util/future.h" namespace "arrow" nogil:
cdef cppclass CFuture_Void" arrow::Future<>":
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 241ae4814a..a5941e8c8d 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -28,6 +28,7 @@ from pytest_lazyfixture import lazy_fixture
import hypothesis as h
from ..conftest import groups, defaults
+from pyarrow import set_timezone_db_path
from pyarrow.util import find_free_port
@@ -48,6 +49,12 @@ h.settings.load_profile(os.environ.get('HYPOTHESIS_PROFILE', 'dev'))
os.environ['AWS_CONFIG_FILE'] = "/dev/null"
+if sys.platform == 'win32':
+ tzdata_set_path = os.environ.get('PYARROW_TZDATA_PATH', None)
+ if tzdata_set_path:
+ set_timezone_db_path(tzdata_set_path)
+
+
def pytest_addoption(parser):
# Create options to selectively enable test groups
def bool_env(name, default=None):
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 86cbf013cd..9b9dfdd554 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -22,6 +22,7 @@ import sys
import pytest
import pyarrow as pa
+from pyarrow.lib import ArrowInvalid
def test_get_include():
@@ -116,6 +117,17 @@ def test_runtime_info():
subprocess.check_call([sys.executable, "-c", code], env=env)
[email protected](sys.platform == "win32",
+ reason="Path to timezone database is not configurable "
+ "on non-Windows platforms")
+def test_set_timezone_db_path_non_windows():
+ # set_timezone_db_path raises an error on non-Windows platforms
+ with pytest.raises(ArrowInvalid,
+ match="Arrow was set to use OS timezone "
+ "database at compile time"):
+ pa.set_timezone_db_path("path")
+
+
@pytest.mark.parametrize('klass', [
pa.Field,
pa.Schema,
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index 0b69deb73b..638eee9807 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -455,5 +455,11 @@ def windows_has_tzdata():
This is the default location where tz.cpp will look for (until we make
this configurable at run-time)
"""
- tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
- return os.path.exists(tzdata_path)
+ tzdata_bool = False
+ if "PYARROW_TZDATA_PATH" in os.environ:
+ tzdata_bool = os.path.exists(os.environ['PYARROW_TZDATA_PATH'])
+ if not tzdata_bool:
+ tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
+ tzdata_bool = os.path.exists(tzdata_path)
+
+ return tzdata_bool