This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 41705f9eca GH-35600: [Python] Allow setting path to timezone db 
through python API (#37436)
41705f9eca is described below

commit 41705f9eca36e667fe66607a470b31e3c5c03ffe
Author: Alenka Frim <[email protected]>
AuthorDate: Thu Oct 5 10:00:48 2023 +0200

    GH-35600: [Python] Allow setting path to timezone db through python API 
(#37436)
    
    ### Rationale for this change
    
    Add a function to set the path where the timezone database is looked up, as a thin wrapper around setting `timezone_db_path` in the C++ `GlobalOptions`.
    
    ### What changes are included in this PR?
    
    New function `set_timezone_db_path`.
    
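    ### Are these changes tested?
    
    Yes. A test covering the error raised on non-Windows platforms is added to `test_misc.py`, and the AppVeyor build now places the timezone database in a non-standard location and points the test suite to it through `PYARROW_TZDATA_PATH`.
    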
    ### Are there any user-facing changes?
    
    Yes. A new public function `pyarrow.set_timezone_db_path` is added.
    * Closes: #35600
    
    Lead-authored-by: AlenkaF <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Co-authored-by: Alenka Frim <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 ci/appveyor-cpp-build.bat            | 13 +++++++++++++
 python/pyarrow/__init__.py           |  8 ++++----
 python/pyarrow/config.pxi            | 19 +++++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  5 +++++
 python/pyarrow/tests/conftest.py     |  7 +++++++
 python/pyarrow/tests/test_misc.py    | 12 ++++++++++++
 python/pyarrow/tests/util.py         | 10 ++++++++--
 7 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index e69c7bf251..5e561a0461 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -132,6 +132,19 @@ set ARROW_HOME=%CONDA_PREFIX%\Library
 @rem ARROW-3075; pkgconfig is broken for Parquet for now
 set PARQUET_HOME=%CONDA_PREFIX%\Library
 
+@rem Download IANA Timezone Database to a non-standard location to
+@rem test the configurability of the timezone database path
+curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output tzdata.tar.gz || exit /B
+mkdir %USERPROFILE%\Downloads\test\tzdata
+tar --extract --file tzdata.tar.gz --directory %USERPROFILE%\Downloads\test\tzdata
+curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^
+  --output %USERPROFILE%\Downloads\test\tzdata\windowsZones.xml || exit /B
+@rem Remove the database from the default location
+rmdir /s /q %USERPROFILE%\Downloads\tzdata
+@rem Set the env var for the non-standard location of the database
+@rem (only needed for testing purposes)
+set PYARROW_TZDATA_PATH=%USERPROFILE%\Downloads\test\tzdata
+
 python setup.py develop -q || exit /B
 
 set PYTHONDEVMODE=1
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index ee0d07bb2c..cd66abcb44 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -66,10 +66,10 @@ import pyarrow.lib as _lib
 if _gc_enabled:
     _gc.enable()
 
-from pyarrow.lib import (BuildInfo, RuntimeInfo, MonthDayNano,
-                         VersionInfo, cpp_build_info, cpp_version,
-                         cpp_version_info, runtime_info, cpu_count,
-                         set_cpu_count, enable_signal_handlers,
+from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
+                         MonthDayNano, VersionInfo, cpp_build_info,
+                         cpp_version, cpp_version_info, runtime_info,
+                         cpu_count, set_cpu_count, enable_signal_handlers,
                          io_thread_count, set_io_thread_count)
 
 
diff --git a/python/pyarrow/config.pxi b/python/pyarrow/config.pxi
index fb9526ba89..cf751b810c 100644
--- a/python/pyarrow/config.pxi
+++ b/python/pyarrow/config.pxi
@@ -18,6 +18,7 @@
 from pyarrow.includes.libarrow cimport GetBuildInfo
 
 from collections import namedtuple
+import os
 
 
 VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'patch'))
@@ -74,3 +75,21 @@ def runtime_info():
     return RuntimeInfo(
         simd_level=frombytes(c_info.simd_level),
         detected_simd_level=frombytes(c_info.detected_simd_level))
+
+
+def set_timezone_db_path(path):
+    """
+    Configure the path to the text timezone database on Windows.
+
+    Parameters
+    ----------
+    path : str
+        Path to text timezone database.
+    """
+    cdef:
+        CGlobalOptions options
+
+    if path is not None:
+        options.timezone_db_path = <c_string>tobytes(path)
+
+    check_status(Initialize(options))
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index f4d6541fa7..482a6e91ba 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -80,6 +80,11 @@ cdef extern from "arrow/config.h" namespace "arrow" nogil:
 
     CRuntimeInfo GetRuntimeInfo()
 
+    cdef cppclass CGlobalOptions" arrow::GlobalOptions":
+        optional[c_string] timezone_db_path
+
+    CStatus Initialize(const CGlobalOptions& options)
+
 
 cdef extern from "arrow/util/future.h" namespace "arrow" nogil:
     cdef cppclass CFuture_Void" arrow::Future<>":
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 241ae4814a..a5941e8c8d 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -28,6 +28,7 @@ from pytest_lazyfixture import lazy_fixture
 import hypothesis as h
 from ..conftest import groups, defaults
 
+from pyarrow import set_timezone_db_path
 from pyarrow.util import find_free_port
 
 
@@ -48,6 +49,12 @@ h.settings.load_profile(os.environ.get('HYPOTHESIS_PROFILE', 'dev'))
 os.environ['AWS_CONFIG_FILE'] = "/dev/null"
 
 
+if sys.platform == 'win32':
+    tzdata_set_path = os.environ.get('PYARROW_TZDATA_PATH', None)
+    if tzdata_set_path:
+        set_timezone_db_path(tzdata_set_path)
+
+
 def pytest_addoption(parser):
     # Create options to selectively enable test groups
     def bool_env(name, default=None):
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 86cbf013cd..9b9dfdd554 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -22,6 +22,7 @@ import sys
 import pytest
 
 import pyarrow as pa
+from pyarrow.lib import ArrowInvalid
 
 
 def test_get_include():
@@ -116,6 +117,17 @@ def test_runtime_info():
         subprocess.check_call([sys.executable, "-c", code], env=env)
 
 
+@pytest.mark.skipif(sys.platform == "win32",
+                    reason="Path to timezone database is not configurable "
+                           "on non-Windows platforms")
+def test_set_timezone_db_path_non_windows():
+    # set_timezone_db_path raises an error on non-Windows platforms
+    with pytest.raises(ArrowInvalid,
+                       match="Arrow was set to use OS timezone "
+                             "database at compile time"):
+        pa.set_timezone_db_path("path")
+
+
 @pytest.mark.parametrize('klass', [
     pa.Field,
     pa.Schema,
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index 0b69deb73b..638eee9807 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -455,5 +455,11 @@ def windows_has_tzdata():
     This is the default location where tz.cpp will look for (until we make
     this configurable at run-time)
     """
-    tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
-    return os.path.exists(tzdata_path)
+    tzdata_bool = False
+    if "PYARROW_TZDATA_PATH" in os.environ:
+        tzdata_bool = os.path.exists(os.environ['PYARROW_TZDATA_PATH'])
+    if not tzdata_bool:
+        tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
+        tzdata_bool = os.path.exists(tzdata_path)
+
+    return tzdata_bool

Reply via email to