This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new aa057d02a8 GH-35594: [R][C++] Bump vendored date library (#35612)
aa057d02a8 is described below
commit aa057d02a8234082e78a99e2b9de8449378acf30
Author: Nic Crane <[email protected]>
AuthorDate: Tue May 16 14:54:36 2023 +0100
GH-35594: [R][C++] Bump vendored date library (#35612)
There was an issue with tzdb 0.4.0 and the shipped arrow `tz.cpp`.
This PR bumps the vendored version of the date library to commit
`cc4685a21e4a4fdae707ad1233c61bbaff241f93`.
* Closes: #35594
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/vendored/datetime/README.md | 2 +-
cpp/src/arrow/vendored/datetime/date.h | 6 +-
cpp/src/arrow/vendored/datetime/tz.cpp | 186 +++++++++++++++++++++++++-----
cpp/src/arrow/vendored/datetime/tz.h | 4 +-
4 files changed, 163 insertions(+), 35 deletions(-)
diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md
index cff53e7e30..0dd663c5e5 100644
--- a/cpp/src/arrow/vendored/datetime/README.md
+++ b/cpp/src/arrow/vendored/datetime/README.md
@@ -17,7 +17,7 @@ copies or substantial portions of the Software.
Sources for datetime are adapted from Howard Hinnant's date library
(https://github.com/HowardHinnant/date).
-Sources are taken from changeset 2e19c006e2218447ee31f864191859517603f59f
+Sources are taken from changeset cc4685a21e4a4fdae707ad1233c61bbaff241f93
of the above project.
The following changes are made:
diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h
index 3b38b263a8..fd2569c6de 100644
--- a/cpp/src/arrow/vendored/datetime/date.h
+++ b/cpp/src/arrow/vendored/datetime/date.h
@@ -1318,7 +1318,7 @@ CONSTCD11
std::chrono::duration<Rep, Period>
abs(std::chrono::duration<Rep, Period> d)
{
- return d >= d.zero() ? d : -d;
+ return d >= d.zero() ? d : static_cast<decltype(d)>(-d);
}
// round down
@@ -4208,8 +4208,8 @@ template <class CharT, class Traits, class Duration>
inline
typename std::enable_if
<
- std::ratio_less<typename Duration::period, days::period>::value
- , std::basic_ostream<CharT, Traits>&
+ !std::is_convertible<Duration, days>::value,
+ std::basic_ostream<CharT, Traits>&
>::type
operator<<(std::basic_ostream<CharT, Traits>& os, const sys_time<Duration>& tp)
{
diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp
index 9047a31c79..6962a8b3c3 100644
--- a/cpp/src/arrow/vendored/datetime/tz.cpp
+++ b/cpp/src/arrow/vendored/datetime/tz.cpp
@@ -202,6 +202,35 @@ namespace
using co_task_mem_ptr = std::unique_ptr<wchar_t[], task_mem_deleter>;
}
+static
+std::wstring
+convert_utf8_to_utf16(const std::string& s)
+{
+ std::wstring out;
+ const int size = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, NULL, 0);
+
+ if (size == 0)
+ {
+ std::string msg = "Failed to determine required size when converting \"";
+ msg += s;
+ msg += "\" to UTF-16.";
+ throw std::runtime_error(msg);
+ }
+
+ out.resize(size);
+ const int check = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, &out[0], size);
+
+ if (size != check)
+ {
+ std::string msg = "Failed to convert \"";
+ msg += s;
+ msg += "\" to UTF-16.";
+ throw std::runtime_error(msg);
+ }
+
+ return out;
+}
+
// We might need to know certain locations even if not using the remote API,
// so keep these routines out of that block for now.
static
@@ -271,8 +300,90 @@ get_download_folder()
# endif // !_WIN32
-#endif // !USE_OS_TZDB
+/*
+ * This class is provided to mimic the following usage of `ifstream`:
+ *
+ * std::ifstream is(filename);
+ *
+ * file_streambuf ibuf(filename);
+ * std::istream is(&ibuf);
+ *
+ * This is required because `ifstream` does not support opening files
+ * containing wide characters on Windows. On Windows, `file_streambuf` uses
+ * `file_open()` to convert the file name to UTF-16 before opening it with
+ * `_wfopen()`.
+ *
+ * Note that this is not an exact re-implementation of `ifstream`,
+ * but is enough for usage here.
+ *
+ * It is partially based on these two implementations:
+ * - fdinbuf from http://www.josuttis.com/cppcode/fdstream.html
+ * - stdiobuf https://stackoverflow.com/questions/12342542/convert-file-to-ifstream-c-android-ndk
+ *
+ * Apparently MSVC provides non-standard overloads of `ifstream` that support
+ * a `const wchar_t*` file name, but MinGW does not https://stackoverflow.com/a/822032
+ */
+class file_streambuf
+ : public std::streambuf
+{
+private:
+ FILE* file_;
+ static const int buffer_size_ = 1024;
+ char buffer_[buffer_size_];
+
+public:
+ ~file_streambuf()
+ {
+ if (file_)
+ {
+ ::fclose(file_);
+ }
+ }
+ file_streambuf(const file_streambuf&) = delete;
+ file_streambuf& operator=(const file_streambuf&) = delete;
+
+ file_streambuf(const std::string& filename)
+ : file_(file_open(filename))
+ {
+ }
+
+protected:
+ virtual
+ int_type
+ underflow()
+ {
+ if (gptr() == egptr() && file_)
+ {
+ const size_t size = ::fread(buffer_, 1, buffer_size_, file_);
+ setg(buffer_, buffer_, buffer_ + size);
+ }
+ return (gptr() == egptr())
+ ? traits_type::eof()
+ : traits_type::to_int_type(*gptr());
+ }
+
+private:
+ FILE*
+ file_open(const std::string& filename)
+ {
+# ifdef _WIN32
+ std::wstring wfilename = convert_utf8_to_utf16(filename);
+ FILE* file = ::_wfopen(wfilename.c_str(), L"rb");
+# else // !_WIN32
+ FILE* file = ::fopen(filename.c_str(), "rb");
+# endif // _WIN32
+ if (file == NULL)
+ {
+ std::string msg = "Error opening file \"";
+ msg += filename;
+ msg += "\".";
+ throw std::runtime_error(msg);
+ }
+ return file;
+ }
+};
+#endif // !USE_OS_TZDB
namespace arrow_vendored
{
namespace date
@@ -309,9 +420,9 @@ access_install()
}
void
-set_install(const std::string& s)
+set_install(const std::string& install)
{
- access_install() = s;
+ access_install() = install;
}
static
@@ -565,15 +676,8 @@ load_timezone_mappings_from_xml_file(const std::string& input_path)
std::vector<detail::timezone_mapping> mappings;
std::string line;
- std::ifstream is(input_path);
- if (!is.is_open())
- {
- // We don't emit file exceptions because that's an implementation detail.
- std::string msg = "Error opening time zone mapping file \"";
- msg += input_path;
- msg += "\".";
- throw std::runtime_error(msg);
- }
+ file_streambuf ibuf(input_path);
+ std::istream is(&ibuf);
auto error = [&input_path, &line_num](const char* info)
{
@@ -703,7 +807,6 @@ load_timezone_mappings_from_xml_file(const std::string& input_path)
}
}
- is.close();
return mappings;
}
@@ -2675,16 +2778,16 @@ find_read_and_leap_seconds()
std::getline(in, line);
if (!line.empty() && line[0] != '#')
{
- std::istringstream in(line);
- in.exceptions(std::ios::failbit | std::ios::badbit);
+ std::istringstream iss(line);
+ iss.exceptions(std::ios::failbit | std::ios::badbit);
std::string word;
- in >> word;
+ iss >> word;
if (word == "Leap")
{
int y, m, d;
- in >> y;
- m = static_cast<int>(parse_month(in));
- in >> d;
+ iss >> y;
+ m = static_cast<int>(parse_month(iss));
+ iss >> d;
leap_seconds.push_back(leap_second(sys_days{year{y}/m/d} +
days{1},
detail::undocumented{}));
}
@@ -2709,11 +2812,11 @@ find_read_and_leap_seconds()
std::getline(in, line);
if (!line.empty() && line[0] != '#')
{
- std::istringstream in(line);
- in.exceptions(std::ios::failbit | std::ios::badbit);
+ std::istringstream iss(line);
+ iss.exceptions(std::ios::failbit | std::ios::badbit);
using seconds = std::chrono::seconds;
seconds::rep s;
- in >> s;
+ iss >> s;
if (s == 2272060800)
continue;
leap_seconds.push_back(leap_second(sys_seconds{seconds{s}} -
offset,
@@ -2722,6 +2825,7 @@ find_read_and_leap_seconds()
}
return leap_seconds;
}
+#if !MISSING_LEAP_SECONDS
in.clear();
in.open(get_tz_dir() + std::string(1, folder_delimiter) + "right/UTC",
std::ios_base::binary);
@@ -2736,6 +2840,7 @@ find_read_and_leap_seconds()
{
return load_just_leaps(in);
}
+#endif
return {};
}
@@ -2842,7 +2947,8 @@ bool
file_exists(const std::string& filename)
{
#ifdef _WIN32
- return ::_access(filename.c_str(), 0) == 0;
+ std::wstring wfilename = convert_utf8_to_utf16(filename);
+ return ::_waccess(wfilename.c_str(), 0) == 0;
#else
return ::access(filename.c_str(), F_OK) == 0;
#endif
@@ -3419,16 +3525,27 @@ std::string
get_version(const std::string& path)
{
std::string version;
- std::ifstream infile(path + "version");
- if (infile.is_open())
+
+ std::string path_version = path + "version";
+
+ if (file_exists(path_version))
{
+ file_streambuf inbuf(path_version);
+ std::istream infile(&inbuf);
+
infile >> version;
+
if (!infile.fail())
return version;
}
- else
+
+ std::string path_news = path + "NEWS";
+
+ if (file_exists(path_news))
{
- infile.open(path + "NEWS");
+ file_streambuf inbuf(path_news);
+ std::istream infile(&inbuf);
+
while (infile)
{
infile >> version;
@@ -3439,6 +3556,7 @@ get_version(const std::string& path)
}
}
}
+
throw std::runtime_error("Unable to get Timezone database version from " +
path);
}
@@ -3510,7 +3628,13 @@ init_tzdb()
for (const auto& filename : files)
{
- std::ifstream infile(path + filename);
+ std::string file_path = path + filename;
+ if (!file_exists(file_path))
+ {
+ continue;
+ }
+ file_streambuf inbuf(file_path);
+ std::istream infile(&inbuf);
while (infile)
{
std::getline(infile, line);
@@ -3543,6 +3667,10 @@ init_tzdb()
{
db->zones.back().add(line);
}
+ else if (word.size() > 0 && word[0] == '#')
+ {
+ continue;
+ }
else
{
std::cerr << line << '\n';
@@ -3925,7 +4053,7 @@ tzdb::current_zone() const
auto p = result.find("ZONE=\"");
if (p != std::string::npos)
{
- result.erase(p, p+6);
+ result.erase(0, p+6);
result.erase(result.rfind('"'));
return locate_zone(result);
}
diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h
index 6d54e49ea2..467db6d199 100644
--- a/cpp/src/arrow/vendored/datetime/tz.h
+++ b/cpp/src/arrow/vendored/datetime/tz.h
@@ -60,7 +60,7 @@
# else
# define HAS_REMOTE_API 1
# endif
-# else // HAS_REMOTE_API makes no since when using the OS timezone database
+# else // HAS_REMOTE_API makes no sense when using the OS timezone database
# define HAS_REMOTE_API 0
# endif
#endif
@@ -853,7 +853,7 @@ private:
load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t
tzh_timecnt,
std::int32_t tzh_typecnt, std::int32_t
tzh_charcnt);
#else // !USE_OS_TZDB
- DATE_API sys_info get_info_impl(sys_seconds tp, int timezone) const;
+ DATE_API sys_info get_info_impl(sys_seconds tp, int tz_int) const;
DATE_API void adjust_infos(const std::vector<detail::Rule>& rules);
DATE_API void parse_info(std::istream& in);
#endif // !USE_OS_TZDB