This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ddc91cccf55 [Opt](timezone) Preload time offset in datetime (#42395)
ddc91cccf55 is described below
commit ddc91cccf55f352500da30e6fbcca86a1acd0278
Author: zclllhhjj <[email protected]>
AuthorDate: Fri Oct 25 18:17:42 2024 +0800
[Opt](timezone) Preload time offset in datetime (#42395)
## Proposed changes
Issue Number: close #xxx
```sql
mysql> insert into str select "2020-12-12 00:00:00+03:30" from
numbers("number" = "10000000");
```
before:
```sql
mysql> select count(cast(k0 as datetime)) from str;
+----------------------------------+
| count(cast(k0 as DATETIMEV2(0))) |
+----------------------------------+
| 10000000 |
+----------------------------------+
1 row in set (6.51 sec)
```
after:
```sql
mysql> select count(cast(k0 as datetime)) from str;
+----------------------------------+
| count(cast(k0 as DATETIMEV2(0))) |
+----------------------------------+
| 10000000 |
+----------------------------------+
1 row in set (0.25 sec)
```
---
be/src/util/timezone_utils.cpp | 50 +++++++++---
be/src/util/timezone_utils.h | 3 +
be/test/util/timezone_utils_test.cpp | 144 +++++++++++++++++++++++++++++++++++
3 files changed, 188 insertions(+), 9 deletions(-)
diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp
index 5aef6f8702b..6bb71ac4647 100644
--- a/be/src/util/timezone_utils.cpp
+++ b/be/src/util/timezone_utils.cpp
@@ -30,6 +30,7 @@
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/case_conv.hpp>
+#include <cstdlib>
#include <filesystem>
#include <memory>
#include <string>
@@ -58,6 +59,9 @@ static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change b
void TimezoneUtils::clear_timezone_caches() {
lower_zone_cache_->clear();
}
+int TimezoneUtils::cache_size() {
+ return lower_zone_cache_->size();
+}
static bool parse_save_name_tz(const std::string& tz_name) {
cctz::time_zone tz;
@@ -106,24 +110,54 @@ void TimezoneUtils::load_timezones_to_cache() {
}
lower_zone_cache_->erase("lmt"); // local mean time for every timezone
- LOG(INFO) << "Read " << lower_zone_cache_->size() << " timezones.";
+
+ load_offsets_to_cache();
+ LOG(INFO) << "Preloaded" << lower_zone_cache_->size() << " timezones.";
+}
+
+static std::string to_hour_string(int arg) {
+ if (arg < 0 && arg > -10) { // -9 to -1
+ return std::string {"-0"} + std::to_string(std::abs(arg));
+ } else if (arg >= 0 && arg < 10) { //0 to 9
+ return std::string {"0"} + std::to_string(arg);
+ }
+ return std::to_string(arg);
+}
+
+void TimezoneUtils::load_offsets_to_cache() {
+ for (int hour = -12; hour <= +14; hour++) {
+ for (int minute = 0; minute <= 30; minute += 30) {
+ std::string offset_str = (hour >= 0 ? "+" : "") + to_hour_string(hour) + ':' +
+ (minute == 0 ? "00" : "30");
+ cctz::time_zone result;
+ parse_tz_offset_string(offset_str, result);
+ lower_zone_cache_->emplace(offset_str, result);
+ }
+ }
+ // -00 for hour is also valid
+ std::string offset_str = "-00:00";
+ cctz::time_zone result;
+ parse_tz_offset_string(offset_str, result);
+ lower_zone_cache_->emplace(offset_str, result);
+ offset_str = "-00:30";
+ parse_tz_offset_string(offset_str, result);
+ lower_zone_cache_->emplace(offset_str, result);
}
bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) {
- if (auto it = lower_zone_cache_->find(to_lower_copy(timezone));
- it != lower_zone_cache_->end()) {
+ if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); it != lower_zone_cache_->end())
+ [[likely]] {
ctz = it->second;
return true;
}
- // offset format or just illegal
- return parse_tz_offset_string(timezone, ctz);
+ return false;
}
bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz) {
// like +08:00, which not in timezone_names_map_
re2::StringPiece value;
- if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value,
- 1)) {
+ if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, 1))
+ [[likely]] {
bool positive = value[0] != '-';
//Regular expression guarantees hour and minute must be int
@@ -139,8 +173,6 @@ bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::ti
int offset = hour * 60 * 60 + minute * 60;
offset *= positive ? 1 : -1;
ctz = cctz::fixed_time_zone(cctz::seconds(offset));
- // try to push the result time offset of "+08:00" need lock. now it's harmful for performance.
- // maybe we can use rcu of hazard-pointer to opt it.
return true;
}
return false;
diff --git a/be/src/util/timezone_utils.h b/be/src/util/timezone_utils.h
index c8bce44b5ab..3cdb17fc6fd 100644
--- a/be/src/util/timezone_utils.h
+++ b/be/src/util/timezone_utils.h
@@ -41,6 +41,9 @@ public:
private:
// for ut only
static void clear_timezone_caches();
+ static int cache_size();
+
+ static void load_offsets_to_cache();
static bool parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz);
};
diff --git a/be/test/util/timezone_utils_test.cpp b/be/test/util/timezone_utils_test.cpp
new file mode 100644
index 00000000000..9130e0be633
--- /dev/null
+++ b/be/test/util/timezone_utils_test.cpp
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/timezone_utils.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+
+#include <boost/utility/binary.hpp>
+#include <iostream>
+
+#include "cctz/time_zone.h"
+#include "gtest/gtest.h"
+#include "gtest/gtest_pred_impl.h"
+
+namespace doris {
+
+TEST(TimezoneUtilsTest, ParseOffset) {
+ const auto tp = cctz::civil_second(2011, 1, 1, 0, 0,
+ 0); // offset has no DST, every time point is acceptable
+ cctz::time_zone result;
+
+ TimezoneUtils::parse_tz_offset_string("+14:00", result);
+ auto cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 14 * 3600);
+
+ TimezoneUtils::parse_tz_offset_string("+00:00", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 0 * 3600);
+
+ TimezoneUtils::parse_tz_offset_string("+00:30", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 1800);
+
+ TimezoneUtils::parse_tz_offset_string("+10:30", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 10 * 3600 + 1800);
+
+ TimezoneUtils::parse_tz_offset_string("+01:00", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 1 * 3600);
+
+ TimezoneUtils::parse_tz_offset_string("-12:00", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, -12 * 3600);
+
+ TimezoneUtils::parse_tz_offset_string("-09:00", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, -9 * 3600);
+
+ TimezoneUtils::parse_tz_offset_string("-01:00", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, -1 * 3600);
+
+ TimezoneUtils::parse_tz_offset_string("-00:00", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 0 * 3600);
+
+ TimezoneUtils::parse_tz_offset_string("-00:30", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, -1800);
+
+ TimezoneUtils::parse_tz_offset_string("-10:30", result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, -10 * 3600 - 1800);
+
+ // out of range or illegal format
+ EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("+15:00", result));
+ EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("-13:00", result));
+ EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("+9:30", result));
+}
+
+TEST(TimezoneUtilsTest, LoadOffsets) {
+ TimezoneUtils::clear_timezone_caches();
+ TimezoneUtils::load_offsets_to_cache();
+ EXPECT_EQ(TimezoneUtils::cache_size(), (13 + 15) * 2);
+
+ TimezoneUtils::load_timezones_to_cache();
+ EXPECT_GE(TimezoneUtils::cache_size(), 100);
+}
+
+TEST(TimezoneUtilsTest, FindTimezone) {
+ TimezoneUtils::load_timezones_to_cache();
+
+ std::string tzname;
+ cctz::time_zone result;
+ const auto tp = cctz::civil_second(2011, 1, 1, 0, 0, 0);
+
+ tzname = "Asia/Shanghai";
+ TimezoneUtils::find_cctz_time_zone(tzname, result);
+ auto cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 8 * 3600);
+
+ tzname = "America/Los_Angeles";
+ TimezoneUtils::find_cctz_time_zone(tzname, result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, -8 * 3600);
+
+ tzname = "+00:30";
+ TimezoneUtils::find_cctz_time_zone(tzname, result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 1800);
+
+ tzname = "-00:00";
+ TimezoneUtils::find_cctz_time_zone(tzname, result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 0);
+
+ tzname = "+14:00";
+ TimezoneUtils::find_cctz_time_zone(tzname, result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, 14 * 3600);
+
+ tzname = "-12:00";
+ TimezoneUtils::find_cctz_time_zone(tzname, result);
+ cl = result.lookup(cctz::convert(tp, result));
+ EXPECT_EQ(cl.offset, -12 * 3600);
+
+ // out of range or illegal format
+ tzname = "+15:00";
+ EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result));
+
+ tzname = "-13:00";
+ EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result));
+
+ tzname = "+9:30";
+ EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result));
+}
+
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]