On Thu, Apr 18, 2024 at 6:34 PM Jonathan Wakely <jwak...@redhat.com> wrote: > > This would fix the but, how do people feel about it this close to the > gcc-14 release?
Guess we'll have to fix it anyway, so why not now ... (what could go wrong..) Richard. > Tested x86_64-linux. > > -- >8 -- > > Since 2022 the TZif format defined in the zic(8) man page has said that > links can refer to other links, rather than only referring to a zone. > This isn't supported by the C++20 spec, which assumes that the target() > for a chrono::time_zone_link always names a chrono::time_zone, not > another chrono::time_zone_link. > > This hasn't been a problem until now, because there are no entries in > the tzdata file that chain links together. However, Debian Sid has > changed the target of the Asia/Chungking link from the Asia/Shanghai > zone to the Asia/Chongqing link, creating a link chain. The libstdc++ > code is unable to handle this, so chrono::locate_zone("Asia/Chungking") > will fail with the tzdata.zi file from Debian Sid. > > It seems likely that the C++ spec will need a change to allow link > chains, so that the original structure of the IANA database can be fully > represented by chrono::tzdb. The alternative would be for chrono::tzdb > to flatten all chains when loading the data, so that a link's target is > always a zone, but this means throwing away information present in the > tzdata.zi input file. > > In anticipation of a change to the spec, this commit adds support for > chained links to libstdc++. When a name is found to be a link, we try to > find its target in the list of zones as before, but now if the target > isn't the name of a zone we don't fail. Instead we look for another link > with that name, and keep doing that until we reach the end of the chain > of links, and then look up the last target as a zone. > > This new logic would get stuck in a loop if the tzdata.zi file is buggy > and defines a link chain that contains a cycle, e.g. two links that > refer to each other. To deal with that unlikely case, we use the > tortoise and hare algorithm to detect cycles in link chains, and throw > an exception if we detect a cycle. Cycles in links should never happen, > and it is expected that link chains will be short (if they occur at all) > and so the code is optimized for short chains without cycles. Longer > chains (four or more links) and cycles will do more work, but won't fail > to resolve a chain or get stuck in a loop. > > libstdc++-v3/ChangeLog: > > PR libstdc++/114770 > * src/c++20/tzdb.cc (do_locate_zone): Support links that have > another link as their target. > * testsuite/std/time/tzdb/links.cc: New test. > --- > libstdc++-v3/src/c++20/tzdb.cc | 57 ++++- > libstdc++-v3/testsuite/std/time/tzdb/links.cc | 215 ++++++++++++++++++ > 2 files changed, 268 insertions(+), 4 deletions(-) > create mode 100644 libstdc++-v3/testsuite/std/time/tzdb/links.cc > > diff --git a/libstdc++-v3/src/c++20/tzdb.cc b/libstdc++-v3/src/c++20/tzdb.cc > index 639d1c440ba..c7c7cc9deee 100644 > --- a/libstdc++-v3/src/c++20/tzdb.cc > +++ b/libstdc++-v3/src/c++20/tzdb.cc > @@ -1599,7 +1599,7 @@ namespace std::chrono > const time_zone* > do_locate_zone(const vector<time_zone>& zones, > const vector<time_zone_link>& links, > - string_view tz_name) noexcept > + string_view tz_name) > { > // Lambda mangling changed between -fabi-version=2 and -fabi-version=18 > auto search = []<class Vec>(const Vec& v, string_view name) { > @@ -1610,13 +1610,62 @@ namespace std::chrono > return ptr; > }; > > + // Search zones first. > if (auto tz = search(zones, tz_name)) > return tz; > > + // Search links second. > if (auto tz_l = search(links, tz_name)) > - return search(zones, tz_l->target()); > + { > + // Handle the common case of a link that has a zone as the target. > + if (auto tz = search(zones, tz_l->target())) [[likely]] > + return tz; > > - return nullptr; > + // Either tz_l->target() doesn't exist, or we have a chain of links. > + // Use Floyd's cycle-finding algorithm to avoid infinite loops, > + // at the cost of extra lookups. In the common case we expect a > + // chain of links to be short so the loop won't run many times. > + // In particular, the duplicate lookups to move the tortoise > + // never happen unless the chain has four or more links. > + // When a chain contains a cycle we do multiple duplicate lookups, > + // but that case should never happen with correct tzdata.zi, > + // so there's no need to optimize cycle detection. > + > + const time_zone_link* tortoise = tz_l; > + const time_zone_link* hare = search(links, tz_l->target()); > + while (hare) > + { > + // Chains should be short, so first check if it ends here: > + if (auto tz = search(zones, hare->target())) [[likely]] > + return tz; > + > + // Otherwise follow the chain: > + hare = search(links, hare->target()); > + if (!hare) > + break; > + > + // Again, first check if the chain ends at a zone here: > + if (auto tz = search(zones, hare->target())) [[likely]] > + return tz; > + > + // Follow the chain again: > + hare = search(links, hare->target()); > + > + if (hare == tortoise) > + { > + string_view err = "std::chrono::tzdb: link cycle: "; > + string str; > + str.reserve(err.size() + tz_name.size()); > + str += err; > + str += tz_name; > + __throw_runtime_error(str.c_str()); > + } > + // Plod along the chain one step: > + tortoise = search(links, tortoise->target()); > + } > + } > + > + return nullptr; // not found > } > } // namespace > > @@ -1626,7 +1675,7 @@ namespace std::chrono > { > if (auto tz = do_locate_zone(zones, links, tz_name)) > return tz; > - string_view err = "tzdb: cannot locate zone: "; > + string_view err = "std::chrono::tzdb: cannot locate zone: "; > string str; > str.reserve(err.size() + tz_name.size()); > str += err; > diff --git a/libstdc++-v3/testsuite/std/time/tzdb/links.cc > b/libstdc++-v3/testsuite/std/time/tzdb/links.cc > new file mode 100644 > index 00000000000..0ba214846c6 > --- /dev/null > +++ b/libstdc++-v3/testsuite/std/time/tzdb/links.cc > @@ -0,0 +1,215 @@ > +// { dg-do run { target c++20 } } > +// { dg-require-effective-target tzdb } > +// { dg-require-effective-target cxx11_abi } > +// { dg-xfail-run-if "no weak override on AIX" { powerpc-ibm-aix* } } > + > +#include <chrono> > +#include <fstream> > +#include <testsuite_hooks.h> > + > +static bool override_used = false; > + > +namespace __gnu_cxx > +{ > + const char* zoneinfo_dir_override() { > + override_used = true; > + return "./"; > + } > +} > + > +using namespace std::chrono; > + > +void > +test_link_chains() > +{ > + std::ofstream("tzdata.zi") << R"(# version test_1 > +Link Greenwich G_M_T > +Link Etc/GMT Greenwich > +Zone Etc/GMT 0 - GMT > +Zone A_Zone 1 - ZON > +Link A_Zone L1 > +Link L1 L2 > +Link L2 L3 > +Link L3 L4 > +Link L4 L5 > +Link L5 L6 > +Link L3 L7 > +)"; > + > + const auto& db = reload_tzdb(); > + VERIFY( override_used ); // If this fails then XFAIL for the target. > + VERIFY( db.version == "test_1" ); > + > + // Simple case of a link with a zone as its target. > + VERIFY( locate_zone("Greenwich")->name() == "Etc/GMT" ); > + // Chains of links, where the target may be another link. > + VERIFY( locate_zone("G_M_T")->name() == "Etc/GMT" ); > + VERIFY( locate_zone("L1")->name() == "A_Zone" ); > + VERIFY( locate_zone("L2")->name() == "A_Zone" ); > + VERIFY( locate_zone("L3")->name() == "A_Zone" ); > + VERIFY( locate_zone("L4")->name() == "A_Zone" ); > + VERIFY( locate_zone("L5")->name() == "A_Zone" ); > + VERIFY( locate_zone("L6")->name() == "A_Zone" ); > + VERIFY( locate_zone("L7")->name() == "A_Zone" ); > +} > + > +void > +test_bad_links() > +{ > + // The zic(8) man page says > + // > the behavior is unspecified if multiple zone or link lines > + // > define the same name" > + // For libstdc++ the expected behaviour is described and tested below. > + std::ofstream("tzdata.zi") << R"(# version test_2 > +Zone A_Zone 1 - ZA > +Zone B_Zone 2 - ZB > +Link A_Zone B_Zone > +Link B_Zone C_Link > +Link C_Link D_Link > +Link D_Link E_Link > +)"; > + > + const auto& db2 = reload_tzdb(); > + VERIFY( override_used ); // If this fails then XFAIL for the target. > + VERIFY( db2.version == "test_2" ); > + > + // The standard requires locate_zone(name) to search for a zone first, > + // so this finds the zone B_Zone, not the link that points to zone A_Zone. > + VERIFY( locate_zone("B_Zone")->name() == "B_Zone" ); > + // And libstdc++ does the same at every step when following chained links: > + VERIFY( locate_zone("C_Link")->name() == "B_Zone" ); > + VERIFY( locate_zone("D_Link")->name() == "B_Zone" ); > + VERIFY( locate_zone("E_Link")->name() == "B_Zone" ); > + > + // The zic(8) man page says > + // > the behavior is unspecified if a chain of one or more links > + // > does not terminate in a Zone name. > + // For libstdc++ we throw std::runtime_error if locate_zone finds an > + // unterminated chain, including the case of a chain that includes a cycle. > + std::ofstream("tzdata.zi") << R"(# version test_3 > +Zone A_Zone 1 - ZON > +Link A_Zone GoodLink > +Link No_Zone BadLink > +Link LinkSelf LinkSelf > +Link LinkSelf Link1 > +Link Link1 Link2 > +Link Cycle2_A Cycle2_B > +Link Cycle2_B Cycle2_A > +Link Cycle3_A Cycle3_B > +Link Cycle3_B Cycle3_C > +Link Cycle3_C Cycle3_A > +Link Cycle3_C Cycle3_D > +Link Cycle4_A Cycle4_B > +Link Cycle4_B Cycle4_C > +Link Cycle4_C Cycle4_D > +Link Cycle4_D Cycle4_A > +)"; > + > + const auto& db3 = reload_tzdb(); > + VERIFY( db3.version == "test_3" ); > + > + // Lookup for valid links should still work even if other links are bad. > + VERIFY( locate_zone("GoodLink")->name() == "A_Zone" ); > + > +#if __cpp_exceptions > + try { > + locate_zone("BadLink"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("cannot locate zone: BadLink") ); > + } > + > + // LinkSelf forms a link cycle with itself. > + try { > + locate_zone("LinkSelf"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: LinkSelf") ); > + } > + > + // Any chain that leads to LinkSelf reaches a cycle. > + try { > + locate_zone("Link1"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Link1") ); > + } > + > + try { > + locate_zone("Link2"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Link2") ); > + } > + > + // Cycle2_A and Cycle2_B form a cycle of length two. > + try { > + locate_zone("Cycle2_A"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Cycle2_A") ); > + } > + > + try { > + locate_zone("Cycle2_B"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Cycle2_B") ); > + } > + > + // Cycle3_A, Cycle3_B and Cycle3_C form a cycle of length three. > + try { > + locate_zone("Cycle3_A"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Cycle3_A") ); > + } > + > + try { > + locate_zone("Cycle3_B"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Cycle3_B") ); > + } > + > + try { > + locate_zone("Cycle3_C"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Cycle3_C") ); > + } > + > + // Cycle3_D isn't part of the cycle, but it leads to it. > + try { > + locate_zone("Cycle3_D"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Cycle3_D") ); > + } > + > + // Cycle4_* links form a cycle of length four. > + try { > + locate_zone("Cycle4_A"); > + VERIFY( false ); > + } catch (const std::runtime_error& e) { > + std::string_view what(e.what()); > + VERIFY( what.ends_with("link cycle: Cycle4_A") ); > + } > +#endif > +} > + > +int main() > +{ > + test_link_chains(); > + test_bad_links(); > +} > -- > 2.44.0 >