This is an automated email from the ASF dual-hosted git repository. bneradt pushed a commit to branch host-file in repository https://gitbox.apache.org/repos/asf/trafficserver-libswoc.git
commit 1d2bc5b8dd5a3882228e79f88a151093bd98f45f Author: Alan M. Carroll <a...@apache.org> AuthorDate: Sun Aug 21 18:30:50 2022 -0500 Lexicon: add iteration by name. Add lexicon example for parsing host file. --- code/include/swoc/Lexicon.h | 254 ++++++++++++++++++++++++++++++-------------- code/include/swoc/swoc_ip.h | 13 +-- doc/code/Lexicon.en.rst | 114 ++++++++++++-------- example/CMakeLists.txt | 7 ++ example/ex_host_file.cc | 89 ++++++++++++++++ 5 files changed, 345 insertions(+), 132 deletions(-) diff --git a/code/include/swoc/Lexicon.h b/code/include/swoc/Lexicon.h index 19c6050..df5c112 100644 --- a/code/include/swoc/Lexicon.h +++ b/code/include/swoc/Lexicon.h @@ -40,12 +40,12 @@ what(std::string_view const &fmt, Args &&... args) { } // namespace detail /// Policy template use to specify the hash function for the integral type of @c Lexicon. -/// The default is to cast to the required hash value type, which is usually sufficient. -/// In some cases the cast doesn't work and this must be specialized. +/// The default is @c std::hash but that can be overridden by specializing this method. template <typename E> -uintmax_t +size_t Lexicon_Hash(E e) { - return static_cast<uintmax_t>(e); + static constexpr std::hash<E> hasher; + return hasher(e); } /** A bidirectional mapping between names and enumeration values. @@ -77,6 +77,15 @@ protected: struct Item; public: + /// An association of an enumeration value and a name. + /// @ note Used for initializer lists that have just a primary value. + using Pair = std::tuple<E, std::string_view>; + + /// Index in @c Pair for the enumeration value. + static constexpr auto VALUE_IDX = 0; + /// Index in @c Pair for name. + static constexpr auto NAME_IDX = 1; + /** A function to be called if a value is not found to provide a default name. * @param value The value. * @return A name for the value. 
@@ -102,9 +111,6 @@ public: */ using DefaultHandler = std::variant<std::monostate, E, std::string_view, UnknownNameHandler, UnknownValueHandler>; - /// Used for initializer lists that have just a primary value. - using Pair = std::tuple<E, std::string_view>; - /// Element of an initializer list that contains secondary names. struct Definition { const E &value; ///< Value for definition. @@ -167,7 +173,7 @@ public: * @param value Value to look up. * @return The name for @a value. */ - std::string_view operator[](E value) const; + std::string_view operator[](E const& value) const; /** Get the value for a @a name. * @@ -229,43 +235,59 @@ public: /// Get the number of values with definitions. size_t count() const; - /** Iterator over pairs of values and primary name pairs. - * Value is a 2-tuple of the enumeration type and the primary name. - */ - class const_iterator { - using self_type = const_iterator; - +protected: + /// Common features of container iterators. + class base_iterator { + using self_type = base_iterator; public: using value_type = const Pair; ///< Iteration value. using pointer = value_type *; ///< Pointer to iteration value. using reference = value_type &; ///< Reference to iteration value. using difference_type = ptrdiff_t; ///< Type of difference between iterators. using iterator_category = std::bidirectional_iterator_tag; ///< Concepts for iterator. - - /// Default constructor. - const_iterator() = default; - - /// Copy constructor. - const_iterator(self_type const &that) = default; - - /// Move construcgtor. - const_iterator(self_type &&that) = default; - + /// Default constructor (invalid iterator) + base_iterator() = default; /// Dereference. reference operator*() const; - /// Dereference. pointer operator->() const; - - /// Assignment. - self_type &operator=(self_type const &that) = default; - /// Equality. bool operator==(self_type const &that) const; - /// Inequality. 
bool operator!=(self_type const &that) const; + protected: + base_iterator(Item const * item) : _item(item) {} + + const Item *_item{nullptr}; ///< Current location in the container. + }; + +public: + + /** Iterator over pairs of values and primary name pairs. + * The value type is a @c Pair with the value and name. + */ + class value_iterator : public base_iterator { + using super_type = base_iterator; + using self_type = value_iterator; + + public: + using value_type = typename super_type::value_type; + using pointer = typename super_type::pointer; + using reference = typename super_type::reference; + + /// Default constructor. + value_iterator() = default; + + /// Copy constructor. + value_iterator(self_type const &that) = default; + + /// Move constructor. + value_iterator(self_type &&that) = default; + + /// Assignment. + self_type &operator=(self_type const &that) = default; + /// Increment. self_type &operator++(); @@ -279,18 +301,49 @@ public: self_type operator--(int); protected: - const_iterator(const Item *item); ///< Internal constructor. + value_iterator(const Item *item) : super_type(item) {}; ///< Internal constructor. - /// Update the internal values after changing the iterator location. - void update(); + friend Lexicon; + }; - const Item *_item{nullptr}; ///< Current location in the container. - typename std::remove_const<value_type>::type _v; ///< Synthesized value for dereference. + class name_iterator : public base_iterator { + private: + using self_type = name_iterator; + using super_type = base_iterator; + public: + /// Default constructor. + name_iterator() = default; + + /// Copy constructor. + name_iterator(self_type const &that) = default; + + /// Move constructor. + name_iterator(self_type &&that) = default; + + /// Assignment. + self_type &operator=(self_type const &that) = default; + + /// Increment. + self_type &operator++(); + + /// Increment. + self_type operator++(int); + + /// Decrement. 
+ self_type &operator--(); + + /// Decrement. + self_type operator--(int); + + protected: + name_iterator(const Item *item) : super_type(item) {}; ///< Internal constructor. friend Lexicon; }; - /// Pair iterator. + /// Iterator over values (each with a primary name). + using const_iterator = value_iterator; + /// Iterator over values. /// @note All iteration is over constant pairs, no modification is possible. using iterator = const_iterator; @@ -300,6 +353,34 @@ public: /// Iteration end. const_iterator end() const; + /// Iteration over names - every value/name pair. + name_iterator begin_names() const { return { _by_name.begin() }; } + /// Iteration over names - every value/name pair. + name_iterator end_names() const { return { _by_name.end() }; } + + /// @cond INTERNAL + // Helper struct to return to enable container iteration for names. + struct ByNameHelper { + self_type const & _lexicon; + ByNameHelper(self_type const & self) : _lexicon(self) {} + name_iterator begin() const { return _lexicon.begin_names(); } + name_iterator end() const { return _lexicon.end_names(); } + }; + /// @endcond + + /** Enable container iteration by name. + * The return value is a temporary of indeterminate type that provides @c begin and @c end methods which + * return name based iterators for @a this. This is useful for container based iteration. E.g. to iterate + * over all of the value/name pairs, + * @code + * for ( auto const & pair : lexicon.by_names()) { + * // code + * } + * @endcode + * @return Temporary. + */ + ByNameHelper by_names() const { return { *this }; } + protected: /// Handle providing a default name. using NameDefault = std::variant<std::monostate, std::string_view, UnknownValueHandler>; @@ -313,7 +394,7 @@ protected: /// Visitor - invalid value type. 
std::string_view operator()(std::monostate const &) const { - throw std::domain_error(detail::what("Lexicon: invalid enumeration value {}", static_cast<int>(_value)).data()); + throw std::domain_error("Lexicon: invalid enumeration value"); } /// Visitor - literal string. @@ -363,8 +444,7 @@ protected: */ Item(E value, std::string_view name); - E _value; ///< Definition value. - std::string_view _name; ///< Definition name + Pair _payload; ///< Enumeration and name. /// @cond INTERNAL_DETAIL // Intrusive list linkage support. @@ -387,7 +467,7 @@ protected: static Item *&next_ptr(Item *); static Item *&prev_ptr(Item *); static E key_of(Item *); - static uintmax_t hash_of(E); + static size_t hash_of(E); static bool equal(E lhs, E rhs); } _value_link; /// @endcond @@ -412,7 +492,7 @@ protected: // ---- // Item -template <typename E> Lexicon<E>::Item::Item(E value, std::string_view name) : _value(value), _name(name) {} +template <typename E> Lexicon<E>::Item::Item(E value, std::string_view name) : _payload(value, name) {} /// @cond INTERNAL_DETAIL template <typename E> @@ -442,13 +522,13 @@ Lexicon<E>::Item::ValueLinkage::prev_ptr(Item *item) -> Item *& { template <typename E> std::string_view Lexicon<E>::Item::NameLinkage::key_of(Item *item) { - return item->_name; + return std::get<NAME_IDX>(item->_payload); } template <typename E> E Lexicon<E>::Item::ValueLinkage::key_of(Item *item) { - return item->_value; + return std::get<VALUE_IDX>(item->_payload); } template <typename E> @@ -458,9 +538,8 @@ Lexicon<E>::Item::NameLinkage::hash_of(std::string_view s) { } template <typename E> -uintmax_t +size_t Lexicon<E>::Item::ValueLinkage::hash_of(E value) { - // In almost all cases, the values will be (roughly) sequential, so an identity hash works well. 
return Lexicon_Hash<E>(value); } @@ -520,10 +599,9 @@ Lexicon<E>::localize(std::string_view const &name) { template <typename E> std::string_view -Lexicon<E>::operator[](E value) const { - auto spot = _by_value.find(value); - if (spot != _by_value.end()) { - return spot->_name; +Lexicon<E>::operator[](E const& value) const { + if ( auto spot = _by_value.find(value) ; spot != _by_value.end()) { + return std::get<NAME_IDX>(spot->_payload); } return std::visit(NameDefaultVisitor{value}, _name_default); } @@ -531,9 +609,8 @@ Lexicon<E>::operator[](E value) const { template <typename E> E Lexicon<E>::operator[](std::string_view const &name) const { - auto spot = _by_name.find(name); - if (spot != _by_name.end()) { - return spot->_value; + if ( auto spot = _by_name.find(name) ; spot != _by_name.end()) { + return std::get<VALUE_IDX>(spot->_payload); } return std::visit(ValueDefaultVisitor{name}, _value_default); } @@ -544,7 +621,7 @@ Lexicon<E>::define(E value, const std::initializer_list<std::string_view> &names if (names.size() < 1) { throw std::invalid_argument("A defined value must have at least a primary name"); } - for (auto name : names) { + for (auto const& name : names) { if (_by_name.find(name) != _by_name.end()) { throw std::invalid_argument(detail::what("Duplicate name '{}' in Lexicon", name)); } @@ -569,7 +646,7 @@ Lexicon<E>::define(E value, Args &&... 
names) -> self_type & { template <typename E> auto Lexicon<E>::define(const Pair &pair) -> self_type & { - return this->define(std::get<0>(pair), {std::get<1>(pair)}); + return this->define(std::get<VALUE_IDX>(pair), {std::get<NAME_IDX>(pair)}); } template <typename E> @@ -621,54 +698,69 @@ Lexicon<E>::end() const -> const_iterator { // Iterators template <typename E> -void -Lexicon<E>::const_iterator::update() { - std::get<0>(_v) = _item->_value; - std::get<1>(_v) = _item->_name; +auto +Lexicon<E>::base_iterator::operator*() const -> reference { + return _item->_payload; } -template <typename E> Lexicon<E>::const_iterator::const_iterator(const Item *item) : _item(item) { - if (_item) { - this->update(); - }; +template <typename E> +auto +Lexicon<E>::base_iterator::operator->() const -> pointer { + return &(_item->_payload); +} + +template <typename E> +bool +Lexicon<E>::base_iterator::operator==(self_type const &that) const { + return _item == that._item; +} + +template <typename E> +bool +Lexicon<E>::base_iterator::operator!=(self_type const &that) const { + return _item != that._item; } template <typename E> auto -Lexicon<E>::const_iterator::operator*() const -> reference { - return _v; +Lexicon<E>::value_iterator::operator++() -> self_type & { + super_type::_item = super_type::_item->_value_link._next; + return *this; } template <typename E> auto -Lexicon<E>::const_iterator::operator->() const -> pointer { - return &_v; +Lexicon<E>::value_iterator::operator++(int) -> self_type { + self_type tmp{*this}; + ++*this; + return tmp; } template <typename E> -bool -Lexicon<E>::const_iterator::operator==(self_type const &that) const { - return _item == that._item; +auto +Lexicon<E>::value_iterator::operator--() -> self_type & { + super_type::_item = super_type::_item->_value_link->_prev; + return *this; } template <typename E> -bool -Lexicon<E>::const_iterator::operator!=(self_type const &that) const { - return _item != that._item; +auto 
+Lexicon<E>::value_iterator::operator--(int) -> self_type { + self_type tmp; + ++*this; + return tmp; } template <typename E> auto -Lexicon<E>::const_iterator::operator++() -> self_type & { - if (nullptr != (_item = _item->_value_link._next)) { - this->update(); - } +Lexicon<E>::name_iterator::operator++() -> self_type & { + super_type::_item = super_type::_item->_name_link._next; return *this; } template <typename E> auto -Lexicon<E>::const_iterator::operator++(int) -> self_type { +Lexicon<E>::name_iterator::operator++(int) -> self_type { self_type tmp{*this}; ++*this; return tmp; @@ -676,16 +768,14 @@ Lexicon<E>::const_iterator::operator++(int) -> self_type { template <typename E> auto -Lexicon<E>::const_iterator::operator--() -> self_type & { - if (nullptr != (_item = _item->_value_link->_prev)) { - this->update(); - } +Lexicon<E>::name_iterator::operator--() -> self_type & { + super_type::_item = super_type::_item->_name_link->_prev; return *this; } template <typename E> auto -Lexicon<E>::const_iterator::operator--(int) -> self_type { +Lexicon<E>::name_iterator::operator--(int) -> self_type { self_type tmp; ++*this; return tmp; diff --git a/code/include/swoc/swoc_ip.h b/code/include/swoc/swoc_ip.h index dec4d4c..4f76450 100644 --- a/code/include/swoc/swoc_ip.h +++ b/code/include/swoc/swoc_ip.h @@ -3367,28 +3367,25 @@ get(swoc::IPNet const &net) { namespace std { /// Standard hash support for @a IP4Addr. template <> struct hash<swoc::IP4Addr> { - uint32_t operator()(swoc::IP4Addr const &addr) const { + size_t operator()(swoc::IP4Addr const &addr) const { return addr.network_order(); } }; /// Standard hash support for @a IP6Addr. template <> struct hash<swoc::IP6Addr> { - uint32_t operator()(swoc::IP6Addr const &addr) const { + size_t operator()(swoc::IP6Addr const &addr) const { // XOR the 64 chunks then XOR that down to 32 bits. 
auto words = addr.as_span<uint64_t>(); - union { - uint64_t w; - uint32_t n[2]; - } x{words[0] ^ words[1]}; - return x.n[0] ^ x.n[1]; + return words[0] ^ words[1]; } }; /// Standard hash support for @a IPAddr. template <> struct hash<swoc::IPAddr> { - uint32_t operator()(swoc::IPAddr const &addr) const { + size_t operator()(swoc::IPAddr const &addr) const { return addr.is_ip4() ? hash<swoc::IP4Addr>()(addr.ip4()) : addr.is_ip6() ? hash<swoc::IP6Addr>()(addr.ip6()) : 0; } }; + } // namespace std diff --git a/doc/code/Lexicon.en.rst b/doc/code/Lexicon.en.rst index 64468b5..5ee9794 100644 --- a/doc/code/Lexicon.en.rst +++ b/doc/code/Lexicon.en.rst @@ -54,6 +54,9 @@ Values and names can be associated either using pairs of values and names, or a and a list of names, the first of which is the primary name. This must be consistent for all of the defined values, so if one value has multiple names, all names must use the value, name list form. +Defaults +======== + In addition, defaults can be specified. Because all possible defaults have distinct signatures there is no need to order them - the constructor can deduce what is meant. Defaults are very handy when using a Lexicon for parsing - the default value can be an invalid value, in which case checking @@ -80,52 +83,28 @@ equivalence. This is only a benefit if the pointer is to be stored and compared token = lex[lex[token]]; // Normalize string pointer. -Examples -======== - -For illustrative purposes, consider using :ref:`ip-space` where each address has a set of flags -representing the type of address, such as production, edge, secure, etc. This is stored in memory -as a ``std::bitset``. To load up the data a comma separated value file is provided which has the -first column as the IP address range and the subsequent values are flag names. - -The starting point is an enumeration with the address types: - -.. 
literalinclude:: ../../unit_tests/ex_Lexicon.cc - :start-after: doc.1.begin - :end-before: doc.1.end - -To do conversions a Lexicon is created: - -.. literalinclude:: ../../unit_tests/ex_Lexicon.cc - :start-after: doc.2.begin - :end-before: doc.2.end - -The file loading and parsing is then: - -.. literalinclude:: ../../unit_tests/ex_Lexicon.cc - :start-after: doc.load.begin - :end-before: doc.load.end +Iteration +========= -with the simulated file contents +For iteration, the lexicon is treated as a list of pairs of values and names. Standard iteration is +over the values and the primary names for those values. The value type of the iterator is a tuple +of the value and name. :: -.. literalinclude:: ../../unit_tests/ex_Lexicon.cc - :start-after: doc.file.begin - :end-before: doc.file.end + extern swoc::Lexicon<Type> lex; // Initialized elsewhere. + for ( auto const & pair : lex ) { + std::cout << std::get<Lexicon<Type>::VALUE_IDX>(pair) << " has the name " + << std::get<Lexicon<Type>::NAME_IDX>(pair) << std::endl; + } -This uses the Lexicon to convert the strings in the file to the enumeration values, which are the -bitset indices. The defalt is set to ``INVALID`` so that any string that doesn't match a string -in the Lexicon is mapped to ``INVALID``. +It is possible to iterate over the names +as well using the :libswoc:`Lexicon::begin_names` and :libswoc:`Lexicon::end_names` methods. For +convenience, the method :libswoc:`Lexicon::by_names` returns a temporary object which has :code:`begin` +and :code:`end` methods which return name iterators. This makes container iteration easier. :: -Once the IP Space is loaded, lookup is simple, given an address: - -.. literalinclude:: ../../unit_tests/ex_Lexicon.cc - :start-after: doc.lookup.begin - :end-before: doc.lookup.end - -At this point ``flags`` has the set of flags stored for that address from the original data. Data -can be accessed like :: - - if (flags[NetType::PROD]) { ... 
} + extern swoc::Lexicon<Type> lex; // Initialized elsewhere. + for ( auto const & pair : lex.by_names() ) { + // code for each pair. + } Constructing ============ @@ -175,6 +154,57 @@ because only the ``true`` and ``false`` values would be stored, ``INVALID`` indi error. The enumeration values were chosen so casting from ``bool`` to ``BoolTag`` yields the appropriate string. +Examples +======== + +For illustrative purposes, consider using :ref:`ip-space` where each address has a set of flags +representing the type of address, such as production, edge, secure, etc. This is stored in memory +as a ``std::bitset``. To load up the data a comma separated value file is provided which has the +first column as the IP address range and the subsequent values are flag names. + +The starting point is an enumeration with the address types: + +.. literalinclude:: ../../unit_tests/ex_Lexicon.cc + :start-after: doc.1.begin + :end-before: doc.1.end + +To do conversions a Lexicon is created: + +.. literalinclude:: ../../unit_tests/ex_Lexicon.cc + :start-after: doc.2.begin + :end-before: doc.2.end + +The file loading and parsing is then: + +.. literalinclude:: ../../unit_tests/ex_Lexicon.cc + :start-after: doc.load.begin + :end-before: doc.load.end + +with the simulated file contents + +.. literalinclude:: ../../unit_tests/ex_Lexicon.cc + :start-after: doc.file.begin + :end-before: doc.file.end + +This uses the Lexicon to convert the strings in the file to the enumeration values, which are the +bitset indices. The default is set to ``INVALID`` so that any string that doesn't match a string +in the Lexicon is mapped to ``INVALID``. + +Once the IP Space is loaded, lookup is simple, given an address: + +.. literalinclude:: ../../unit_tests/ex_Lexicon.cc + :start-after: doc.lookup.begin + :end-before: doc.lookup.end + +At this point ``flags`` has the set of flags stored for that address from the original data. Data +can be accessed like :: + + if (flags[NetType::PROD]) { ... 
} + +The example :swoc:git:`example/ex_host_file.cc` processes a standard host file into a lexicon that +enables forward and reverse lookups. A name can be used to find an address and an address can be +used to find the first name with that address. + Design Notes ************ diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index ae234cd..b85d555 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -34,3 +34,10 @@ if (CMAKE_COMPILER_IS_GNUCXX) target_compile_options(ex_lru_cache PRIVATE -Wall -Wextra -Werror) target_link_options(ex_lru_cache PRIVATE -lpthread) endif() + +add_executable(ex_host_file ex_host_file.cc) +target_link_libraries(ex_host_file PUBLIC libswoc) +if (CMAKE_COMPILER_IS_GNUCXX) + target_compile_options(ex_host_file PRIVATE -Wall -Wextra -Werror) + target_link_options(ex_host_file PRIVATE -lpthread) +endif() diff --git a/example/ex_host_file.cc b/example/ex_host_file.cc new file mode 100644 index 0000000..c6aeb5f --- /dev/null +++ b/example/ex_host_file.cc @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2022 Network Geographics + +/** @file + + Using Lexicon to represent a UNIX host file. 
+*/ + +#include <ios> + +#include "swoc/TextView.h" +#include "swoc/swoc_ip.h" +#include "swoc/Lexicon.h" +#include "swoc/bwf_ip.h" +#include "swoc/bwf_ex.h" +#include "swoc/bwf_std.h" +#include "swoc/swoc_file.h" +#include "swoc/Errata.h" + +using namespace std::literals; +using namespace swoc::literals; + +using swoc::TextView; +using swoc::Errata; +using swoc::IPAddr; +using swoc::IP4Addr; +using swoc::IP6Addr; + +using V4Lexicon = swoc::Lexicon<IP4Addr>; +using V6Lexicon = swoc::Lexicon<IP6Addr>; + +// -------------------------------------------------- + +static constexpr TextView HOST_FILE { R"( +127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4 +::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 + +192.168.56.233 tiphares +192.168.56.97 spira +192.168.3.22 livm +192.168.2.12 atc-build + +192.168.2.2 ns1 ns1.cdn.swoc.io +192.168.2.3 ns2 ns2.cdn.swoc.io +192.168.2.4 atc-dns dns.cdn.swoc.io +192.168.2.10 atc-ops +192.168.2.11 atc-portal +192.168.2.33 atc-monitor atc-mon + +192.168.2.19 mid-ts +192.168.2.32 edge-ts +)"}; + +int main(int, char *[]) { + V4Lexicon hosts_ipv4; + V6Lexicon hosts_ipv6; + + TextView src{HOST_FILE}; + while (src) { + auto line = src.take_prefix_at('\n').ltrim_if(&isspace); + if (!line || *line == '#') { + continue; + } + auto addr_token = line.take_prefix_if(&isspace); + IPAddr addr; + if (! addr.load(addr_token)) { + continue; // invalid address. 
+ } + while (line.ltrim_if(&isspace)) { + auto host = line.take_prefix_if(&isspace); + if (addr.is_ip4()) { + hosts_ipv4.define(IP4Addr(addr), host); + } else if (addr.is_ip6()) { + hosts_ipv6.define(IP6Addr(addr), host); + } + } + } + + std::cout << swoc::detail::what("{} -> {}\n", "ns2.cdn.swoc.io", hosts_ipv4["ns2.cdn.swoc.io"]); + std::cout << swoc::detail::what("{} -> {}\n", "ns2", hosts_ipv4["ns2"]); + std::cout << swoc::detail::what("{} -> {}\n", IP4Addr("192.168.2.3"), hosts_ipv4[IP4Addr("192.168.2.3")]); + + std::cout << "Table dump by name" << std::endl; + for ( auto const & item : hosts_ipv4.by_names()) { + std::cout << swoc::detail::what("{} -> {}\n", std::get<V4Lexicon::NAME_IDX>(item), std::get<V4Lexicon::VALUE_IDX>(item)); + } + + return 0; +}