The BFS executor was mainly added to work around the unbounded stack
usage of the DFS executor, and is implemented on top of the DFS executor.
It however doesn't support backreferences and it mishandles ECMAScript's
(the default grammar) "leftmost first" alternative matching rule, making
it not suitable as a drop-in replacement. Users need to pass the
non-standard regex_constants::__polynomial flag to use it which isn't
well known, so likely the BFS executor is not used much in practice.
So this patch just removes the BFS executor. This will make converting
the recursive DFS executor to iteration with a heap-based stack much
easier.
PR libstdc++/86164
libstdc++-v3/ChangeLog:
* include/bits/regex.h (__detail::_RegexExecutorPolicy): Remove.
(__detail::__regex_algo_impl): Adjust forward declaration.
(__detail::_Executor): Adjust forward declaration.
(regex_match): Adjust after _RegexExecutorPolicy removal.
(regex_search): Likewise.
* include/bits/regex.tcc (__detail::__regex_algo_impl): Remove
_RegexExecutorPolicy parameter and the BFS code path.
* include/bits/regex_executor.h (__detail::_Executor): Remove
__dfs_mode template parameter.
(__detail::_Executor::__search_mode): Remove.
(__detail::_Executor::__dfs): Remove.
(__detail::_Executor::__bfs): Remove.
(__detail::_Executor::_M_main_dispatch): Consolidate.
(__detail::_Executor::_State_info): Remove template parameters
and the __bfs partial specialization, and make the __dfs partial
specialization the primary template.
(__detail::_Executor::_State_info::_M_visited): Remove.
(__detail::_Executor::_State_info::_M_queue): Remove.
(__detail::_Executor::_M_states): Adjust _State_info use.
* include/bits/regex_executor.tcc (__detail::_Executor): Remove
__dfs_mode template parameter throughout and simplify each
member function definition accordingly.
* testsiute/util/testsuite_regex.h (__gnu_test::regex_match_debug):
Adjust after BFS executor removal. Add TODO.
(__gnu_test::regex_search_debug): Likewise.
---
libstdc++-v3/include/bits/regex.h | 15 +-
libstdc++-v3/include/bits/regex.tcc | 28 +-
libstdc++-v3/include/bits/regex_executor.h | 83 +-----
libstdc++-v3/include/bits/regex_executor.tcc | 263 +++++-------------
libstdc++-v3/testsuite/util/testsuite_regex.h | 26 +-
5 files changed, 102 insertions(+), 313 deletions(-)
diff --git a/libstdc++-v3/include/bits/regex.h
b/libstdc++-v3/include/bits/regex.h
index 68fb6254f362..93599c62e8fd 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -49,8 +49,6 @@ _GLIBCXX_END_NAMESPACE_CXX11
namespace __detail
{
- enum class _RegexExecutorPolicy : int { _S_auto, _S_alternate };
-
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
bool
@@ -58,10 +56,9 @@ namespace __detail
match_results<_BiIter, _Alloc>& __m,
const basic_regex<_CharT, _TraitsT>& __re,
regex_constants::match_flag_type __flags,
- _RegexExecutorPolicy __policy,
bool __match_mode);
- template<typename, typename, typename, bool>
+ template<typename, typename, typename>
class _Executor;
template<typename _Tp>
@@ -838,7 +835,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
__detail::__regex_algo_impl(_Bp, _Bp, match_results<_Bp, _Ap>&,
const basic_regex<_Cp, _Rp>&,
regex_constants::match_flag_type,
- __detail::_RegexExecutorPolicy, bool);
+ bool);
template<typename, typename, typename, bool>
friend class __detail::_Executor;
@@ -2142,7 +2139,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
__detail::__regex_algo_impl(_Bp, _Bp, match_results<_Bp, _Ap>&,
const basic_regex<_Cp, _Rp>&,
regex_constants::match_flag_type,
- __detail::_RegexExecutorPolicy, bool);
+ bool);
// Reset contents to __size unmatched sub_match objects
// (plus additional objects for prefix, suffix and unmatched sub).
@@ -2287,8 +2284,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
- return __detail::__regex_algo_impl(__s, __e, __m, __re, __flags,
- __detail::_RegexExecutorPolicy::_S_auto, true);
+ return __detail::__regex_algo_impl(__s, __e, __m, __re, __flags, true);
}
/**
@@ -2443,8 +2439,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
- return __detail::__regex_algo_impl(__s, __e, __m, __re, __flags,
- __detail::_RegexExecutorPolicy::_S_auto, false);
+ return __detail::__regex_algo_impl(__s, __e, __m, __re, __flags, false);
}
/**
diff --git a/libstdc++-v3/include/bits/regex.tcc
b/libstdc++-v3/include/bits/regex.tcc
index 6ec259c25019..964ee4575a19 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -38,9 +38,6 @@ namespace __detail
// Result of merging regex_match and regex_search.
//
- // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
- // the other one if possible, for test purpose).
- //
// That __match_mode is true means regex_match, else regex_search.
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
@@ -50,7 +47,6 @@ namespace __detail
match_results<_BiIter, _Alloc>& __m,
const basic_regex<_CharT, _TraitsT>& __re,
regex_constants::match_flag_type __flags,
- _RegexExecutorPolicy __policy,
bool __match_mode)
{
if (__re._M_automaton == nullptr)
@@ -61,26 +57,12 @@ namespace __detail
__m._M_resize(__re._M_automaton->_M_sub_count());
bool __ret;
- if ((__re.flags() & regex_constants::__polynomial)
- || (__policy == _RegexExecutorPolicy::_S_alternate
- && !__re._M_automaton->_M_has_backref))
- {
- _Executor<_BiIter, _Alloc, _TraitsT, false>
- __executor(__s, __e, __res, __re, __flags);
- if (__match_mode)
- __ret = __executor._M_match();
- else
- __ret = __executor._M_search();
- }
+ _Executor<_BiIter, _Alloc, _TraitsT>
+ __executor(__s, __e, __res, __re, __flags);
+ if (__match_mode)
+ __ret = __executor._M_match();
else
- {
- _Executor<_BiIter, _Alloc, _TraitsT, true>
- __executor(__s, __e, __res, __re, __flags);
- if (__match_mode)
- __ret = __executor._M_match();
- else
- __ret = __executor._M_search();
- }
+ __ret = __executor._M_search();
if (__ret)
{
for (auto& __it : __res)
diff --git a/libstdc++-v3/include/bits/regex_executor.h
b/libstdc++-v3/include/bits/regex_executor.h
index f9857bfc4c1d..0c3f7f9050b0 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -43,18 +43,10 @@ namespace __detail
/**
* @brief Takes a regex and an input string and does the matching.
- *
- * The %_Executor class has two modes: DFS mode and BFS mode, controlled
- * by the template parameter %__dfs_mode.
*/
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
class _Executor
{
- using __search_mode = integral_constant<bool, __dfs_mode>;
- using __dfs = true_type;
- using __bfs = false_type;
-
enum class _Match_mode : unsigned char { _Exact, _Prefix };
public:
@@ -147,13 +139,10 @@ namespace __detail
bool
_M_main(_Match_mode __match_mode)
- { return _M_main_dispatch(__match_mode, __search_mode{}); }
-
- bool
- _M_main_dispatch(_Match_mode __match_mode, __dfs);
+ { return _M_main_dispatch(__match_mode); }
bool
- _M_main_dispatch(_Match_mode __match_mode, __bfs);
+ _M_main_dispatch(_Match_mode __match_mode);
bool
_M_is_word(_CharT __ch) const
@@ -232,62 +221,18 @@ namespace __detail
return (_M_re._M_automaton->_M_options() & __m) == __m;
}
- // Holds additional information used in BFS-mode.
- template<typename _SearchMode, typename _ResultsVec>
- struct _State_info;
-
- template<typename _ResultsVec>
- struct _State_info<__bfs, _ResultsVec>
- {
- explicit
- _State_info(_StateIdT __start, size_t __n)
- : _M_visited_states(new bool[__n]()), _M_start(__start)
- { }
-
- ~_State_info() { delete[] _M_visited_states; }
-
- _State_info(const _State_info&) = delete;
- _State_info& operator=(const _State_info&) = delete;
-
- bool _M_visited(_StateIdT __i)
- {
- if (_M_visited_states[__i])
- return true;
- _M_visited_states[__i] = true;
- return false;
- }
-
- void _M_queue(_StateIdT __i, const _ResultsVec& __res)
- { _M_match_queue.emplace_back(__i, __res); }
-
- // Dummy implementations for BFS mode.
- _BiIter* _M_get_sol_pos() { return nullptr; }
-
- // Saves states that need to be considered for the next character.
- _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
- // Indicates which states are already visited.
- bool* _M_visited_states;
- // To record current solution.
- _StateIdT _M_start;
- };
-
- template<typename _ResultsVec>
- struct _State_info<__dfs, _ResultsVec>
- {
- explicit
- _State_info(_StateIdT __start, size_t) : _M_start(__start)
- { }
-
- // Dummy implementations for DFS mode.
- bool _M_visited(_StateIdT) const { return false; }
- void _M_queue(_StateIdT, const _ResultsVec&) { }
+ struct _State_info
+ {
+ explicit
+ _State_info(_StateIdT __start, size_t) : _M_start(__start)
+ { }
- _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
+ _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
- // To record current solution.
- _StateIdT _M_start;
- _BiIter _M_sol_pos;
- };
+ // To record current solution.
+ _StateIdT _M_start;
+ _BiIter _M_sol_pos;
+ };
public:
_ResultsVec _M_cur_results;
@@ -298,7 +243,7 @@ namespace __detail
const _NFAT& _M_nfa;
_ResultsVec& _M_results;
_GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
- _State_info<__search_mode, _ResultsVec> _M_states;
+ _State_info _M_states;
_FlagT _M_flags;
// Do we have a solution so far?
bool _M_has_sol;
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
b/libstdc++-v3/include/bits/regex_executor.tcc
index 421e585f39d9..58d5a2bfec3a 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -36,9 +36,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
namespace __detail
{
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ bool _Executor<_BiIter, _Alloc, _TraitsT>::
_M_search()
{
if (_M_search_from_first())
@@ -78,10 +77,9 @@ namespace __detail
// Time complexity: \Omega(match_length), O(2^(_M_nfa.size()))
// Space complexity: \theta(match_results.size() + match_length)
//
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
- _M_main_dispatch(_Match_mode __match_mode, __dfs)
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ bool _Executor<_BiIter, _Alloc, _TraitsT>::
+ _M_main_dispatch(_Match_mode __match_mode)
{
_M_has_sol = false;
*_M_states._M_get_sol_pos() = _BiIter();
@@ -90,64 +88,9 @@ namespace __detail
return _M_has_sol;
}
- // ------------------------------------------------------------
- //
- // BFS mode:
- //
- // Russ Cox's article (http://swtch.com/~rsc/regexp/regexp1.html)
- // explained this algorithm clearly.
- //
- // It first computes epsilon closure (states that can be achieved without
- // consuming characters) for every state that's still matching,
- // using the same DFS algorithm, but doesn't re-enter states (using
- // _M_states._M_visited to check), nor follow _S_opcode_match.
- //
- // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
- // as the start state.
- //
- // It significantly reduces potential duplicate states, so has a better
- // upper bound; but it requires more overhead.
- //
- // Time complexity: \Omega(match_length * match_results.size())
- // O(match_length * _M_nfa.size() * match_results.size())
- // Space complexity: \Omega(_M_nfa.size() + match_results.size())
- // O(_M_nfa.size() * match_results.size())
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
- _M_main_dispatch(_Match_mode __match_mode, __bfs)
- {
- _M_states._M_queue(_M_states._M_start, _M_results);
- bool __ret = false;
- while (1)
- {
- _M_has_sol = false;
- if (_M_states._M_match_queue.empty())
- break;
- std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
- auto __old_queue = std::move(_M_states._M_match_queue);
- auto __alloc = _M_cur_results.get_allocator();
- for (auto& __task : __old_queue)
- {
- _M_cur_results = _ResultsVec(std::move(__task.second), __alloc);
- _M_dfs(__match_mode, __task.first);
- }
- if (__match_mode == _Match_mode::_Prefix)
- __ret |= _M_has_sol;
- if (_M_current == _M_end)
- break;
- ++_M_current;
- }
- if (__match_mode == _Match_mode::_Exact)
- __ret = _M_has_sol;
- _M_states._M_match_queue.clear();
- return __ret;
- }
-
// Return whether now match the given sub-NFA.
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ bool _Executor<_BiIter, _Alloc, _TraitsT>::
_M_lookahead(_StateIdT __next)
{
// Backreferences may refer to captured content.
@@ -172,9 +115,8 @@ namespace __detail
// infinite loop by refusing to continue when it's already been
// visited more than twice. It's `twice` instead of `once` because
// we need to spare one more time for potential group capture.
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_rep_once_more(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -202,9 +144,8 @@ namespace __detail
// of this quantifier". Executing _M_next first or _M_alt first don't
// mean the same thing, and we need to choose the correct order under
// given greedy mode.
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_repeat(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -213,40 +154,21 @@ namespace __detail
if (!__state._M_neg)
{
_M_rep_once_more(__match_mode, __i);
- // If it's DFS executor and already accepted, we're done.
- if (!__dfs_mode || !_M_has_sol)
+ // If we already accepted, we're done.
+ if (!_M_has_sol)
_M_dfs(__match_mode, __state._M_next);
}
else // Non-greedy mode
{
- if constexpr (__dfs_mode)
- {
- // vice-versa.
- _M_dfs(__match_mode, __state._M_next);
- if (!_M_has_sol)
- _M_rep_once_more(__match_mode, __i);
- }
- else
- {
- // DON'T attempt anything, because there's already another
- // state with higher priority accepted. This state cannot
- // be better by attempting its next node.
- if (!_M_has_sol)
- {
- _M_dfs(__match_mode, __state._M_next);
- // DON'T attempt anything if it's already accepted. An
- // accepted state *must* be better than a solution that
- // matches a non-greedy quantifier one more time.
- if (!_M_has_sol)
- _M_rep_once_more(__match_mode, __i);
- }
- }
+ // vice-versa.
+ _M_dfs(__match_mode, __state._M_next);
+ if (!_M_has_sol)
+ _M_rep_once_more(__match_mode, __i);
}
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -258,9 +180,8 @@ namespace __detail
__res.first = __back;
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -273,9 +194,8 @@ namespace __detail
__res = __back;
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ inline void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -283,9 +203,8 @@ namespace __detail
_M_dfs(__match_mode, __state._M_next);
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ inline void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -293,9 +212,8 @@ namespace __detail
_M_dfs(__match_mode, __state._M_next);
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ inline void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -305,9 +223,8 @@ namespace __detail
// Here __state._M_alt offers a single start node for a sub-NFA.
// We recursively invoke our algorithm to match the sub-NFA.
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
@@ -315,27 +232,20 @@ namespace __detail
_M_dfs(__match_mode, __state._M_next);
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_match(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
if (_M_current == _M_end)
return;
- if constexpr (__dfs_mode)
+ if (__state._M_matches(*_M_current))
{
- if (__state._M_matches(*_M_current))
- {
- ++_M_current;
- _M_dfs(__match_mode, __state._M_next);
- --_M_current;
- }
+ ++_M_current;
+ _M_dfs(__match_mode, __state._M_next);
+ --_M_current;
}
- else
- if (__state._M_matches(*_M_current))
- _M_states._M_queue(__state._M_next, _M_cur_results);
}
template<typename _BiIter, typename _TraitsT>
@@ -390,13 +300,10 @@ namespace __detail
// then compare it with
// (_M_current, _M_current + (__submatch.second - __submatch.first)).
// If matched, keep going; else just return and try another state.
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
{
- static_assert(__dfs_mode, "this should never be instantiated");
-
const auto& __state = _M_nfa[__i];
auto& __submatch = _M_cur_results[__state._M_backref_index];
if (!__submatch.matched)
@@ -423,70 +330,52 @@ namespace __detail
}
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_accept(_Match_mode __match_mode, _StateIdT)
{
- if constexpr (__dfs_mode)
+ __glibcxx_assert(!_M_has_sol);
+ if (__match_mode == _Match_mode::_Exact)
+ _M_has_sol = _M_current == _M_end;
+ else
+ _M_has_sol = true;
+ if (_M_current == _M_begin
+ && (_M_flags & regex_constants::match_not_null))
+ _M_has_sol = false;
+ if (_M_has_sol)
{
- __glibcxx_assert(!_M_has_sol);
- if (__match_mode == _Match_mode::_Exact)
- _M_has_sol = _M_current == _M_end;
- else
- _M_has_sol = true;
- if (_M_current == _M_begin
- && (_M_flags & regex_constants::match_not_null))
- _M_has_sol = false;
- if (_M_has_sol)
+ if (_M_nfa._M_flags & regex_constants::ECMAScript)
+ _M_results = _M_cur_results;
+ else // POSIX
{
- if (_M_nfa._M_flags & regex_constants::ECMAScript)
- _M_results = _M_cur_results;
- else // POSIX
+ __glibcxx_assert(_M_states._M_get_sol_pos());
+ // Here's POSIX's logic: match the longest one. However
+ // we never know which one (lhs or rhs of "|") is longer
+ // unless we try both of them and compare the results.
+ // The member variable _M_sol_pos records the end
+ // position of the last successful match. It's better
+ // to be larger, because POSIX regex is always greedy.
+ // TODO: This could be slow.
+ if (*_M_states._M_get_sol_pos() == _BiIter()
+ || std::distance(_M_begin,
+ *_M_states._M_get_sol_pos())
+ < std::distance(_M_begin, _M_current))
{
- __glibcxx_assert(_M_states._M_get_sol_pos());
- // Here's POSIX's logic: match the longest one. However
- // we never know which one (lhs or rhs of "|") is longer
- // unless we try both of them and compare the results.
- // The member variable _M_sol_pos records the end
- // position of the last successful match. It's better
- // to be larger, because POSIX regex is always greedy.
- // TODO: This could be slow.
- if (*_M_states._M_get_sol_pos() == _BiIter()
- || std::distance(_M_begin,
- *_M_states._M_get_sol_pos())
- < std::distance(_M_begin, _M_current))
- {
- *_M_states._M_get_sol_pos() = _M_current;
- _M_results = _M_cur_results;
- }
+ *_M_states._M_get_sol_pos() = _M_current;
+ _M_results = _M_cur_results;
}
}
}
- else
- {
- if (_M_current == _M_begin
- && (_M_flags & regex_constants::match_not_null))
- return;
- if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
- if (!_M_has_sol)
- {
- _M_has_sol = true;
- _M_results = _M_cur_results;
- }
- }
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_handle_alternative(_Match_mode __match_mode, _StateIdT __i)
{
const auto& __state = _M_nfa[__i];
if (_M_nfa._M_flags & regex_constants::ECMAScript)
{
- // TODO: Fix BFS support. It is wrong.
_M_dfs(__match_mode, __state._M_alt);
// Pick lhs if it matches. Only try rhs if it doesn't.
if (!_M_has_sol)
@@ -504,14 +393,10 @@ namespace __detail
}
}
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ void _Executor<_BiIter, _Alloc, _TraitsT>::
_M_dfs(_Match_mode __match_mode, _StateIdT __i)
{
- if (_M_states._M_visited(__i))
- return;
-
switch (_M_nfa[__i]._M_opcode())
{
case _S_opcode_repeat:
@@ -531,10 +416,7 @@ namespace __detail
case _S_opcode_match:
_M_handle_match(__match_mode, __i); break;
case _S_opcode_backref:
- if constexpr (__dfs_mode)
- _M_handle_backref(__match_mode, __i);
- else
- __builtin_unreachable();
+ _M_handle_backref(__match_mode, __i);
break;
case _S_opcode_accept:
_M_handle_accept(__match_mode, __i); break;
@@ -546,9 +428,8 @@ namespace __detail
}
// Return whether now is at some word boundary.
- template<typename _BiIter, typename _Alloc, typename _TraitsT,
- bool __dfs_mode>
- bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT>
+ bool _Executor<_BiIter, _Alloc, _TraitsT>::
_M_word_boundary() const
{
if (_M_current == _M_begin && (_M_flags &
regex_constants::match_not_bow))
diff --git a/libstdc++-v3/testsuite/util/testsuite_regex.h
b/libstdc++-v3/testsuite/util/testsuite_regex.h
index fdd8b01f3aaa..32258a0035ab 100644
--- a/libstdc++-v3/testsuite/util/testsuite_regex.h
+++ b/libstdc++-v3/testsuite/util/testsuite_regex.h
@@ -132,6 +132,8 @@ namespace __gnu_test
// regex_match_debug behaves like regex_match, but will run *two* executors
// (if there's no back-reference) and check if their results agree. If not,
// an exception is thrown. The arguments are the same as for regex_match.
+ // TODO: This is just equivalent to std::regex_match now that the BFS
+ // executor has been removed.
template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits>
bool
@@ -143,16 +145,7 @@ namespace __gnu_test
= std::regex_constants::match_default)
{
using namespace std::__detail;
- auto __res1 = __regex_algo_impl(__s, __e, __m, __re, __flags,
- _RegexExecutorPolicy::_S_auto,
- true);
- match_results<_Bi_iter, _Alloc> __mm;
- auto __res2 = __regex_algo_impl(__s, __e, __mm, __re, __flags,
- _RegexExecutorPolicy::_S_alternate,
- true);
- if (__res1 == __res2 && __m == __mm)
- return __res1;
- throw std::exception();
+ return __regex_algo_impl(__s, __e, __m, __re, __flags, true);
}
// No match_results version
@@ -218,6 +211,8 @@ namespace __gnu_test
// regex_match_debug behaves like regex_match, but will run *two* executors
// (if there's no back-reference) and check if their results agree. If not,
// an exception throws. One can use them just in the way of using
regex_match.
+ // TODO: This is just equivalent to std::regex_search now that the BFS
+ // executor has been removed.
template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits>
bool
@@ -229,16 +224,7 @@ namespace __gnu_test
= std::regex_constants::match_default)
{
using namespace std::__detail;
- auto __res1 = __regex_algo_impl(__s, __e, __m, __re, __flags,
- _RegexExecutorPolicy::_S_auto,
- false);
- match_results<_Bi_iter, _Alloc> __mm;
- auto __res2 = __regex_algo_impl(__s, __e, __mm, __re, __flags,
- _RegexExecutorPolicy::_S_alternate,
- false);
- if (__res1 == __res2 && __m == __mm)
- return __res1;
- throw(std::exception()); // Let test fail. Give it a name.
+ return __regex_algo_impl(__s, __e, __m, __re, __flags, false);
}
// No match_results version
--
2.53.0.rc0.25.gb5c409c40f