On Fri, Apr 25, 2014 at 4:40 PM, Tim Shen <timshe...@gmail.com> wrote: > regex_match("", regex("(?:)*") will cause segfault because the > infinite loop detection (writeen by me) is stupid. :O
Patch attached. -- Regards, Tim Shen
commit 71b52302793f501a60783ea6dec09202120ce592 Author: tim <timshe...@gmail.com> Date: Fri Apr 25 01:50:11 2014 -0400 2014-04-25 Tim Shen <timshe...@gmail.com> * include/bits/regex_executor.h: Add _M_rep_count to track how many times this repeat node are visited. * include/bits/regex_executor.tcc (_Executor<>::_M_rep_once_more, _Executor<>::_M_dfs): Use _M_rep_count to prevent entering infinite loop. diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 708c78e..d185c27 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -73,6 +73,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_results(__results), _M_match_queue(__dfs_mode ? nullptr : new vector<pair<_StateIdT, _ResultsVec>>()), + _M_rep_count(_M_nfa.size()), _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())), _M_flags((__flags & regex_constants::match_prev_avail) ? (__flags @@ -104,6 +105,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION private: template<bool __match_mode> void + _M_rep_once_more(_StateIdT); + + template<bool __match_mode> + void _M_dfs(_StateIdT __start); template<bool __match_mode> @@ -151,6 +156,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // character. std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue; // Used in BFS, indicating that which state is already visited. + std::vector<pair<_BiIter, int>> _M_rep_count; std::unique_ptr<vector<bool>> _M_visited; _FlagT _M_flags; // To record current solution. diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 68a5e04..bb10a72 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -161,6 +161,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return false; } + // __rep_count records how many times (__rep_count.second) + // this node are visited under certain input iterator + // (__rep_count.first). This prevent the executor from entering + // infinite loop by refusing to continue when it's already been + // visited more than twice. It's `twice` instead of `once` because + // we need to spare one more time for potential group capture. + template<typename _BiIter, typename _Alloc, typename _TraitsT, + bool __dfs_mode> + template<bool __match_mode> + void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: + _M_rep_once_more(_StateIdT __i) + { + const auto& __state = _M_nfa[__i]; + auto& __rep_count = _M_rep_count[__i]; + if (__rep_count.second == 0 || __rep_count.first != _M_current) + { + auto back = __rep_count; + __rep_count.first = _M_current; + __rep_count.second = 1; + _M_dfs<__match_mode>(__state._M_alt); + __rep_count = back; + } + else + { + if (__rep_count.second < 2) + { + __rep_count.second++; + _M_dfs<__match_mode>(__state._M_alt); + __rep_count.second--; + } + } + }; + // TODO: Use a function vector to dispatch, instead of using switch-case. template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> @@ -185,68 +218,60 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // mean the same thing, and we need to choose the correct order under // given greedy mode. case _S_opcode_alternative: - // Greedy. - if (!__state._M_neg) - { - // "Once more" is preferred in greedy mode. - _M_dfs<__match_mode>(__state._M_alt); - // If it's DFS executor and already accepted, we're done. - if (!__dfs_mode || !_M_has_sol) - _M_dfs<__match_mode>(__state._M_next); - } - else // Non-greedy mode - { - if (__dfs_mode) - { - // vice-versa. + { + // Greedy. + if (!__state._M_neg) + { + _M_rep_once_more<__match_mode>(__i); + // If it's DFS executor and already accepted, we're done. + if (!__dfs_mode || !_M_has_sol) _M_dfs<__match_mode>(__state._M_next); - if (!_M_has_sol) - _M_dfs<__match_mode>(__state._M_alt); - } - else - { - // DON'T attempt anything, because there's already another - // state with higher priority accepted. This state cannot be - // better by attempting its next node. - if (!_M_has_sol) - { - _M_dfs<__match_mode>(__state._M_next); - // DON'T attempt anything if it's already accepted. An - // accepted state *must* be better than a solution that - // matches a non-greedy quantifier one more time. - if (!_M_has_sol) - _M_dfs<__match_mode>(__state._M_alt); - } - } + } + else // Non-greedy mode + { + if (__dfs_mode) + { + // vice-versa. + _M_dfs<__match_mode>(__state._M_next); + if (!_M_has_sol) + _M_rep_once_more<__match_mode>(__i); + } + else + { + // DON'T attempt anything, because there's already another + // state with higher priority accepted. This state cannot be + // better by attempting its next node. + if (!_M_has_sol) + { + _M_dfs<__match_mode>(__state._M_next); + // DON'T attempt anything if it's already accepted. An + // accepted state *must* be better than a solution that + // matches a non-greedy quantifier one more time. + if (!_M_has_sol) + _M_rep_once_more<__match_mode>(__i); + } + } + } } break; case _S_opcode_subexpr_begin: - // If there's nothing changed since last visit, do NOT continue. - // This prevents the executor from get into infinite loop when using - // "()*" to match "". - if (!_M_cur_results[__state._M_subexpr].matched - || _M_cur_results[__state._M_subexpr].first != _M_current) - { - auto& __res = _M_cur_results[__state._M_subexpr]; - auto __back = __res.first; - __res.first = _M_current; - _M_dfs<__match_mode>(__state._M_next); - __res.first = __back; - } + { + auto& __res = _M_cur_results[__state._M_subexpr]; + auto __back = __res.first; + __res.first = _M_current; + _M_dfs<__match_mode>(__state._M_next); + __res.first = __back; + } break; case _S_opcode_subexpr_end: - if (_M_cur_results[__state._M_subexpr].second != _M_current - || _M_cur_results[__state._M_subexpr].matched != true) - { - auto& __res = _M_cur_results[__state._M_subexpr]; - auto __back = __res; - __res.second = _M_current; - __res.matched = true; - _M_dfs<__match_mode>(__state._M_next); - __res = __back; - } - else + { + auto& __res = _M_cur_results[__state._M_subexpr]; + auto __back = __res; + __res.second = _M_current; + __res.matched = true; _M_dfs<__match_mode>(__state._M_next); + __res = __back; + } break; case _S_opcode_line_begin_assertion: if (_M_at_begin()) diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc index 3fdbdad..c33d1b6 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc @@ -50,6 +50,7 @@ test01() const char s[] = ""; VERIFY( regex_match_debug(s, m, re) ); } + VERIFY(regex_match_debug("", regex("(?:)*"))); } int