On Fri, Apr 25, 2014 at 4:40 PM, Tim Shen <timshe...@gmail.com> wrote:
> regex_match("", regex("(?:)*") will cause segfault because the
> infinite loop detection (writeen by me) is stupid. :O

Patch attached.


-- 
Regards,
Tim Shen
commit 71b52302793f501a60783ea6dec09202120ce592
Author: tim <timshe...@gmail.com>
Date:   Fri Apr 25 01:50:11 2014 -0400

    2014-04-25  Tim Shen  <timshe...@gmail.com>
    
        * include/bits/regex_executor.h: Add _M_rep_count to track how
        many times this repeat node are visited.
        * include/bits/regex_executor.tcc (_Executor<>::_M_rep_once_more,
        _Executor<>::_M_dfs): Use _M_rep_count to prevent entering
        infinite loop.

diff --git a/libstdc++-v3/include/bits/regex_executor.h 
b/libstdc++-v3/include/bits/regex_executor.h
index 708c78e..d185c27 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -73,6 +73,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_results(__results),
       _M_match_queue(__dfs_mode ? nullptr
                     : new vector<pair<_StateIdT, _ResultsVec>>()),
+      _M_rep_count(_M_nfa.size()),
       _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
       _M_flags((__flags & regex_constants::match_prev_avail)
               ? (__flags
@@ -104,6 +105,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     private:
       template<bool __match_mode>
        void
+       _M_rep_once_more(_StateIdT);
+
+      template<bool __match_mode>
+       void
        _M_dfs(_StateIdT __start);
 
       template<bool __match_mode>
@@ -151,6 +156,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // character.
       std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
       // Used in BFS, indicating that which state is already visited.
+      std::vector<pair<_BiIter, int>>                       _M_rep_count;
       std::unique_ptr<vector<bool>>                         _M_visited;
       _FlagT                                                _M_flags;
       // To record current solution.
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
b/libstdc++-v3/include/bits/regex_executor.tcc
index 68a5e04..bb10a72 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -161,6 +161,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return false;
     }
 
+  // __rep_count records how many times (__rep_count.second)
+  // this node are visited under certain input iterator
+  // (__rep_count.first). This prevent the executor from entering
+  // infinite loop by refusing to continue when it's already been
+  // visited more than twice. It's `twice` instead of `once` because
+  // we need to spare one more time for potential group capture.
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+    bool __dfs_mode>
+  template<bool __match_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_rep_once_more(_StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      auto& __rep_count = _M_rep_count[__i];
+      if (__rep_count.second == 0 || __rep_count.first != _M_current)
+       {
+         auto back = __rep_count;
+         __rep_count.first = _M_current;
+         __rep_count.second = 1;
+         _M_dfs<__match_mode>(__state._M_alt);
+         __rep_count = back;
+       }
+      else
+       {
+         if (__rep_count.second < 2)
+           {
+             __rep_count.second++;
+             _M_dfs<__match_mode>(__state._M_alt);
+             __rep_count.second--;
+           }
+       }
+    };
+
   // TODO: Use a function vector to dispatch, instead of using switch-case.
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
     bool __dfs_mode>
@@ -185,68 +218,60 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        // mean the same thing, and we need to choose the correct order under
        // given greedy mode.
        case _S_opcode_alternative:
-         // Greedy.
-         if (!__state._M_neg)
-           {
-             // "Once more" is preferred in greedy mode.
-             _M_dfs<__match_mode>(__state._M_alt);
-             // If it's DFS executor and already accepted, we're done.
-             if (!__dfs_mode || !_M_has_sol)
-               _M_dfs<__match_mode>(__state._M_next);
-           }
-         else // Non-greedy mode
-           {
-             if (__dfs_mode)
-               {
-                 // vice-versa.
+         {
+           // Greedy.
+           if (!__state._M_neg)
+             {
+               _M_rep_once_more<__match_mode>(__i);
+               // If it's DFS executor and already accepted, we're done.
+               if (!__dfs_mode || !_M_has_sol)
                  _M_dfs<__match_mode>(__state._M_next);
-                 if (!_M_has_sol)
-                   _M_dfs<__match_mode>(__state._M_alt);
-               }
-             else
-               {
-                 // DON'T attempt anything, because there's already another
-                 // state with higher priority accepted. This state cannot be
-                 // better by attempting its next node.
-                 if (!_M_has_sol)
-                   {
-                     _M_dfs<__match_mode>(__state._M_next);
-                     // DON'T attempt anything if it's already accepted. An
-                     // accepted state *must* be better than a solution that
-                     // matches a non-greedy quantifier one more time.
-                     if (!_M_has_sol)
-                       _M_dfs<__match_mode>(__state._M_alt);
-                   }
-               }
+             }
+           else // Non-greedy mode
+             {
+               if (__dfs_mode)
+                 {
+                   // vice-versa.
+                   _M_dfs<__match_mode>(__state._M_next);
+                   if (!_M_has_sol)
+                     _M_rep_once_more<__match_mode>(__i);
+                 }
+               else
+                 {
+                   // DON'T attempt anything, because there's already another
+                   // state with higher priority accepted. This state cannot be
+                   // better by attempting its next node.
+                   if (!_M_has_sol)
+                     {
+                       _M_dfs<__match_mode>(__state._M_next);
+                       // DON'T attempt anything if it's already accepted. An
+                       // accepted state *must* be better than a solution that
+                       // matches a non-greedy quantifier one more time.
+                       if (!_M_has_sol)
+                         _M_rep_once_more<__match_mode>(__i);
+                     }
+                 }
+             }
            }
          break;
        case _S_opcode_subexpr_begin:
-         // If there's nothing changed since last visit, do NOT continue.
-         // This prevents the executor from get into infinite loop when using
-         // "()*" to match "".
-         if (!_M_cur_results[__state._M_subexpr].matched
-             || _M_cur_results[__state._M_subexpr].first != _M_current)
-           {
-             auto& __res = _M_cur_results[__state._M_subexpr];
-             auto __back = __res.first;
-             __res.first = _M_current;
-             _M_dfs<__match_mode>(__state._M_next);
-             __res.first = __back;
-           }
+         {
+           auto& __res = _M_cur_results[__state._M_subexpr];
+           auto __back = __res.first;
+           __res.first = _M_current;
+           _M_dfs<__match_mode>(__state._M_next);
+           __res.first = __back;
+         }
          break;
        case _S_opcode_subexpr_end:
-         if (_M_cur_results[__state._M_subexpr].second != _M_current
-             || _M_cur_results[__state._M_subexpr].matched != true)
-           {
-             auto& __res = _M_cur_results[__state._M_subexpr];
-             auto __back = __res;
-             __res.second = _M_current;
-             __res.matched = true;
-             _M_dfs<__match_mode>(__state._M_next);
-             __res = __back;
-           }
-         else
+         {
+           auto& __res = _M_cur_results[__state._M_subexpr];
+           auto __back = __res;
+           __res.second = _M_current;
+           __res.matched = true;
            _M_dfs<__match_mode>(__state._M_next);
+           __res = __back;
+         }
          break;
        case _S_opcode_line_begin_assertion:
          if (_M_at_begin())
diff --git 
a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc
 
b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc
index 3fdbdad..c33d1b6 100644
--- 
a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc
+++ 
b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc
@@ -50,6 +50,7 @@ test01()
     const char s[] = "";
     VERIFY( regex_match_debug(s, m, re) );
   }
+  VERIFY(regex_match_debug("", regex("(?:)*")));
 }
 
 int

Reply via email to