Changes in v3:

  - Rebased against latest version of previous patch

Some memory use + wall clock benchmarks for
regex_match(string(200000, 'a'), "(a|b|c)*"):

       HEAD~4         | HEAD
     +----------------+--------------+
 -O0 | 150MB, 0.17s   | 50MB, 0.31s |
     +----------------+-------------+
 -O2 | 100MB, 0.07s   | 50MB, 0.05s |
     +----------------+-------------+

So the only downside to this patch series is that at -O0 we're 1.8x slower
than before at -O0.  According to perf this is due std::vector (e.g. 30%
of runtime is in back(), end() and empty()).  That seems acceptable given the
50%-66% reduction in memory use (and with -O2 a slight speedup as well).


-- >8 --

The actual incrementing of the current input string position is done by
_M_handle_match which also makes sure to restore it afterwards, via a
restore_current frame.  But restoring _M_current is naturally only
necessary when backtracking is involved, not after every single match.

So this patch moves the responsibility of saving/restoring _M_current
from _M_handle_match to the branching nodes _M_handle_alternative and
_M_handle_repeat.  This is done by storing _M_current within the
fallback_next, fallback_rep_once_more and posix_alternative frames.
And we can get rid of the restore_current frame.

This reduces the maximum size of the _M_frames stack by 15% for

  regex_match(string(200000, 'a'), "(a|b|c)*")

        PR libstdc++/86164

libstdc++-v3/ChangeLog:

        * include/bits/regex_executor.tcc (__detail::_ExecutorFrameOpcode):
        Remove _S_fopcode_restore_current.
        (__detail::_Executor::_M_handle_repeat): Pass _M_current when
        pushing a fallback_next or fallback_rep_once_more frame.
        (__detail::_Executor::_M_handle_match): Don't push a
        restore_current frame.
        (__detail::_Executor::_M_handle_backref): Likewise and simplify
        accordingly.
        (__detail::_Executor::_M_handle_alternative): Pass _M_current when
        pushing a fallback_next or posix_alternative frame.
        (__detail::_Executor::_M_dfs) <case _S_fopcode_fallback_next>:
        Restore _M_current.
        <case _S_fopcode_fallback_rep_once_more>: Likewise.
        <case _S_fopcode_posix_alternative>: Likewise.
        <case _S_fopcode_restore_current>: Remove.
---
 libstdc++-v3/include/bits/regex_executor.tcc | 41 ++++++++++----------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
b/libstdc++-v3/include/bits/regex_executor.tcc
index f68b3641936e..5f0bf6aeacfb 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -62,7 +62,6 @@ namespace __detail
     _S_fopcode_fallback_rep_once_more,
     _S_fopcode_posix_alternative,
     _S_fopcode_merge_sol,
-    _S_fopcode_restore_current,
     _S_fopcode_restore_cur_results,
     _S_fopcode_restore_rep_count,
     _S_fopcode_decrement_rep_count,
@@ -210,12 +209,14 @@ namespace __detail
       // Greedy.
       if (!__state._M_neg)
        {
-         _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
+         _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
+                                _M_current);
          _M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
        }
       else // Non-greedy mode
        {
-         _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
+         _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
+                                _M_current);
          _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
        }
     }
@@ -294,7 +295,6 @@ namespace __detail
        return;
       if (__state._M_matches(*_M_current))
        {
-         _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
          ++_M_current;
          _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
        }
@@ -370,14 +370,8 @@ namespace __detail
              _M_re._M_automaton->_M_traits)._M_apply(
                  __submatch.first, __submatch.second, _M_current, __last))
        {
-         if (__last != _M_current)
-           {
-             _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
-             _M_current = __last;
-             _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
-           }
-         else
-           _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
+         _M_current = __last;
+         _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
        }
     }
 
@@ -427,14 +421,16 @@ namespace __detail
       if (_M_nfa._M_flags & regex_constants::ECMAScript)
        {
          // Pick lhs if it matches. Only try rhs if it doesn't.
-         _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
+         _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
+                                _M_current);
          _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
        }
       else
        {
          // Try both and compare the result.
          // See "case _S_opcode_accept:" handling above.
-         _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next);
+         _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next,
+                                _M_current);
          _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
        }
     }
@@ -492,13 +488,19 @@ namespace __detail
 
            case _S_fopcode_fallback_next:
              if (!_M_has_sol)
-               _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
+               {
+                 _M_current = __frame._M_pos;
+                 _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
+               }
              break;
 
            case _S_fopcode_fallback_rep_once_more:
              if (!_M_has_sol)
-               _M_frames.emplace_back(_S_fopcode_rep_once_more,
-                                      __frame._M_state_id);
+               {
+                 _M_current = __frame._M_pos;
+                 _M_frames.emplace_back(_S_fopcode_rep_once_more,
+                                        __frame._M_state_id);
+               }
              break;
 
            case _S_fopcode_rep_once_more:
@@ -508,6 +510,7 @@ namespace __detail
            case _S_fopcode_posix_alternative:
              _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
              _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
+             _M_current = __frame._M_pos;
              _M_has_sol = false;
              break;
 
@@ -515,10 +518,6 @@ namespace __detail
              _M_has_sol |= __frame._M_val;
              break;
 
-           case _S_fopcode_restore_current:
-             _M_current = __frame._M_pos;
-             break;
-
            case _S_fopcode_restore_cur_results:
              if (__frame._M_char == -1)
                _M_cur_results[__frame._M_state_id].first = __frame._M_pos;
-- 
2.53.0.rc1

Reply via email to