Changes in v4:
- Rebased
Changes in v3:
- Rebased against latest version of previous patch
-- >8 --
The actual incrementing of the current input string position is done by
_M_handle_match which also makes sure to restore it afterwards, via a
restore_current frame. But restoring _M_current is naturally only
necessary when backtracking is involved, not after every single match.
So this patch moves the responsibility of saving/restoring _M_current
from _M_handle_match to the branching nodes _M_handle_alternative and
_M_handle_repeat. This is done by storing _M_current within the
fallback_next, fallback_rep_once_more and posix_alternative frames.
And we can get rid of the restore_current frame.
This reduces the maximum size of the _M_frames stack by 15% for
regex_match(string(200000, 'a'), "(a|b|c)*")
PR libstdc++/86164
libstdc++-v3/ChangeLog:
* include/bits/regex_executor.tcc (__detail::_ExecutorFrameOpcode):
Remove _S_fopcode_restore_current.
(__detail::_Executor::_M_handle_repeat): Pass _M_current when
pushing a fallback_next or fallback_rep_once_more frame.
(__detail::_Executor::_M_handle_match): Don't push a
restore_current frame.
(__detail::_Executor::_M_handle_backref): Likewise and simplify
accordingly.
(__detail::_Executor::_M_handle_alternative): Pass _M_current when
pushing a fallback_next or posix_alternative frame.
(__detail::_Executor::_M_dfs) <case _S_fopcode_fallback_next>:
Restore _M_current.
<case _S_fopcode_fallback_rep_once_more>: Likewise.
<case _S_fopcode_posix_alternative>: Likewise.
<case _S_fopcode_restore_current>: Remove.
---
libstdc++-v3/include/bits/regex_executor.tcc | 44 ++++++++++----------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
b/libstdc++-v3/include/bits/regex_executor.tcc
index 9c24bba01e76..73cf66fdaf0f 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -63,7 +63,6 @@ namespace __detail
_S_fopcode_fallback_rep_once_more,
_S_fopcode_posix_alternative,
_S_fopcode_merge_sol,
- _S_fopcode_restore_current,
_S_fopcode_restore_cur_results,
_S_fopcode_restore_rep_count,
_S_fopcode_decrement_rep_count,
@@ -279,7 +278,8 @@ namespace __detail
{
if constexpr (__dfs_mode)
// If it's DFS executor and already accepted, we're done.
- _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
+ _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
+ _M_current);
else
_M_frames.emplace_back(_S_fopcode_next, __state._M_next);
_M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
@@ -289,7 +289,8 @@ namespace __detail
if constexpr (__dfs_mode)
{
// vice-versa.
- _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
+ _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
+ _M_current);
_M_frames.emplace_back(_S_fopcode_next, __state._M_next);
}
else
@@ -302,7 +303,8 @@ namespace __detail
// DON'T attempt anything if it's already accepted. An
// accepted state *must* be better than a solution that
// matches a non-greedy quantifier one more time.
- _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more,
__i);
+ _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
+ _M_current);
_M_frames.emplace_back(_S_fopcode_next, __state._M_next);
}
}
@@ -392,7 +394,6 @@ namespace __detail
{
if (__state._M_matches(*_M_current))
{
- _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
++_M_current;
_M_frames.emplace_back(_S_fopcode_next, __state._M_next);
}
@@ -475,14 +476,8 @@ namespace __detail
_M_re._M_automaton->_M_traits)._M_apply(
__submatch.first, __submatch.second, _M_current, __last))
{
- if (__last != _M_current)
- {
- _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
- _M_current = __last;
- _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
- }
- else
- _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
+ _M_current = __last;
+ _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
}
}
@@ -550,14 +545,16 @@ namespace __detail
{
// TODO: Fix BFS support. It is wrong.
// Pick lhs if it matches. Only try rhs if it doesn't.
- _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
+ _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
+ _M_current);
_M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
}
else
{
// Try both and compare the result.
// See "case _S_opcode_accept:" handling above.
- _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next);
+ _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next,
+ _M_current);
_M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
}
}
@@ -623,13 +620,19 @@ namespace __detail
case _S_fopcode_fallback_next:
if (!_M_has_sol)
- _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
+ {
+ _M_current = __frame._M_pos;
+ _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
+ }
break;
case _S_fopcode_fallback_rep_once_more:
if (!_M_has_sol)
- _M_frames.emplace_back(_S_fopcode_rep_once_more,
- __frame._M_state_id);
+ {
+ _M_current = __frame._M_pos;
+ _M_frames.emplace_back(_S_fopcode_rep_once_more,
+ __frame._M_state_id);
+ }
break;
case _S_fopcode_rep_once_more:
@@ -639,6 +642,7 @@ namespace __detail
case _S_fopcode_posix_alternative:
_M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
_M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
+ _M_current = __frame._M_pos;
_M_has_sol = false;
break;
@@ -646,10 +650,6 @@ namespace __detail
_M_has_sol |= __frame._M_val;
break;
- case _S_fopcode_restore_current:
- _M_current = __frame._M_pos;
- break;
-
case _S_fopcode_restore_cur_results:
if (__frame._M_char == -1)
_M_cur_results[__frame._M_state_id].first = __frame._M_pos;
--
2.53.0.rc1.25.g1faf5b085a