On Wed, Jan 8, 2014 at 5:38 PM, Paolo Carlini <paolo.carl...@oracle.com> wrote:
> I agree, it's probably fine for now, but please actually attach the patch ;)

Oops sorry >.<


So my plan is to instantiate _Compiler and _Executor instead of user
interfaces like basic_regex or regex_match, because the implementation
may change (say add a new executor) later. Is that Ok?


-- 
Regards,
Tim Shen
commit d9f47e783680a1cab86bd704e67236025cbdff18
Author: tim <timshe...@gmail.com>
Date:   Mon Jan 6 00:03:41 2014 -0500

    2014-01-08  Tim Shen  <timshe...@gmail.com>
    
        * bits/regex_automaton.tcc: Indentation fix.
        * bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
        _RegexTranslator<> _AnyMatcher<>, _CharMatcher<>,
        _BracketMatcher<>): Add bool option template parameters and
        specializations to make matching more efficient and space saving.
        * bits/regex_compiler.tcc: Likewise.

diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc 
b/libstdc++-v3/include/bits/regex_automaton.tcc
index 7edc67f..e222803 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -134,9 +134,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
     {
       __ostr << "digraph _Nfa {\n"
-               "  rankdir=LR;\n";
+               "  rankdir=LR;\n";
       for (size_t __i = 0; __i < this->size(); ++__i)
-        (*this)[__i]._M_dot(__ostr, __i);
+       (*this)[__i]._M_dot(__ostr, __i);
       __ostr << "}\n";
       return __ostr;
     }
diff --git a/libstdc++-v3/include/bits/regex_compiler.h 
b/libstdc++-v3/include/bits/regex_compiler.h
index 4ac67df..b73fe30 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -39,7 +39,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * @{
    */
 
-  template<typename _TraitsT>
+  template<typename, bool, bool>
     struct _BracketMatcher;
 
   /// Builds an NFA from an input iterator interval.
@@ -63,7 +63,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       typedef typename _ScannerT::_TokenT                     _TokenT;
       typedef _StateSeq<_TraitsT>                            _StateSeqT;
       typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
-      typedef _BracketMatcher<_TraitsT>                              
_BMatcherT;
       typedef std::ctype<typename _TraitsT::char_type>        _CtypeT;
 
       // accepts a specific token or returns false.
@@ -91,20 +90,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       bool
       _M_bracket_expression();
 
-      void
-      _M_expression_term(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_any_matcher_ecma();
 
-      bool
-      _M_range_expression(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_any_matcher_posix();
 
-      bool
-      _M_collating_symbol(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_char_matcher();
 
-      bool
-      _M_equivalence_class(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_character_class_matcher();
 
-      bool
-      _M_character_class(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_bracket_matcher(bool __neg);
+
+      template<bool __icase, bool __collate>
+       void
+       _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
+                          __matcher);
 
       int
       _M_cur_int_value(int __radix);
@@ -148,16 +157,110 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags);
     }
 
-  template<typename _TraitsT, bool __is_ecma>
-    struct _AnyMatcher
+  // [28.13.14]
+  template<typename _TraitsT, bool __icase, bool __collate>
+    class _RegexTranslator
     {
-      typedef typename _TraitsT::char_type       _CharT;
+    public:
+      typedef typename _TraitsT::char_type           _CharT;
+      typedef typename _TraitsT::string_type         _StringT;
+      typedef typename std::conditional<__collate,
+                                       _StringT,
+                                       _CharT>::type _StrTransT;
 
       explicit
-      _AnyMatcher(const _TraitsT& __traits)
+      _RegexTranslator(const _TraitsT& __traits)
       : _M_traits(__traits)
       { }
 
+      _CharT
+      _M_translate(_CharT __ch) const
+      {
+       if (__icase)
+         return _M_traits.translate_nocase(__ch);
+       else if (__collate)
+         return _M_traits.translate(__ch);
+       else
+         return __ch;
+      }
+
+      _StrTransT
+      _M_transform(_CharT __ch) const
+      {
+       return _M_transform_impl(__ch, typename integral_constant<bool,
+                                __collate>::type());
+      }
+
+    private:
+      _StrTransT
+      _M_transform_impl(_CharT __ch, false_type) const
+      { return __ch; }
+
+      _StrTransT
+      _M_transform_impl(_CharT __ch, true_type) const
+      {
+       _StrTransT __str = _StrTransT(1, _M_translate(__ch));
+       return _M_traits.transform(__str.begin(), __str.end());
+      }
+
+      const _TraitsT& _M_traits;
+    };
+
+  template<typename _TraitsT>
+    class _RegexTranslator<_TraitsT, false, false>
+    {
+    public:
+      typedef typename _TraitsT::char_type _CharT;
+      typedef _CharT                       _StrTransT;
+
+      explicit
+      _RegexTranslator(const _TraitsT& __traits)
+      { }
+
+      _CharT
+      _M_translate(_CharT __ch) const
+      { return __ch; }
+
+      _StrTransT
+      _M_transform(_CharT __ch) const
+      { return __ch; }
+    };
+
+  template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
+    struct _AnyMatcher;
+
+  template<typename _TraitsT, bool __icase, bool __collate>
+    struct _AnyMatcher<_TraitsT, false, __icase, __collate>
+    {
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
+
+      explicit
+      _AnyMatcher(const _TraitsT& __traits)
+      : _M_translator(__traits)
+      { }
+
+      bool
+      operator()(_CharT __ch) const
+      {
+       static auto __nul = _M_translator._M_translate('\0');
+       return _M_translator._M_translate(__ch) != __nul;
+      }
+
+      _TransT _M_translator;
+    };
+
+  template<typename _TraitsT, bool __icase, bool __collate>
+    struct _AnyMatcher<_TraitsT, true, __icase, __collate>
+    {
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
+
+      explicit
+      _AnyMatcher(const _TraitsT& __traits)
+      : _M_translator(__traits)
+      { }
+
       bool
       operator()(_CharT __ch) const
       { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
@@ -165,92 +268,66 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       bool
       _M_apply(_CharT __ch, true_type) const
       {
-       auto __c = _M_traits.translate(__ch);
-       if (__is_ecma)
-         {
-           static auto __n = _M_traits.translate('\n');
-           static auto __r = _M_traits.translate('\r');
-           return __c != __n && __c != __r;
-         }
-       else
-         {
-           static auto __nul = _M_traits.translate('\0');
-           return __c != __nul;
-         }
+       auto __c = _M_translator._M_translate(__ch);
+       auto __n = _M_translator._M_translate('\n');
+       auto __r = _M_translator._M_translate('\r');
+       return __c != __n && __c != __r;
       }
 
       bool
       _M_apply(_CharT __ch, false_type) const
       {
-       auto __c = _M_traits.translate(__ch);
-       if (__is_ecma)
-         {
-           static auto __n = _M_traits.translate('\n');
-           static auto __r = _M_traits.translate('\r');
-           static auto __u2028 = _M_traits.translate(u'\u2028');
-           static auto __u2029 = _M_traits.translate(u'\u2029');
-           return __c != __n && __c != __r && __c != __u2028
-             && __c != __u2029;
-         }
-       else
-         {
-           static auto __nul = _M_traits.translate('\0');
-           return __c != __nul;
-         }
+       auto __c = _M_translator._M_translate(__ch);
+       auto __n = _M_translator._M_translate('\n');
+       auto __r = _M_translator._M_translate('\r');
+       auto __u2028 = _M_translator._M_translate(u'\u2028');
+       auto __u2029 = _M_translator._M_translate(u'\u2029');
+       return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
       }
 
-      const _TraitsT& _M_traits;
+      _TransT _M_translator;
     };
 
-  template<typename _TraitsT, bool __icase>
+  template<typename _TraitsT, bool __icase, bool __collate>
     struct _CharMatcher
     {
-      typedef typename _TraitsT::char_type       _CharT;
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
 
       _CharMatcher(_CharT __ch, const _TraitsT& __traits)
-      : _M_traits(__traits), _M_ch(_M_translate(__ch))
+      : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
       { }
 
       bool
       operator()(_CharT __ch) const
-      { return _M_ch == _M_translate(__ch); }
+      { return _M_ch == _M_translator._M_translate(__ch); }
 
-      _CharT
-      _M_translate(_CharT __ch) const
-      {
-       if (__icase)
-         return _M_traits.translate_nocase(__ch);
-       else
-         return _M_traits.translate(__ch);
-      }
-
-      const _TraitsT& _M_traits;
-      _CharT          _M_ch;
+      _TransT _M_translator;
+      _CharT  _M_ch;
     };
 
   /// Matches a character range (bracket expression)
   // TODO: Convert used _M_flags fields to template parameters, including
   // collate and icase. Avoid using std::set, could use flat_set
   // (sorted vector and binary search) instead.
-  template<typename _TraitsT>
+  template<typename _TraitsT, bool __icase, bool __collate>
     struct _BracketMatcher
     {
     public:
-      typedef typename _TraitsT::char_type       _CharT;
-      typedef typename _TraitsT::char_class_type  _CharClassT;
-      typedef typename _TraitsT::string_type      _StringT;
-      typedef regex_constants::syntax_option_type _FlagT;
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
+      typedef typename _TransT::_StrTransT                   _StrTransT;
+      typedef typename _TraitsT::string_type                 _StringT;
+      typedef typename _TraitsT::char_class_type             _CharClassT;
 
     public:
       _BracketMatcher(bool __is_non_matching,
-                     const _TraitsT& __traits,
-                     _FlagT __flags)
-      :
+                     const _TraitsT& __traits)
+      : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
+      _M_is_non_matching(__is_non_matching)
 #ifdef _GLIBCXX_DEBUG
-      _M_is_ready(false),
+      , _M_is_ready(false)
 #endif
-      _M_traits(__traits), _M_class_set(0), _M_flags(__flags),
-      _M_is_non_matching(__is_non_matching)
       { }
 
       bool
@@ -263,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_add_char(_CharT __c)
       {
-       _M_char_set.insert(_M_translate(__c));
+       _M_char_set.insert(_M_translator._M_translate(__c));
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
@@ -276,7 +353,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                                 __s.data() + __s.size());
        if (__st.empty())
          __throw_regex_error(regex_constants::error_collate);
-       _M_char_set.insert(_M_translate(__st[0]));
+       _M_char_set.insert(_M_translator._M_translate(__st[0]));
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
@@ -302,7 +379,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
        auto __mask = _M_traits.lookup_classname(__s.data(),
                                                 __s.data() + __s.size(),
-                                                _M_is_icase());
+                                                __icase);
        if (__mask == 0)
          __throw_regex_error(regex_constants::error_ctype);
        _M_class_set |= __mask;
@@ -314,12 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_make_range(_CharT __l, _CharT __r)
       {
-       if (_M_flags & regex_constants::collate)
-         _M_range_set.insert(
-           make_pair(_M_get_str(_M_translate(__l)),
-                     _M_get_str(_M_translate(__r))));
-       else
-         _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
+       _M_range_set.insert(make_pair(_M_translator._M_transform(__l),
+                                     _M_translator._M_transform(__r)));
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
@@ -350,26 +423,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_apply(_CharT __ch, true_type) const
       { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
 
-      _CharT
-      _M_translate(_CharT __c) const
-      {
-       if (_M_is_icase())
-         return _M_traits.translate_nocase(__c);
-       else
-         return _M_traits.translate(__c);
-      }
-
-      bool
-      _M_is_icase() const
-      { return _M_flags & regex_constants::icase; }
-
-      _StringT
-      _M_get_str(_CharT __c) const
-      {
-       _StringT __s(1, __c);
-       return _M_traits.transform(__s.begin(), __s.end());
-      }
-
       void
       _M_make_cache(true_type)
       {
@@ -383,16 +436,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { }
 
     private:
-      _CacheT                            _M_cache;
-      std::set<_CharT>                   _M_char_set;
-      std::set<_StringT>                 _M_equiv_set;
-      std::set<pair<_StringT, _StringT>> _M_range_set;
-      const _TraitsT&                    _M_traits;
-      _CharClassT                        _M_class_set;
-      _FlagT                             _M_flags;
-      bool                               _M_is_non_matching;
+      _CacheT                                _M_cache;
+      std::set<_CharT>                       _M_char_set;
+      std::set<_StringT>                     _M_equiv_set;
+      std::set<pair<_StrTransT, _StrTransT>> _M_range_set;
+      _CharClassT                            _M_class_set;
+      _TransT                                _M_translator;
+      const _TraitsT&                        _M_traits;
+      bool                                   _M_is_non_matching;
 #ifdef _GLIBCXX_DEBUG
-      bool                               _M_is_ready;
+      bool                                   _M_is_ready;
 #endif
     };
 
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc 
b/libstdc++-v3/include/bits/regex_compiler.tcc
index 4da653f..9ff538d 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -278,6 +278,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        }
     }
 
+#define __INSERT_REGEX_MATCHER(__func, args...)\
+       do\
+         if (!(_M_flags & regex_constants::icase))\
+           if (!(_M_flags & regex_constants::collate))\
+             __func<false, false>(args);\
+           else\
+             __func<false, true>(args);\
+         else\
+           if (!(_M_flags & regex_constants::collate))\
+             __func<true, false>(args);\
+           else\
+             __func<true, true>(args);\
+       while (false)
+
   template<typename _FwdIter, typename _TraitsT>
     bool
     _Compiler<_FwdIter, _TraitsT>::
@@ -285,45 +299,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       if (_M_match_token(_ScannerT::_S_token_anychar))
        {
-         if (_M_flags & regex_constants::ECMAScript)
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_AnyMatcher<_TraitsT,
-                                       true>(_M_traits))));
+         if (!(_M_flags & regex_constants::ECMAScript))
+           __INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix);
          else
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_AnyMatcher<_TraitsT,
-                                       false>(_M_traits))));
+           __INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma);
        }
       else if (_M_try_char())
-       {
-         if (_M_flags & regex_constants::icase)
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_CharMatcher<_TraitsT,
-                                       true>(_M_value[0],
-                                             _M_traits))));
-         else
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_CharMatcher<_TraitsT,
-                                       false>(_M_value[0],
-                                              _M_traits))));
-       }
+       __INSERT_REGEX_MATCHER(_M_insert_char_matcher);
       else if (_M_match_token(_ScannerT::_S_token_backref))
        _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
                                 _M_insert_backref(_M_cur_int_value(10))));
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
-       {
-         _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
-         _BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
-                              _M_traits, _M_flags);
-         __matcher._M_add_character_class(_M_value);
-         __matcher._M_ready();
-         _M_stack.push(_StateSeqT(_M_nfa,
-               _M_nfa._M_insert_matcher(std::move(__matcher))));
-       }
+       __INSERT_REGEX_MATCHER(_M_insert_character_class_matcher);
       else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
        {
          _StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
@@ -357,19 +344,81 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _M_match_token(_ScannerT::_S_token_bracket_neg_begin);
       if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
        return false;
-      _BMatcherT __matcher(__neg, _M_traits, _M_flags);
+      __INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg);
+      return true;
+    }
+#undef __INSERT_REGEX_MATCHER
+
+  template<typename _FwdIter, typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_FwdIter, _TraitsT>::
+    _M_insert_any_matcher_ecma()
+    {
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher
+         (_AnyMatcher<_TraitsT, true, __icase, __collate>
+           (_M_traits))));
+    }
+
+  template<typename _FwdIter, typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_FwdIter, _TraitsT>::
+    _M_insert_any_matcher_posix()
+    {
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher
+         (_AnyMatcher<_TraitsT, false, __icase, __collate>
+           (_M_traits))));
+    }
+
+  template<typename _FwdIter, typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_FwdIter, _TraitsT>::
+    _M_insert_char_matcher()
+    {
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher
+         (_CharMatcher<_TraitsT, __icase, __collate>
+           (_M_value[0], _M_traits))));
+    }
+
+  template<typename _FwdIter, typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_FwdIter, _TraitsT>::
+    _M_insert_character_class_matcher()
+    {
+      _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
+      _BracketMatcher<_TraitsT, __icase, __collate> __matcher
+       (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
+      __matcher._M_add_character_class(_M_value);
+      __matcher._M_ready();
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher(std::move(__matcher))));
+    }
+
+  template<typename _FwdIter, typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_FwdIter, _TraitsT>::
+    _M_insert_bracket_matcher(bool __neg)
+    {
+      _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, 
_M_traits);
       while (!_M_match_token(_ScannerT::_S_token_bracket_end))
        _M_expression_term(__matcher);
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(_M_nfa,
                               _M_nfa._M_insert_matcher(std::move(__matcher))));
-      return true;
     }
 
   template<typename _FwdIter, typename _TraitsT>
+  template<bool __icase, bool __collate>
     void
     _Compiler<_FwdIter, _TraitsT>::
-    _M_expression_term(_BMatcherT& __matcher)
+    _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& 
__matcher)
     {
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
        __matcher._M_add_collating_element(_M_value);
@@ -450,25 +499,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __v;
     }
 
-  template<typename _TraitsT>
+  template<typename _TraitsT, bool __icase, bool __collate>
     bool
-    _BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const
+    _BracketMatcher<_TraitsT, __icase, __collate>::
+    _M_apply(_CharT __ch, false_type) const
     {
       bool __ret = false;
-      if (_M_traits.isctype(__ch, _M_class_set)
-         || _M_char_set.count(_M_translate(__ch))
-         || _M_equiv_set.count(_M_traits.transform_primary(&__ch, &__ch+1)))
+      if (_M_char_set.count(_M_translator._M_translate(__ch)))
        __ret = true;
       else
        {
-         _StringT __s = _M_get_str(_M_flags & regex_constants::collate
-                                   ? _M_translate(__ch) : __ch);
+         auto __s = _M_translator._M_transform(__ch);
          for (auto& __it : _M_range_set)
            if (__it.first <= __s && __s <= __it.second)
              {
                __ret = true;
                break;
              }
+         if (_M_traits.isctype(__ch, _M_class_set))
+           __ret = true;
+         else if (_M_equiv_set.count(_M_traits.
+                                     transform_primary(&__ch, &__ch+1)))
+           __ret = true;
        }
       if (_M_is_non_matching)
        return !__ret;

Reply via email to