On Sat, Jun 11, 2016 at 5:01 AM, Jonathan Wakely wrote:
> N.B. The "typename" and "::type" are redundant here, because it names
> the same type as the integral_constant itself, and you could
> use __bool_constant<__collate> instead:
>
>         return _M_transform_impl(_M_translate(__ch),
>                                 __bool_constant<__collate>());
>
> OK for trunk without the redundant typename ...::type, your choice
> whether to use __bool_constant or not.

Thanks! I was looking at std::bool_constant but that's in C++17.
__bool_constant is even better. :)

>
> Will this fix apply cleanly to the branches too?
>

For gcc6 yes; for gcc5 it needs more work. I guess it's OK to
backport to gcc6?

Updated the patch according to the discussion in the libstdc++/71500 bug.


-- 
Regards,
Tim Shen
commit 46d269bcfeebc497af4b3dc427d857f10f0ab931
Author: Tim Shen <tims...@google.com>
Date:   Sat Jun 11 00:41:09 2016 -0700

    2016-06-11  Tim Shen  <tims...@google.com>
    
        PR libstdc++/71500
        * include/bits/regex_compiler.h (_RegexTranslator<>::_M_transform):
        Take case into consideration when looking at range expression.
        * include/bits/regex_compiler.tcc
        (_Compiler<>::_M_insert_bracket_matcher,
        _Compiler<>::_M_expression_term):
        Re-define __last_char to buffer the last character, which may be
        flushed to _M_add_char or be used as the beginning of a range.
        * testsuite/28_regex/regression.cc: Add new testcase.

diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 410d61b..73c0af4 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -226,18 +226,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
        if (__icase)
          return _M_traits.translate_nocase(__ch);
-       else if (__collate)
-         return _M_traits.translate(__ch);
        else
-         return __ch;
+         return _M_traits.translate(__ch);
       }
 
       _StrTransT
       _M_transform(_CharT __ch) const
-      {
-       return _M_transform_impl(__ch, typename integral_constant<bool,
-                                __collate>::type());
-      }
+      { return _M_transform_impl(__ch, __bool_constant<__collate>()); }
 
     private:
       _StrTransT
@@ -247,7 +242,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StrTransT
       _M_transform_impl(_CharT __ch, true_type) const
       {
-       _StrTransT __str = _StrTransT(1, _M_translate(__ch));
+       _StrTransT __str = _StrTransT(1, __ch);
        return _M_traits.transform(__str.begin(), __str.end());
       }
 
@@ -433,6 +428,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_make_range(_CharT __l, _CharT __r)
       {
+       __l = _M_translator._M_translate(__l);
+       __r = _M_translator._M_translate(__r);
        if (__l > __r)
          __throw_regex_error(regex_constants::error_range,
                              "Invalid range in bracket expression.");
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index ff69e16..050435d 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -428,11 +428,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (!(_M_flags & regex_constants::ECMAScript))
        if (_M_try_char())
          {
-           __matcher._M_add_char(_M_value[0]);
            __last_char.first = true;
            __last_char.second = _M_value[0];
          }
       while (_M_expression_term(__last_char, __matcher));
+      if (__last_char.first)
+       __matcher._M_add_char(__last_char.second);
+
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(
                      *_M_nfa,
@@ -449,8 +451,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (_M_match_token(_ScannerT::_S_token_bracket_end))
        return false;
 
+      const auto __flush = [&]
+      {
+       if (__last_char.first)
+         {
+           __matcher._M_add_char(__last_char.second);
+           __last_char.first = false;
+         }
+      };
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
        {
+         __flush();
          auto __symbol = __matcher._M_add_collate_element(_M_value);
          if (__symbol.size() == 1)
            {
@@ -459,9 +470,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            }
        }
       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
-       __matcher._M_add_equivalence_class(_M_value);
+       {
+         __flush();
+         __matcher._M_add_equivalence_class(_M_value);
+       }
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
-       __matcher._M_add_character_class(_M_value, false);
+       {
+         __flush();
+         __matcher._M_add_character_class(_M_value, false);
+       }
       // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
       // except when the '-' is the first or last character in the bracket
       // expression ([--0]). ECMAScript treats all '-' after a range as a
@@ -476,7 +493,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        {
          if (!__last_char.first)
            {
-             __matcher._M_add_char(_M_value[0]);
+             __last_char.first = true;
+             __last_char.second = _M_value[0];
              if (_M_value[0] == '-'
                  && !(_M_flags & regex_constants::ECMAScript))
                {
@@ -488,8 +506,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                    "a dash is not treated literally only when it is at "
                    "beginning or end.");
                }
-             __last_char.first = true;
-             __last_char.second = _M_value[0];
            }
          else
            {
@@ -499,22 +515,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                    {
                      __matcher._M_make_range(__last_char.second , _M_value[0]);
                      __last_char.first = false;
+                     return true;
                    }
-                 else
-                   {
-                     if (_M_scanner._M_get_token()
-                         != _ScannerT::_S_token_bracket_end)
-                       __throw_regex_error(
-                         regex_constants::error_range,
-                         "Unexpected end of bracket expression.");
-                     __matcher._M_add_char(_M_value[0]);
-                   }
-               }
-             else
-               {
-                 __matcher._M_add_char(_M_value[0]);
-                 __last_char.second = _M_value[0];
+                 if (_M_scanner._M_get_token()
+                     != _ScannerT::_S_token_bracket_end)
+                   __throw_regex_error(
+                     regex_constants::error_range,
+                     "Unexpected end of bracket expression.");
                }
+             __matcher._M_add_char(__last_char.second);
+             __last_char.second = _M_value[0];
            }
        }
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
@@ -580,8 +590,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _BracketMatcher<_TraitsT, __icase, __collate>::
     _M_apply(_CharT __ch, false_type) const
     {
+      __ch = _M_translator._M_translate(__ch);
       bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(),
-                                     _M_translator._M_translate(__ch));
+                                     __ch);
       if (!__ret)
        {
          auto __s = _M_translator._M_transform(__ch);
diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc
index d367c8b..45b9f93 100644
--- a/libstdc++-v3/testsuite/28_regex/regression.cc
+++ b/libstdc++-v3/testsuite/28_regex/regression.cc
@@ -61,12 +61,40 @@ test03()
   VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow));
 }
 
+// PR libstdc++/71500
+void
+test04()
+{
+  bool test __attribute__((unused)) = true;
+
+  {
+    regex re1("[A-F]+", regex::ECMAScript | regex::icase);
+    VERIFY(regex_match_debug("aaa", re1));
+    VERIFY(regex_match_debug("AAA", re1));
+    VERIFY(regex_match_debug("fff", re1));
+    VERIFY(regex_match_debug("FFF", re1));
+  }
+  {
+    bool caught = false;
+    try
+    {
+      (void)regex("[T-f]+", regex::ECMAScript | regex::icase);
+    }
+    catch (...)
+    {
+      caught = true;
+    }
+    VERIFY(caught);
+  }
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
   return 0;
 }
 

Reply via email to