timshen created this revision.
Herald added a subscriber: sanjoy.
Herald added a reviewer: EricWF.

Always look up the class name through the traits object, even when the traits
type is regex_traits<>. The lookup happens at regex compile time, so it
shouldn't affect matching performance.

I also added ja_JP.UTF-8 as a common locale.


https://reviews.llvm.org/D37958

Files:
  libcxx/include/regex
  libcxx/test/std/re/re.traits/lookup_classname_user_defined.pass.cpp
  libcxx/utils/libcxx/test/target_info.py

Index: libcxx/utils/libcxx/test/target_info.py
===================================================================
--- libcxx/utils/libcxx/test/target_info.py
+++ libcxx/utils/libcxx/test/target_info.py
@@ -55,6 +55,7 @@
         ('fr_FR.UTF-8', 'French_France.1252'),
         ('ru_RU.UTF-8', 'Russian_Russia.1251'),
         ('zh_CN.UTF-8', 'Chinese_China.936'),
+        ('ja_JP.UTF-8', 'Japanese_Japan.932'),
         ('fr_CA.ISO8859-1', 'French_Canada.1252'),
         ('cs_CZ.ISO8859-2', 'Czech_Czech Republic.1250')
     ]
Index: libcxx/test/std/re/re.traits/lookup_classname_user_defined.pass.cpp
===================================================================
--- /dev/null
+++ libcxx/test/std/re/re.traits/lookup_classname_user_defined.pass.cpp
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: locale.ja_JP.UTF-8
+
+// <regex>
+
+// template <class charT> struct regex_traits;
+
+// template <class ForwardIterator>
+//   char_class_type
+//   lookup_classname(ForwardIterator first, ForwardIterator last,
+//                    bool icase = false) const;
+
+#include <regex>
+#include <cassert>
+#include "test_macros.h"
+#include "platform_support.h" // locale name macros
+
+struct wctype_traits : std::regex_traits<wchar_t>
+{ // regex traits whose character classes come from std::wctype / std::iswctype
+    using char_class_type = std::wctype_t;
+    template<class ForwardIt>
+    char_class_type lookup_classname(ForwardIt first, ForwardIt last, bool icase = false ) const {
+        (void)icase; // icase is ignored by this traits class
+        return std::wctype(std::string(first, last).c_str()); // copies [first, last) into a narrow string for the lookup
+    }
+    bool isctype(wchar_t c, char_class_type f) const {
+        return std::iswctype(c, f); // membership test of c in class f
+    }
+};
+
+int main()
+{ // Checks that basic_regex resolves [:name:] through a user-defined traits class.
+    std::locale::global(std::locale("ja_JP.UTF-8")); // same spelling as the REQUIRES line
+    std::wsmatch m;
+    std::wstring in = L"風の谷のナウシカ";
+
+    // matches all characters (they are classified as alnum)
+    std::wstring re1 = L"([[:alnum:]]+)";
+    std::regex_search(in, m, std::wregex(re1));
+    assert(m[1] == L"風の谷のナウシカ");
+
+    // matches only the katakana ("jkata" is looked up by wctype_traits)
+    std::wstring re2 = L"([[:jkata:]]+)";
+    std::regex_search(in, m, std::basic_regex<wchar_t, wctype_traits>(re2));
+    assert(m[1] == L"ナウシカ");
+}
Index: libcxx/include/regex
===================================================================
--- libcxx/include/regex
+++ libcxx/include/regex
@@ -2213,8 +2213,8 @@
     vector<pair<string_type, string_type> > __ranges_;
     vector<pair<_CharT, _CharT> > __digraphs_;
     vector<string_type> __equivalences_;
-    typename regex_traits<_CharT>::char_class_type __mask_;
-    typename regex_traits<_CharT>::char_class_type __neg_mask_;
+    typename _Traits::char_class_type __mask_;
+    typename _Traits::char_class_type __neg_mask_;
     bool __negate_;
     bool __icase_;
     bool __collate_;
@@ -2307,12 +2307,26 @@
     _LIBCPP_INLINE_VISIBILITY
     void __add_equivalence(const string_type& __s)
         {__equivalences_.push_back(__s);}
+
+    template<typename _Iter>
     _LIBCPP_INLINE_VISIBILITY
-    void __add_class(typename regex_traits<_CharT>::char_class_type __mask)
-        {__mask_ |= __mask;}
+    void __add_class(_Iter __begin, _Iter __end)
+    { // Looks up the class named by [__begin, __end) via the traits and ORs it into __mask_.
+      auto __class_type = __traits_.lookup_classname(__begin, __end, __icase_);
+      if (__class_type == 0) // a zero result is rejected as an unknown class name
+          __throw_regex_error<regex_constants::error_brack>();
+      __mask_ |= __class_type;
+    }
+
+    template<typename _Iter>
     _LIBCPP_INLINE_VISIBILITY
-    void __add_neg_class(typename regex_traits<_CharT>::char_class_type __mask)
-        {__neg_mask_ |= __mask;}
+    void __add_neg_class(_Iter __begin, _Iter __end)
+    { // Looks up the class named by [__begin, __end) via the traits and ORs it into __neg_mask_.
+      auto __class_type = __traits_.lookup_classname(__begin, __end, true); // NOTE(review): icase fixed to true; __add_class passes __icase_ - confirm
+      if (__class_type == 0) // a zero result is rejected as an unknown class name
+          __throw_regex_error<regex_constants::error_brack>();
+      __neg_mask_ |= __class_type;
+    }
 };
 
 template <class _CharT, class _Traits>
@@ -3841,23 +3855,23 @@
         __str = _CharT(8);
         return ++__first;
     case 'd':
-        __ml->__add_class(ctype_base::digit);
+        __ml->__add_class(__first, std::next(__first));
         return ++__first;
     case 'D':
-        __ml->__add_neg_class(ctype_base::digit);
+        __ml->__add_neg_class(__first, std::next(__first));
         return ++__first;
     case 's':
-        __ml->__add_class(ctype_base::space);
+        __ml->__add_class(__first, std::next(__first));
         return ++__first;
     case 'S':
-        __ml->__add_neg_class(ctype_base::space);
+        __ml->__add_neg_class(__first, std::next(__first));
         return ++__first;
     case 'w':
-        __ml->__add_class(ctype_base::alnum);
+        __ml->__add_class(__first, std::next(__first));
         __ml->__add_char('_');
         return ++__first;
     case 'W':
-        __ml->__add_neg_class(ctype_base::alnum);
+        __ml->__add_neg_class(__first, std::next(__first));
         __ml->__add_neg_char('_');
         return ++__first;
     }
@@ -4003,12 +4017,7 @@
     if (__temp == __last)
         __throw_regex_error<regex_constants::error_brack>();
     // [__first, __temp) contains all text in [: ... :]
-    typedef typename _Traits::char_class_type char_class_type;
-    char_class_type __class_type =
-        __traits_.lookup_classname(__first, __temp, __flags_ & icase);
-    if (__class_type == 0)
-        __throw_regex_error<regex_constants::error_brack>();
-    __ml->__add_class(__class_type);
+    __ml->__add_class(__first, __temp);
     __first = _VSTD::next(__temp, 2);
     return __first;
 }
@@ -4341,33 +4350,33 @@
         {
         case 'd':
             __ml = __start_matching_list(false);
-            __ml->__add_class(ctype_base::digit);
+            __ml->__add_class(__first, std::next(__first));
             ++__first;
             break;
         case 'D':
             __ml = __start_matching_list(true);
-            __ml->__add_class(ctype_base::digit);
+            __ml->__add_class(__first, std::next(__first));
             ++__first;
             break;
         case 's':
             __ml = __start_matching_list(false);
-            __ml->__add_class(ctype_base::space);
+            __ml->__add_class(__first, std::next(__first));
             ++__first;
             break;
         case 'S':
             __ml = __start_matching_list(true);
-            __ml->__add_class(ctype_base::space);
+            __ml->__add_class(__first, std::next(__first));
             ++__first;
             break;
         case 'w':
             __ml = __start_matching_list(false);
-            __ml->__add_class(ctype_base::alnum);
+            __ml->__add_class(__first, std::next(__first));
             __ml->__add_char('_');
             ++__first;
             break;
         case 'W':
             __ml = __start_matching_list(true);
-            __ml->__add_class(ctype_base::alnum);
+            __ml->__add_class(__first, std::next(__first));
             __ml->__add_char('_');
             ++__first;
             break;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to