miyuki created this revision. miyuki added reviewers: EricWF, mclow.lists. Currently, when parsing basic POSIX regular expressions, libc++ silently skips invalid escaped characters and trailing escapes. This patch changes that behavior so that a std::regex_error with its code set to error_escape is thrown in these cases.
https://reviews.llvm.org/D42693 Files: include/regex test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp =================================================================== --- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp +++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp @@ -19,11 +19,13 @@ #include <cassert> #include "test_macros.h" -static bool error_escape_thrown(const char *pat) +static bool error_escape_thrown(const char *pat, + std::regex_constants::syntax_option_type + syntax = std::regex_constants::ECMAScript) { bool result = false; try { - std::regex re(pat); + std::regex re(pat, syntax); } catch (const std::regex_error &ex) { result = (ex.code() == std::regex_constants::error_escape); } @@ -45,4 +47,15 @@ assert(!error_escape_thrown("[\\cA]")); assert(!error_escape_thrown("\\cA")); + std::regex_constants::syntax_option_type basic = + std::regex_constants::basic; + + assert(error_escape_thrown("\\a", basic)); + assert(error_escape_thrown("\\", basic)); + + assert(!error_escape_thrown("\\(a\\)", basic)); + assert(!error_escape_thrown("\\(a+\\)\\1", basic)); + assert(!error_escape_thrown("a\\{1,2\\}", basic)); + assert(!error_escape_thrown("\\.", basic)); + assert(!error_escape_thrown("\\*", basic)); } Index: include/regex =================================================================== --- include/regex +++ include/regex @@ -3442,23 +3442,32 @@ { if (__first != __last) { - _ForwardIterator __temp = _VSTD::next(__first); - if (__temp != __last) + if (*__first == '\\') { - if (*__first == '\\') + _ForwardIterator __temp = _VSTD::next(__first); + if (__temp == __last) + __throw_regex_error<regex_constants::error_escape>(); + + switch (*__temp) { - switch (*__temp) - { - case '^': - case '.': - case '*': - case '[': - case '$': - case '\\': - __push_char(*__temp); - __first = ++__temp; + case '^': + case '.': + case '*': + case '[': + case '$': + case 
'\\': + __push_char(*__temp); + __first = ++__temp; + break; + case '(': + case ')': + case '{': + case '}': + break; + default: + if (*__temp >= '1' && *__temp <= '9') break; - } + __throw_regex_error<regex_constants::error_escape>(); } } }
Index: test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp =================================================================== --- test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp +++ test/std/re/re.regex/re.regex.construct/bad_escape.pass.cpp @@ -19,11 +19,13 @@ #include <cassert> #include "test_macros.h" -static bool error_escape_thrown(const char *pat) +static bool error_escape_thrown(const char *pat, + std::regex_constants::syntax_option_type + syntax = std::regex_constants::ECMAScript) { bool result = false; try { - std::regex re(pat); + std::regex re(pat, syntax); } catch (const std::regex_error &ex) { result = (ex.code() == std::regex_constants::error_escape); } @@ -45,4 +47,15 @@ assert(!error_escape_thrown("[\\cA]")); assert(!error_escape_thrown("\\cA")); + std::regex_constants::syntax_option_type basic = + std::regex_constants::basic; + + assert(error_escape_thrown("\\a", basic)); + assert(error_escape_thrown("\\", basic)); + + assert(!error_escape_thrown("\\(a\\)", basic)); + assert(!error_escape_thrown("\\(a+\\)\\1", basic)); + assert(!error_escape_thrown("a\\{1,2\\}", basic)); + assert(!error_escape_thrown("\\.", basic)); + assert(!error_escape_thrown("\\*", basic)); } Index: include/regex =================================================================== --- include/regex +++ include/regex @@ -3442,23 +3442,32 @@ { if (__first != __last) { - _ForwardIterator __temp = _VSTD::next(__first); - if (__temp != __last) + if (*__first == '\\') { - if (*__first == '\\') + _ForwardIterator __temp = _VSTD::next(__first); + if (__temp == __last) + __throw_regex_error<regex_constants::error_escape>(); + + switch (*__temp) { - switch (*__temp) - { - case '^': - case '.': - case '*': - case '[': - case '$': - case '\\': - __push_char(*__temp); - __first = ++__temp; + case '^': + case '.': + case '*': + case '[': + case '$': + case '\\': + __push_char(*__temp); + __first = ++__temp; + break; + case '(': + case ')': + case '{': + case '}': + break; 
+ default: + if (*__temp >= '1' && *__temp <= '9') break; - } + __throw_regex_error<regex_constants::error_escape>(); } } }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits