Title: [199523] trunk
Revision
199523
Author
msab...@apple.com
Date
2016-04-13 17:47:40 -0700 (Wed, 13 Apr 2016)

Log Message

Some tests fail with ES6 `u` (Unicode) flag for regular expressions
https://bugs.webkit.org/show_bug.cgi?id=151597

Reviewed by Geoffrey Garen.

Source/_javascript_Core:

Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes
when specified in RegExps with both the unicode and ignoreCase flags.  Given the
case folding rules described in the standard via the meta function Canonicalize(),
which allow cross ASCII case folding when unicode is specified, the unicode characters
\u017f (Latin small letter long s) and \u212a (Kelvin sign) are part of the \w (word) characterClassEscape.
This is true because they case fold to 's' and 'k' respectively.  Because they case fold
to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with
\W with the unicode and ignoreCase flags.

* create_regex_tables:
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::YarrPattern::wordcharCharacterClass):
(JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:

Modified Paths

Diff

Modified: trunk/LayoutTests/ChangeLog (199522 => 199523)


--- trunk/LayoutTests/ChangeLog	2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/LayoutTests/ChangeLog	2016-04-14 00:47:40 UTC (rev 199523)
@@ -1,3 +1,15 @@
+2016-04-13  Michael Saboff  <msab...@apple.com>
+
+        Some tests fail with ES6 `u` (Unicode) flag for regular expressions
+        https://bugs.webkit.org/show_bug.cgi?id=151597
+
+        Reviewed by Geoffrey Garen.
+
+        Updated tests.
+
+        * js/regexp-unicode-expected.txt:
+        * js/script-tests/regexp-unicode.js:
+
 2016-04-13  Chris Dumez  <cdu...@apple.com>
 
         We should not speculatively revalidate cached redirects

Modified: trunk/LayoutTests/js/regexp-unicode-expected.txt (199522 => 199523)


--- trunk/LayoutTests/js/regexp-unicode-expected.txt	2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/LayoutTests/js/regexp-unicode-expected.txt	2016-04-14 00:47:40 UTC (rev 199523)
@@ -39,6 +39,38 @@
 PASS /(?:A|𐄣|b)x/iu.test("bx") is true
 PASS "a𐄣X".match(/a𐄣b|a𐄣x/iu)[0].length is 4
 PASS "Ťx".match(/ťx/iu)[0].length is 2
+PASS /\w/iu.test("ſ") is true
+PASS /\w/iu.test("K") is true
+PASS /!\w/iu.test("ſ") is false
+PASS /!\w/iu.test("K") is false
+PASS /\W/iu.test("ſ") is true
+PASS /\W/iu.test("K") is true
+PASS /!\W/iu.test("ſ") is false
+PASS /!\W/iu.test("K") is false
+PASS /[\w\d]/iu.test("ſ") is true
+PASS /[\w\d]/iu.test("K") is true
+PASS /[^\w\d]/iu.test("ſ") is false
+PASS /[^\w\d]/iu.test("K") is false
+PASS /[\W\d]/iu.test("ſ") is true
+PASS /[\W\d]/iu.test("K") is true
+PASS /[^\W\d]/iu.test("ſ") is false
+PASS /[^\W\d]/iu.test("K") is false
+PASS /\w/iu.test("S") is true
+PASS /\w/iu.test("K") is true
+PASS /!\w/iu.test("S") is false
+PASS /!\w/iu.test("K") is false
+PASS /\W/iu.test("S") is true
+PASS /\W/iu.test("K") is true
+PASS /!\W/iu.test("S") is false
+PASS /!\W/iu.test("K") is false
+PASS /[\w\d]/iu.test("S") is true
+PASS /[\w\d]/iu.test("K") is true
+PASS /[^\w\d]/iu.test("S") is false
+PASS /[^\w\d]/iu.test("K") is false
+PASS /[\W\d]/iu.test("S") is true
+PASS /[\W\d]/iu.test("K") is true
+PASS /[^\W\d]/iu.test("S") is false
+PASS /[^\W\d]/iu.test("K") is false
 PASS "𝌆".match(/^.$/u)[0].length is 2
 PASS "It is 78°".match(/.*/u)[0].length is 9
 PASS stringWithDanglingFirstSurrogate.match(/.*/u)[0].length is 3

Modified: trunk/LayoutTests/js/script-tests/regexp-unicode.js (199522 => 199523)


--- trunk/LayoutTests/js/script-tests/regexp-unicode.js	2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/LayoutTests/js/script-tests/regexp-unicode.js	2016-04-14 00:47:40 UTC (rev 199523)
@@ -43,7 +43,40 @@
 shouldBeTrue('/(?:A|\u{10123}|b)x/iu.test("bx")');
 shouldBe('"a\u{10123}X".match(/a\u{10123}b|a\u{10123}x/iu)[0].length', '4');
 shouldBe('"\u0164x".match(/\u0165x/iu)[0].length', '2');
+shouldBeTrue('/\\w/iu.test("\u017f")');
+shouldBeTrue('/\\w/iu.test("\u212a")');
+shouldBeFalse('/!\\w/iu.test("\u017f")');
+shouldBeFalse('/!\\w/iu.test("\u212a")');
+shouldBeTrue('/\\W/iu.test("\u017f")');
+shouldBeTrue('/\\W/iu.test("\u212a")');
+shouldBeFalse('/!\\W/iu.test("\u017f")');
+shouldBeFalse('/!\\W/iu.test("\u212a")');
+shouldBeTrue('/[\\w\\d]/iu.test("\u017f")');
+shouldBeTrue('/[\\w\\d]/iu.test("\u212a")');
+shouldBeFalse('/[^\\w\\d]/iu.test("\u017f")');
+shouldBeFalse('/[^\\w\\d]/iu.test("\u212a")');
+shouldBeTrue('/[\\W\\d]/iu.test("\u017f")');
+shouldBeTrue('/[\\W\\d]/iu.test("\u212a")');
+shouldBeFalse('/[^\\W\\d]/iu.test("\u017f")');
+shouldBeFalse('/[^\\W\\d]/iu.test("\u212a")');
+shouldBeTrue('/\\w/iu.test("S")');
+shouldBeTrue('/\\w/iu.test("K")');
+shouldBeFalse('/!\\w/iu.test("S")');
+shouldBeFalse('/!\\w/iu.test("K")');
+shouldBeTrue('/\\W/iu.test("S")');
+shouldBeTrue('/\\W/iu.test("K")');
+shouldBeFalse('/!\\W/iu.test("S")');
+shouldBeFalse('/!\\W/iu.test("K")');
+shouldBeTrue('/[\\w\\d]/iu.test("S")');
+shouldBeTrue('/[\\w\\d]/iu.test("K")');
+shouldBeFalse('/[^\\w\\d]/iu.test("S")');
+shouldBeFalse('/[^\\w\\d]/iu.test("K")');
+shouldBeTrue('/[\\W\\d]/iu.test("S")');
+shouldBeTrue('/[\\W\\d]/iu.test("K")');
+shouldBeFalse('/[^\\W\\d]/iu.test("S")');
+shouldBeFalse('/[^\\W\\d]/iu.test("K")');
 
+
 // Test . matches with Unicode flag
 shouldBe('"\u{1D306}".match(/^.$/u)[0].length', '2');
 shouldBe('"It is 78\u00B0".match(/.*/u)[0].length', '9');

Modified: trunk/Source/_javascript_Core/ChangeLog (199522 => 199523)


--- trunk/Source/_javascript_Core/ChangeLog	2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/ChangeLog	2016-04-14 00:47:40 UTC (rev 199523)
@@ -1,3 +1,30 @@
+2016-04-13  Michael Saboff  <msab...@apple.com>
+
+        Some tests fail with ES6 `u` (Unicode) flag for regular expressions
+        https://bugs.webkit.org/show_bug.cgi?id=151597
+
+        Reviewed by Geoffrey Garen.
+
+        Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes
+        when specified in RegExps with both the unicode and ignoreCase flags.  Given the
+        case folding rules described in the standard via the meta function Canonicalize(),
+        which allow cross ASCII case folding when unicode is specified, the unicode characters
+        \u017f (Latin small letter long s) and \u212a (Kelvin sign) are part of the \w (word) characterClassEscape.
+        This is true because they case fold to 's' and 'k' respectively.  Because they case fold
+        to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with
+        \W with the unicode and ignoreCase flags.
+
+        * create_regex_tables:
+        * yarr/YarrPattern.cpp:
+        (JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
+        (JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
+        (JSC::Yarr::YarrPattern::YarrPattern):
+        * yarr/YarrPattern.h:
+        (JSC::Yarr::YarrPattern::wordcharCharacterClass):
+        (JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass):
+        (JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
+        (JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass):
+
 2016-04-13  Commit Queue  <commit-qu...@webkit.org>
 
         Unreviewed, rolling out r199502 and r199511.

Modified: trunk/Source/_javascript_Core/create_regex_tables (199522 => 199523)


--- trunk/Source/_javascript_Core/create_regex_tables	2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/create_regex_tables	2016-04-14 00:47:40 UTC (rev 199523)
@@ -25,12 +25,14 @@
 
 types = {
     "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
-    "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
+    "wordUnicodeIgnoreCaseChar": { "UseTable" : False, "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z'), 0x017f, 0x212a]},
+    "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]},
+    "nonwordUnicodeIgnoreCaseChar": { "UseTable" : False, "Inverse": "wordchar", "data": ['k', 'K', 's', 'S', '`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]},
     "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
     "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]},
-    "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0xffff)]},
+    "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0x10ffff)]},
     "digits": { "UseTable" : False, "data": [('0', '9')]},
-    "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
+    "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0x10ffff)] }
 }
 entriesPerLine = 50
 arrays = "";

Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (199522 => 199523)


--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp	2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp	2016-04-14 00:47:40 UTC (rev 199523)
@@ -349,7 +349,13 @@
             m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
             break;
         case WordClassID:
-            m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
+            if (m_pattern.unicode() && m_pattern.ignoreCase()) {
+                if (invert)
+                    m_alternative->m_terms.append(PatternTerm(m_pattern.nonwordUnicodeIgnoreCaseCharCharacterClass(), false));
+                else
+                    m_alternative->m_terms.append(PatternTerm(m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(), false));
+            } else
+                m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
             break;
         case NewlineClassID:
             m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
@@ -386,7 +392,10 @@
             break;
         
         case WordClassID:
-            m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
+            if (m_pattern.unicode() && m_pattern.ignoreCase())
+                m_characterClassConstructor.append(invert ? m_pattern.nonwordUnicodeIgnoreCaseCharCharacterClass() : m_pattern.wordUnicodeIgnoreCaseCharCharacterClass());
+            else
+                m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
             break;
         
         default:
@@ -884,9 +893,11 @@
     , digitsCached(0)
     , spacesCached(0)
     , wordcharCached(0)
+    , wordUnicodeIgnoreCaseCharCached(0)
     , nondigitsCached(0)
     , nonspacesCached(0)
     , nonwordcharCached(0)
+    , nonwordUnicodeIgnoreCasecharCached(0)
 {
     *error = compile(pattern);
 }

Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (199522 => 199523)


--- trunk/Source/_javascript_Core/yarr/YarrPattern.h	2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h	2016-04-14 00:47:40 UTC (rev 199523)
@@ -287,9 +287,11 @@
 std::unique_ptr<CharacterClass> digitsCreate();
 std::unique_ptr<CharacterClass> spacesCreate();
 std::unique_ptr<CharacterClass> wordcharCreate();
+std::unique_ptr<CharacterClass> wordUnicodeIgnoreCaseCharCreate();
 std::unique_ptr<CharacterClass> nondigitsCreate();
 std::unique_ptr<CharacterClass> nonspacesCreate();
 std::unique_ptr<CharacterClass> nonwordcharCreate();
+std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate();
 
 struct TermChain {
     TermChain(PatternTerm term)
@@ -317,9 +319,11 @@
         digitsCached = 0;
         spacesCached = 0;
         wordcharCached = 0;
+        wordUnicodeIgnoreCaseCharCached = 0;
         nondigitsCached = 0;
         nonspacesCached = 0;
         nonwordcharCached = 0;
+        nonwordUnicodeIgnoreCasecharCached = 0;
 
         m_disjunctions.clear();
         m_userCharacterClasses.clear();
@@ -367,6 +371,14 @@
         }
         return wordcharCached;
     }
+    CharacterClass* wordUnicodeIgnoreCaseCharCharacterClass()
+    {
+        if (!wordUnicodeIgnoreCaseCharCached) {
+            m_userCharacterClasses.append(wordUnicodeIgnoreCaseCharCreate());
+            wordUnicodeIgnoreCaseCharCached = m_userCharacterClasses.last().get();
+        }
+        return wordUnicodeIgnoreCaseCharCached;
+    }
     CharacterClass* nondigitsCharacterClass()
     {
         if (!nondigitsCached) {
@@ -391,6 +403,14 @@
         }
         return nonwordcharCached;
     }
+    CharacterClass* nonwordUnicodeIgnoreCaseCharCharacterClass()
+    {
+        if (!nonwordUnicodeIgnoreCasecharCached) {
+            m_userCharacterClasses.append(nonwordUnicodeIgnoreCaseCharCreate());
+            nonwordUnicodeIgnoreCasecharCached = m_userCharacterClasses.last().get();
+        }
+        return nonwordUnicodeIgnoreCasecharCached;
+    }
 
     bool ignoreCase() const { return m_flags & FlagIgnoreCase; }
     bool multiline() const { return m_flags & FlagMultiline; }
@@ -414,9 +434,11 @@
     CharacterClass* digitsCached;
     CharacterClass* spacesCached;
     CharacterClass* wordcharCached;
+    CharacterClass* wordUnicodeIgnoreCaseCharCached;
     CharacterClass* nondigitsCached;
     CharacterClass* nonspacesCached;
     CharacterClass* nonwordcharCached;
+    CharacterClass* nonwordUnicodeIgnoreCasecharCached;
 };
 
 } } // namespace JSC::Yarr
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to