Diff
Modified: trunk/LayoutTests/ChangeLog (221768 => 221769)
--- trunk/LayoutTests/ChangeLog 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/LayoutTests/ChangeLog 2017-09-07 23:13:38 UTC (rev 221769)
@@ -1,3 +1,16 @@
+2017-09-07 Michael Saboff <msab...@apple.com>
+
+ Add support for RegExp named capture groups
+ https://bugs.webkit.org/show_bug.cgi?id=176435
+
+ Reviewed by Filip Pizlo.
+
+ New regression tests.
+
+ * js/regexp-named-capture-groups-expected.txt: Added.
+ * js/regexp-named-capture-groups.html: Added.
+ * js/script-tests/regexp-named-capture-groups.js: Added.
+
2017-09-07 Joseph Pecoraro <pecor...@apple.com>
Augmented Inspector: Provide a way to inspect a DOM Node (DOM.inspect)
Added: trunk/LayoutTests/js/regexp-named-capture-groups-expected.txt (0 => 221769)
--- trunk/LayoutTests/js/regexp-named-capture-groups-expected.txt (rev 0)
+++ trunk/LayoutTests/js/regexp-named-capture-groups-expected.txt 2017-09-07 23:13:38 UTC (rev 221769)
@@ -0,0 +1,66 @@
+Test for of RegExp named capture groups
+
+On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
+
+
+PASS re1.toString() is "\/(?<month>\\d{2})\\/(?<day>\\d{2})\\/(?<year>\\d{4})\/"
+PASS execResult1[0] is "01/02/2001"
+PASS execResult1.groups.month is "01"
+PASS execResult1.groups.day is "02"
+PASS execResult1.groups.year is "2001"
+PASS Object.getOwnPropertyNames(execResult1).sort() is ["0","1","2","3","groups","index","input","length"]
+PASS Object.getOwnPropertyNames(execResult1.groups).sort() is ["day","month","year"]
+PASS matchResult1[0] is "01/02/2001"
+PASS matchResult1.groups.month is "01"
+PASS matchResult1.groups.day is "02"
+PASS matchResult1.groups.year is "2001"
+PASS Object.getOwnPropertyNames(matchResult1).sort() is ["0","1","2","3","groups","index","input","length"]
+PASS Object.getOwnPropertyNames(matchResult1.groups).sort() is ["day","month","year"]
+PASS matchResult2a[0] is "John W. Smith"
+PASS matchResult2a[1] is "John"
+PASS matchResult2a[2] is "W."
+PASS matchResult2a[3] is "Smith"
+PASS matchResult2a[1] is matchResult2a.groups.first_name
+PASS matchResult2a[2] is matchResult2a.groups.middle_initial
+PASS matchResult2a[3] is matchResult2a.groups.last_name
+PASS Object.getOwnPropertyNames(matchResult1).sort() is ["0","1","2","3","groups","index","input","length"]
+PASS matchResult2b[0] is "Sally Brown"
+PASS matchResult2b[1] is "Sally"
+PASS matchResult2b[2] is undefined.
+PASS matchResult2b[3] is "Brown"
+PASS matchResult2b[1] is matchResult2b.groups.first_name
+PASS matchResult2b[2] is matchResult2b.groups.middle_initial
+PASS matchResult2b[3] is matchResult2b.groups.last_name
+PASS Object.getOwnPropertyNames(matchResult1).sort() is ["0","1","2","3","groups","index","input","length"]
+PASS re3.toString() is "\/^(?<part1>.*):(?<part2>.*):\\k<part2>:\\k<part1>$\/"
+PASS re3.test("a:b:b:a") is true
+PASS re3.test("a:a:a:a") is true
+PASS re3.test("a:b:c:a") is false
+PASS first is "1"
+PASS second is "2"
+PASS result4 is "14.02.2010"
+PASS "third edition".match(/(?<auf\u200clage>\w+) edition/).groups.auf\u200clage is "third"
+PASS "fourth edition".match(/(?<auf\u200dlage>\w+) edition/).groups.auf\u200dlage is "fourth"
+PASS "10/20/1930".replace(/(?<month>\d{2})\/(?<day>\d{2})\/(?<year>\d{4})/, "$<day>-$<month>-$<year>") is "20-10-1930"
+PASS "10/20/1930".replace(/(?<month>\d{2})\/(?<day>\d{2})\/(?<year>\d{4})/, "$2-$<month>-$<year>") is "20-10-1930"
+PASS "10/20/1930".replace(/(?<month>\d{2})\/(?<day>\d{2})\/(?<year>\d{4})/, "$<day>-$1-$<year>") is "20-10-1930"
+PASS "10/20/1930".replace(/(?<month>\d{2})\/(?<day>\d{2})\/(?<year>\d{4})/, "$<day>-$<month>-$3") is "20-10-1930"
+PASS "Give me a \'k\'!".match(/Give me a \'\k\'/)[0] is "Give me a \'k\'"
+PASS "Give me \'k2\'!".match(/Give me \'\k2\'/)[0] is "Give me \'k2\'"
+PASS "Give me a \'kat\'!".match(/Give me a \'\kat\'/)[0] is "Give me a \'kat\'"
+PASS "Give me a \'k\'!".match(/Give me a \'\k\'/u)[0] threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS "Give me \'k2\'!".match(/Give me \'\k2\'/u)[0] threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS "Give me a \'kat\'!".match(/Give me a \'\kat\'/u)[0] threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS "10/20/1930".replace(/(?<month>\d{2})\/(?<day>\d{2})\/(?<year>\d{4})/, "$<day>-$<mouth>-$<year>") is "20--1930"
+PASS "10/20/1930".replace(/(?<month>\d{2})\/(?<day>\d{2})\/(?<year>\d{4})/, "$<day>-$<month>-$<year") is "20-10-$<year"
+PASS let r = new RegExp("/(?<groupName1>abc)|(?<groupName1>def)/") threw exception SyntaxError: Invalid regular expression: duplicate group specifier name.
+PASS let r = new RegExp("/(?< groupName1>abc)/") threw exception SyntaxError: Invalid regular expression: invalid group specifier name.
+PASS let r = new RegExp("/(?<g=oupName1>abc)/") threw exception SyntaxError: Invalid regular expression: invalid group specifier name.
+PASS let r = new RegExp("/(?<šgroupName1>abc)/u") threw exception SyntaxError: Invalid regular _expression_: invalid group specifier name.
+PASS let r = new RegExp("/(?<gšoupName1>abc)/u") threw exception SyntaxError: Invalid regular _expression_: invalid group specifier name.
+PASS let r = new RegExp("/(?<āgroupName1>abc)/u") threw exception SyntaxError: Invalid regular _expression_: invalid group specifier name.
+PASS let r = new RegExp("/(?<āgroupName1>abc)/u") threw exception SyntaxError: Invalid regular _expression_: invalid group specifier name.
+PASS successfullyParsed is true
+
+TEST COMPLETE
+
Added: trunk/LayoutTests/js/regexp-named-capture-groups.html (0 => 221769)
--- trunk/LayoutTests/js/regexp-named-capture-groups.html (rev 0)
+++ trunk/LayoutTests/js/regexp-named-capture-groups.html 2017-09-07 23:13:38 UTC (rev 221769)
@@ -0,0 +1,10 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<script src="../resources/js-test-pre.js"></script>
+</head>
+<body>
+<script src="script-tests/regexp-named-capture-groups.js"></script>
+<script src="../resources/js-test-post.js"></script>
+</body>
+</html>
Added: trunk/LayoutTests/js/script-tests/regexp-named-capture-groups.js (0 => 221769)
--- trunk/LayoutTests/js/script-tests/regexp-named-capture-groups.js (rev 0)
+++ trunk/LayoutTests/js/script-tests/regexp-named-capture-groups.js 2017-09-07 23:13:38 UTC (rev 221769)
@@ -0,0 +1,103 @@
+description(
+'Test for of RegExp named capture groups'
+);
+
+// Verify that we can create group names and that we properly create the "groups" object,
+// populated with the groups.
+var re1 = new RegExp("(?<month>\\d{2})/(?<day>\\d{2})/(?<year>\\d{4})", "");
+var src1 = "01/02/2001";
+var execResult1 = re1.exec(src1);
+shouldBe('re1.toString()', '"\\/(?<month>\\\\d{2})\\\\/(?<day>\\\\d{2})\\\\/(?<year>\\\\d{4})\\/"');
+shouldBe('execResult1[0]', '"01/02/2001"');
+shouldBe('execResult1.groups.month', '"01"');
+shouldBe('execResult1.groups.day', '"02"');
+shouldBe('execResult1.groups.year', '"2001"');
+shouldBe('Object.getOwnPropertyNames(execResult1).sort()', '["0","1","2","3","groups","index","input","length"]');
+shouldBe('Object.getOwnPropertyNames(execResult1.groups).sort()', '["day","month","year"]');
+
+var matchResult1 = src1.match(re1);
+shouldBe('matchResult1[0]', '"01/02/2001"');
+shouldBe('matchResult1.groups.month', '"01"');
+shouldBe('matchResult1.groups.day', '"02"');
+shouldBe('matchResult1.groups.year', '"2001"');
+shouldBe('Object.getOwnPropertyNames(matchResult1).sort()', '["0","1","2","3","groups","index","input","length"]');
+shouldBe('Object.getOwnPropertyNames(matchResult1.groups).sort()', '["day","month","year"]');
+
+var re2 = /(?<first_name>\w+)\s(?:(?<middle_initial>\w\.)\s)?(?<last_name>\w+)/;
+var matchResult2a = "John W. Smith".match(re2);
+
+shouldBe('matchResult2a[0]', '"John W. Smith"');
+shouldBe('matchResult2a[1]', '"John"');
+shouldBe('matchResult2a[2]', '"W."');
+shouldBe('matchResult2a[3]', '"Smith"');
+shouldBe('matchResult2a[1]', 'matchResult2a.groups.first_name');
+shouldBe('matchResult2a[2]', 'matchResult2a.groups.middle_initial');
+shouldBe('matchResult2a[3]', 'matchResult2a.groups.last_name');
+shouldBe('Object.getOwnPropertyNames(matchResult1).sort()', '["0","1","2","3","groups","index","input","length"]');
+
+// Verify that named groups that aren't matched are undefined.
+var matchResult2b = "Sally Brown".match(re2);
+
+shouldBe('matchResult2b[0]', '"Sally Brown"');
+shouldBe('matchResult2b[1]', '"Sally"');
+shouldBeUndefined('matchResult2b[2]');
+shouldBe('matchResult2b[3]', '"Brown"');
+shouldBe('matchResult2b[1]', 'matchResult2b.groups.first_name');
+shouldBe('matchResult2b[2]', 'matchResult2b.groups.middle_initial');
+shouldBe('matchResult2b[3]', 'matchResult2b.groups.last_name');
+shouldBe('Object.getOwnPropertyNames(matchResult1).sort()', '["0","1","2","3","groups","index","input","length"]');
+
+// Verify that named backreferences work.
+var re3 = /^(?<part1>.*):(?<part2>.*):\k<part2>:\k<part1>$/;
+shouldBe('re3.toString()', '"\\/^(?<part1>.*):(?<part2>.*):\\\\k<part2>:\\\\k<part1>$\\/"');
+shouldBeTrue('re3.test("a:b:b:a")');
+shouldBeTrue('re3.test("a:a:a:a")');
+shouldBeFalse('re3.test("a:b:c:a")');
+
+// Destructuring should work nicely with named groups.
+var {groups: {first, second}} = /^(?<first>.*),(?<second>.*)$/u.exec('1,2');
+shouldBe('first', '"1"');
+shouldBe('second', '"2"');
+
+// Check that unicode group names work.
+let re4 = /(?<\u043c\u0435\u0441\u044f\u0446>\d{2})\/(?<\u0434\u0435\u043d\u044c>\d{2})\/(?<\u0433\u043e\u0434>\d{4})/;
+var result4 = '02/14/2010'.replace(re4, (...args) => {
+ let {\u0434\u0435\u043d\u044c, \u043c\u0435\u0441\u044f\u0446, \u0433\u043e\u0434} = args[args.length - 1];
+ return `${\u0434\u0435\u043d\u044c}.${\u043c\u0435\u0441\u044f\u0446}.${\u0433\u043e\u0434}`;
+});
+shouldBe('result4', '"14.02.2010"');
+
+// Verify that zero-width joiner and non-joiners can be used as part of a group name identifier
+shouldBe('"third edition".match(/(?<auf\\u200clage>\\w+) edition/).groups.auf\\u200clage', '"third"');
+shouldBe('"fourth edition".match(/(?<auf\\u200dlage>\\w+) edition/).groups.auf\\u200dlage', '"fourth"');
+
+// Verify that both named and numeric group references work in a replacement string.
+shouldBe('"10/20/1930".replace(/(?<month>\\d{2})\\\/(?<day>\\d{2})\\\/(?<year>\\d{4})/, "$<day>-$<month>-$<year>")', '"20-10-1930"');
+shouldBe('"10/20/1930".replace(/(?<month>\\d{2})\\\/(?<day>\\d{2})\\\/(?<year>\\d{4})/, "$2-$<month>-$<year>")', '"20-10-1930"');
+shouldBe('"10/20/1930".replace(/(?<month>\\d{2})\\\/(?<day>\\d{2})\\\/(?<year>\\d{4})/, "$<day>-$1-$<year>")', '"20-10-1930"');
+shouldBe('"10/20/1930".replace(/(?<month>\\d{2})\\\/(?<day>\\d{2})\\\/(?<year>\\d{4})/, "$<day>-$<month>-$3")', '"20-10-1930"');
+
+// Verify that a \k escape that is not a valid named back reference matches a literal "k" for non-unicode patterns.
+shouldBe('"Give me a \\\'k\\\'!".match(/Give me a \\\'\\\k\\\'/)[0]', '"Give me a \\\'k\\\'"');
+shouldBe('"Give me \\\'k2\\\'!".match(/Give me \\\'\\\k2\\\'/)[0]', '"Give me \\\'k2\\\'"');
+shouldBe('"Give me a \\\'kat\\\'!".match(/Give me a \\\'\\\kat\\\'/)[0]', '"Give me a \\\'kat\\\'"');
+// Verify that a \k escape that is not a valid named back reference throws a SyntaxError for unicode patterns.
+// The expected text must match the engine's exception message exactly.
+shouldThrow('"Give me a \\\'k\\\'!".match(/Give me a \\\'\\\k\\\'/u)[0]', '"SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern"');
+shouldThrow('"Give me \\\'k2\\\'!".match(/Give me \\\'\\\k2\\\'/u)[0]', '"SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern"');
+shouldThrow('"Give me a \\\'kat\\\'!".match(/Give me a \\\'\\\kat\\\'/u)[0]', '"SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern"');
+
+// Check invalid group name specifiers in a replace string.
+shouldBe('"10/20/1930".replace(/(?<month>\\d{2})\\\/(?<day>\\d{2})\\\/(?<year>\\d{4})/, "$<day>-$<mouth>-$<year>")', '"20--1930"');
+shouldBe('"10/20/1930".replace(/(?<month>\\d{2})\\\/(?<day>\\d{2})\\\/(?<year>\\d{4})/, "$<day>-$<month>-$<year")', '"20-10-$<year"');
+
+// Check invalid group name exceptions.
+// NOTE(review): the "/" delimiters are inside the string passed to the RegExp constructor, so they
+// are part of the pattern text; the group name errors are still expected to be reported.
+shouldThrow('let r = new RegExp("/(?<groupName1>abc)|(?<groupName1>def)/")', '"SyntaxError: Invalid regular expression: duplicate group specifier name"');
+shouldThrow('let r = new RegExp("/(?< groupName1>abc)/")', '"SyntaxError: Invalid regular expression: invalid group specifier name"');
+shouldThrow('let r = new RegExp("/(?<g=oupName1>abc)/")', '"SyntaxError: Invalid regular expression: invalid group specifier name"');
+
+// Check group names that begin with, or contain, a character that is not a valid Unicode ID start / ID part.
+// NOTE(review): the trailing "/u" is inside the pattern string rather than passed as the flags
+// argument, so these patterns are presumably compiled without the unicode flag — confirm intent.
+shouldThrow('let r = new RegExp("/(?<\u{10190}groupName1>abc)/u")', '"SyntaxError: Invalid regular expression: invalid group specifier name"');
+shouldThrow('let r = new RegExp("/(?<g\u{1019b}oupName1>abc)/u")', '"SyntaxError: Invalid regular expression: invalid group specifier name"');
+shouldThrow('let r = new RegExp("/(?<\u200cgroupName1>abc)/u")', '"SyntaxError: Invalid regular expression: invalid group specifier name"');
+shouldThrow('let r = new RegExp("/(?<\u200dgroupName1>abc)/u")', '"SyntaxError: Invalid regular expression: invalid group specifier name"');
+
Modified: trunk/Source/JavaScriptCore/ChangeLog (221768 => 221769)
--- trunk/Source/JavaScriptCore/ChangeLog 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/ChangeLog 2017-09-07 23:13:38 UTC (rev 221769)
@@ -1,3 +1,63 @@
+2017-09-07 Michael Saboff <msab...@apple.com>
+
+ Add support for RegExp named capture groups
+ https://bugs.webkit.org/show_bug.cgi?id=176435
+
+ Reviewed by Filip Pizlo.
+
+ Added parsing for both naming a captured parenthesis as well as using a named group in
+ a back reference. Also added support for using named groups with String.prototype.replace().
+
+ This patch does not throw Syntax Errors as described in the current spec text for the two
+ cases of malformed back references in String.prototype.replace() as I believe that it
+ is inconsistent with the current semantics for handling of other malformed replacement
+ tokens. I filed an issue for the requested change to the proposed spec and also filed
+ a FIXME bug https://bugs.webkit.org/show_bug.cgi?id=176434.
+
+ This patch does not implement strength reduction in the optimizing JITs for named capture
+ groups. Filed https://bugs.webkit.org/show_bug.cgi?id=176464.
+
+ * dfg/DFGAbstractInterpreterInlines.h:
+ (JSC::DFG::AbstractInterpreter<AbstractStateType>::executeEffects):
+ * dfg/DFGStrengthReductionPhase.cpp:
+ (JSC::DFG::StrengthReductionPhase::handleNode):
+ * runtime/CommonIdentifiers.h:
+ * runtime/JSGlobalObject.cpp:
+ (JSC::JSGlobalObject::init):
+ (JSC::JSGlobalObject::haveABadTime):
+ * runtime/JSGlobalObject.h:
+ (JSC::JSGlobalObject::regExpMatchesArrayWithGroupsStructure const):
+ * runtime/RegExp.cpp:
+ (JSC::RegExp::finishCreation):
+ * runtime/RegExp.h:
+ * runtime/RegExpMatchesArray.cpp:
+ (JSC::createStructureImpl):
+ (JSC::createRegExpMatchesArrayWithGroupsStructure):
+ (JSC::createRegExpMatchesArrayWithGroupsSlowPutStructure):
+ * runtime/RegExpMatchesArray.h:
+ (JSC::createRegExpMatchesArray):
+ * runtime/StringPrototype.cpp:
+ (JSC::substituteBackreferencesSlow):
+ (JSC::replaceUsingRegExpSearch):
+ * yarr/YarrParser.h:
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::atomNamedBackReference):
+ (JSC::Yarr::Parser::parseEscape):
+ (JSC::Yarr::Parser::parseParenthesesBegin):
+ (JSC::Yarr::Parser::tryConsumeUnicodeEscape):
+ (JSC::Yarr::Parser::tryConsumeIdentifierCharacter):
+ (JSC::Yarr::Parser::isIdentifierStart):
+ (JSC::Yarr::Parser::isIdentifierPart):
+ (JSC::Yarr::Parser::tryConsumeGroupName):
+ * yarr/YarrPattern.cpp:
+ (JSC::Yarr::YarrPatternConstructor::atomParenthesesSubpatternBegin):
+ (JSC::Yarr::YarrPatternConstructor::atomNamedBackReference):
+ (JSC::Yarr::YarrPattern::errorMessage):
+ * yarr/YarrPattern.h:
+ (JSC::Yarr::YarrPattern::reset):
+ * yarr/YarrSyntaxChecker.cpp:
+ (JSC::Yarr::SyntaxChecker::atomParenthesesSubpatternBegin):
+ (JSC::Yarr::SyntaxChecker::atomNamedBackReference):
+
2017-09-07 Myles C. Maxfield <mmaxfi...@apple.com>
[PAL] Unify PlatformUserPreferredLanguages.h with Language.h
Modified: trunk/Source/JavaScriptCore/dfg/DFGAbstractInterpreterInlines.h (221768 => 221769)
--- trunk/Source/JavaScriptCore/dfg/DFGAbstractInterpreterInlines.h 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/dfg/DFGAbstractInterpreterInlines.h 2017-09-07 23:13:38 UTC (rev 221769)
@@ -1833,9 +1833,10 @@
if (JSGlobalObject* globalObject = jsDynamicCast<JSGlobalObject*>(m_vm, globalObjectValue)) {
if (!globalObject->isHavingABadTime()) {
m_graph.watchpoints().addLazily(globalObject->havingABadTimeWatchpoint());
- Structure* structure = globalObject->regExpMatchesArrayStructure();
- m_graph.registerStructure(structure);
- forNode(node).set(m_graph, structure);
+ RegisteredStructureSet structureSet;
+ structureSet.add(m_graph.registerStructure(globalObject->regExpMatchesArrayStructure()));
+ structureSet.add(m_graph.registerStructure(globalObject->regExpMatchesArrayWithGroupsStructure()));
+ forNode(node).set(m_graph, structureSet);
forNode(node).merge(SpecOther);
break;
}
Modified: trunk/Source/JavaScriptCore/dfg/DFGStrengthReductionPhase.cpp (221768 => 221769)
--- trunk/Source/JavaScriptCore/dfg/DFGStrengthReductionPhase.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/dfg/DFGStrengthReductionPhase.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -510,7 +510,15 @@
dataLog("Giving up because of pattern limit.\n");
break;
}
-
+
+ if (m_node->op() == RegExpExec && regExp->hasNamedCaptures()) {
+ // FIXME: https://bugs.webkit.org/show_bug.cgi?id=176464
+ // Implement strength reduction optimization for named capture groups.
+ if (verbose)
+ dataLog("Giving up because of named capture groups.\n");
+ break;
+ }
+
unsigned lastIndex;
if (regExp->globalOrSticky()) {
// This will only work if we can prove what the value of lastIndex is. To do this
@@ -545,7 +553,12 @@
m_graph.watchpoints().addLazily(globalObject->havingABadTimeWatchpoint());
- Structure* structure = globalObject->regExpMatchesArrayStructure();
+ Structure* structure;
+ if (m_node->op() == RegExpExec && regExp->hasNamedCaptures())
+ structure = globalObject->regExpMatchesArrayWithGroupsStructure();
+ else
+ structure = globalObject->regExpMatchesArrayStructure();
+
if (structure->indexingType() != ArrayWithContiguous) {
// This is further protection against a race with haveABadTime.
if (verbose)
Modified: trunk/Source/JavaScriptCore/runtime/CommonIdentifiers.h (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/CommonIdentifiers.h 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/CommonIdentifiers.h 2017-09-07 23:13:38 UTC (rev 221769)
@@ -129,6 +129,7 @@
macro(get) \
macro(global) \
macro(go) \
+ macro(groups) \
macro(has) \
macro(hasOwnProperty) \
macro(hash) \
Modified: trunk/Source/JavaScriptCore/runtime/JSGlobalObject.cpp (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/JSGlobalObject.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/JSGlobalObject.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -561,6 +561,7 @@
m_regExpPrototype.set(vm, this, RegExpPrototype::create(vm, this, RegExpPrototype::createStructure(vm, this, m_objectPrototype.get())));
m_regExpStructure.set(vm, this, RegExpObject::createStructure(vm, this, m_regExpPrototype.get()));
m_regExpMatchesArrayStructure.set(vm, this, createRegExpMatchesArrayStructure(vm, this));
+ m_regExpMatchesArrayWithGroupsStructure.set(vm, this, createRegExpMatchesArrayWithGroupsStructure(vm, this));
m_moduleRecordStructure.set(vm, this, JSModuleRecord::createStructure(vm, this, m_objectPrototype.get()));
m_moduleNamespaceObjectStructure.set(vm, this, JSModuleNamespaceObject::createStructure(vm, this, jsNull()));
@@ -1200,6 +1201,8 @@
Structure* slowPutStructure;
slowPutStructure = createRegExpMatchesArraySlowPutStructure(vm, this);
m_regExpMatchesArrayStructure.set(vm, this, slowPutStructure);
+ slowPutStructure = createRegExpMatchesArrayWithGroupsSlowPutStructure(vm, this);
+ m_regExpMatchesArrayWithGroupsStructure.set(vm, this, slowPutStructure);
slowPutStructure = ClonedArguments::createSlowPutStructure(vm, this, m_objectPrototype.get());
m_clonedArgumentsStructure.set(vm, this, slowPutStructure);
@@ -1328,6 +1331,7 @@
visitor.append(thisObject->m_asyncGeneratorFunctionStructure);
visitor.append(thisObject->m_iteratorResultObjectStructure);
visitor.append(thisObject->m_regExpMatchesArrayStructure);
+ visitor.append(thisObject->m_regExpMatchesArrayWithGroupsStructure);
visitor.append(thisObject->m_moduleRecordStructure);
visitor.append(thisObject->m_moduleNamespaceObjectStructure);
visitor.append(thisObject->m_dollarVMStructure);
Modified: trunk/Source/JavaScriptCore/runtime/JSGlobalObject.h (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/JSGlobalObject.h 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/JSGlobalObject.h 2017-09-07 23:13:38 UTC (rev 221769)
@@ -336,6 +336,7 @@
WriteBarrier<Structure> m_dollarVMStructure;
WriteBarrier<Structure> m_iteratorResultObjectStructure;
WriteBarrier<Structure> m_regExpMatchesArrayStructure;
+ WriteBarrier<Structure> m_regExpMatchesArrayWithGroupsStructure;
WriteBarrier<Structure> m_moduleRecordStructure;
WriteBarrier<Structure> m_moduleNamespaceObjectStructure;
WriteBarrier<Structure> m_proxyObjectStructure;
@@ -645,6 +646,7 @@
Structure* symbolObjectStructure() const { return m_symbolObjectStructure.get(); }
Structure* iteratorResultObjectStructure() const { return m_iteratorResultObjectStructure.get(); }
Structure* regExpMatchesArrayStructure() const { return m_regExpMatchesArrayStructure.get(); }
+ Structure* regExpMatchesArrayWithGroupsStructure() const { return m_regExpMatchesArrayWithGroupsStructure.get(); }
Structure* moduleRecordStructure() const { return m_moduleRecordStructure.get(); }
Structure* moduleNamespaceObjectStructure() const { return m_moduleNamespaceObjectStructure.get(); }
Structure* proxyObjectStructure() const { return m_proxyObjectStructure.get(); }
Modified: trunk/Source/JavaScriptCore/runtime/RegExp.cpp (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/RegExp.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/RegExp.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -233,8 +233,11 @@
Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm.stackLimit());
if (!isValid())
m_state = ParseError;
- else
+ else {
m_numSubpatterns = pattern.m_numSubpatterns;
+ m_captureGroupNames.swap(pattern.m_captureGroupNames);
+ m_namedGroupToParenIndex.swap(pattern.m_namedGroupToParenIndex);
+ }
}
void RegExp::destroy(JSCell* cell)
Modified: trunk/Source/JavaScriptCore/runtime/RegExp.h (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/RegExp.h 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/RegExp.h 2017-09-07 23:13:38 UTC (rev 221769)
@@ -79,6 +79,26 @@
unsigned numSubpatterns() const { return m_numSubpatterns; }
+ bool hasNamedCaptures() // True when m_captureGroupNames is non-empty.
+ {
+ return !m_captureGroupNames.isEmpty(); // NOTE(review): presumably the vector is only populated for patterns that declare named groups — confirm in YarrPattern.
+ }
+
+ String getCaptureGroupName(unsigned i) // Returns the m_captureGroupNames entry for subpattern i, or a default-constructed String.
+ {
+ if (!i || m_captureGroupNames.size() <= i) // Index 0 and indices at or past the end of the vector yield String().
+ return String();
+ return m_captureGroupNames[i]; // Callers in this patch skip results for which isEmpty() is true.
+ }
+
+ unsigned subpatternForName(String groupName) // Looks up groupName in m_namedGroupToParenIndex and returns the mapped value.
+ {
+ auto it = m_namedGroupToParenIndex.find(groupName);
+ if (it == m_namedGroupToParenIndex.end())
+ return 0; // 0 signals "no such name"; substituteBackreferencesSlow treats it as a non-existent back reference.
+ return it->value;
+ }
+
bool hasCode()
{
return m_state != NotCompiled;
@@ -134,6 +154,8 @@
RegExpFlags m_flags;
const char* m_constructionError;
unsigned m_numSubpatterns;
+ Vector<String> m_captureGroupNames;
+ HashMap<String, unsigned> m_namedGroupToParenIndex;
#if ENABLE(REGEXP_TRACING)
double m_rtMatchOnlyTotalSubjectStringLen;
double m_rtMatchTotalSubjectStringLen;
Modified: trunk/Source/JavaScriptCore/runtime/RegExpMatchesArray.cpp (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/RegExpMatchesArray.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/RegExpMatchesArray.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -71,7 +71,9 @@
return array;
}
-static Structure* createStructureImpl(VM& vm, JSGlobalObject* globalObject, IndexingType indexingType)
+enum class ShouldCreateGroups { No, Yes };
+
+static Structure* createStructureImpl(VM& vm, JSGlobalObject* globalObject, IndexingType indexingType, ShouldCreateGroups shouldCreateGroups = ShouldCreateGroups::No)
{
Structure* structure = globalObject->arrayStructureForIndexingTypeDuringAllocation(indexingType);
PropertyOffset offset;
@@ -79,6 +81,14 @@
ASSERT(offset == RegExpMatchesArrayIndexPropertyOffset);
structure = Structure::addPropertyTransition(vm, structure, vm.propertyNames->input, 0, offset);
ASSERT(offset == RegExpMatchesArrayInputPropertyOffset);
+ switch (shouldCreateGroups) {
+ case ShouldCreateGroups::Yes:
+ structure = Structure::addPropertyTransition(vm, structure, vm.propertyNames->groups, 0, offset);
+ ASSERT(offset == RegExpMatchesArrayGroupsPropertyOffset);
+ break;
+ case ShouldCreateGroups::No:
+ break;
+ }
return structure;
}
@@ -92,4 +102,14 @@
return createStructureImpl(vm, globalObject, ArrayWithSlowPutArrayStorage);
}
+Structure* createRegExpMatchesArrayWithGroupsStructure(VM& vm, JSGlobalObject* globalObject) // Builds the matches-array structure that also carries the "groups" property.
+{
+ return createStructureImpl(vm, globalObject, ArrayWithContiguous, ShouldCreateGroups::Yes); // Same as the plain variant except for ShouldCreateGroups::Yes.
+}
+
+Structure* createRegExpMatchesArrayWithGroupsSlowPutStructure(VM& vm, JSGlobalObject* globalObject) // Slow-put counterpart of the "groups" matches-array structure; installed by JSGlobalObject::haveABadTime.
+{
+ return createStructureImpl(vm, globalObject, ArrayWithSlowPutArrayStorage, ShouldCreateGroups::Yes); // Differs from the non-slow-put variant only in the indexing type.
+}
+
} // namespace JSC
Modified: trunk/Source/JavaScriptCore/runtime/RegExpMatchesArray.h (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/RegExpMatchesArray.h 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/RegExpMatchesArray.h 2017-09-07 23:13:38 UTC (rev 221769)
@@ -31,6 +31,7 @@
static const PropertyOffset RegExpMatchesArrayIndexPropertyOffset = 100;
static const PropertyOffset RegExpMatchesArrayInputPropertyOffset = 101;
+static const PropertyOffset RegExpMatchesArrayGroupsPropertyOffset = 102;
ALWAYS_INLINE JSArray* tryCreateUninitializedRegExpMatchesArray(ObjectInitializationScope& scope, GCDeferralContext* deferralContext, Structure* structure, unsigned initialLength)
{
@@ -76,18 +77,26 @@
// FIXME: This should handle array allocation errors gracefully.
// https://bugs.webkit.org/show_bug.cgi?id=155144
+ unsigned numSubpatterns = regExp->numSubpatterns();
+ bool hasNamedCaptures = regExp->hasNamedCaptures();
+ JSObject* groups = nullptr;
+
auto setProperties = [&] () {
array->putDirect(vm, RegExpMatchesArrayIndexPropertyOffset, jsNumber(result.start));
array->putDirect(vm, RegExpMatchesArrayInputPropertyOffset, input);
+ if (hasNamedCaptures) {
+ groups = JSFinalObject::create(vm, JSFinalObject::createStructure(vm, globalObject, globalObject->objectPrototype(), 0));
+ array->putDirect(vm, RegExpMatchesArrayGroupsPropertyOffset, groups);
+ }
};
-
- unsigned numSubpatterns = regExp->numSubpatterns();
-
+
GCDeferralContext deferralContext(vm.heap);
-
+
+ Structure* matchStructure = hasNamedCaptures ? globalObject->regExpMatchesArrayWithGroupsStructure() : globalObject->regExpMatchesArrayStructure();
+
if (UNLIKELY(globalObject->isHavingABadTime())) {
ObjectInitializationScope scope(vm);
- array = JSArray::tryCreateUninitializedRestricted(scope, &deferralContext, globalObject->regExpMatchesArrayStructure(), numSubpatterns + 1);
+ array = JSArray::tryCreateUninitializedRestricted(scope, &deferralContext, matchStructure, numSubpatterns + 1);
// FIXME: we should probably throw an out of memory error here, but
// when making this change we should check that all clients of this
// function will correctly handle an exception being thrown from here.
@@ -106,10 +115,15 @@
else
value = jsUndefined();
array->initializeIndexWithoutBarrier(scope, i, value);
+ if (groups) {
+ String groupName = regExp->getCaptureGroupName(i);
+ if (!groupName.isEmpty())
+ groups->putDirect(vm, Identifier::fromString(&vm, groupName), value);
+ }
}
} else {
ObjectInitializationScope scope(vm);
- array = tryCreateUninitializedRegExpMatchesArray(scope, &deferralContext, globalObject->regExpMatchesArrayStructure(), numSubpatterns + 1);
+ array = tryCreateUninitializedRegExpMatchesArray(scope, &deferralContext, matchStructure, numSubpatterns + 1);
RELEASE_ASSERT(array);
setProperties();
@@ -126,6 +140,11 @@
else
value = jsUndefined();
array->initializeIndexWithoutBarrier(scope, i, value, ArrayWithContiguous);
+ if (groups) {
+ String groupName = regExp->getCaptureGroupName(i);
+ if (!groupName.isEmpty())
+ groups->putDirect(vm, Identifier::fromString(&vm, groupName), value);
+ }
}
}
return array;
@@ -139,5 +158,7 @@
JSArray* createEmptyRegExpMatchesArray(JSGlobalObject*, JSString*, RegExp*);
Structure* createRegExpMatchesArrayStructure(VM&, JSGlobalObject*);
Structure* createRegExpMatchesArraySlowPutStructure(VM&, JSGlobalObject*);
+Structure* createRegExpMatchesArrayWithGroupsStructure(VM&, JSGlobalObject*);
+Structure* createRegExpMatchesArrayWithGroupsSlowPutStructure(VM&, JSGlobalObject*);
} // namespace JSC
Modified: trunk/Source/JavaScriptCore/runtime/StringPrototype.cpp (221768 => 221769)
--- trunk/Source/JavaScriptCore/runtime/StringPrototype.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/JavaScriptCore/runtime/StringPrototype.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -182,6 +182,7 @@
static NEVER_INLINE String substituteBackreferencesSlow(StringView replacement, StringView source, const int* ovector, RegExp* reg, size_t i)
{
StringBuilder substitutedReplacement;
+ bool hasNamedCaptures = reg && reg->hasNamedCaptures();
int offset = 0;
do {
if (i + 1 == replacement.length())
@@ -208,6 +209,39 @@
} else if (ref == '\'') {
backrefStart = ovector[1];
backrefLength = source.length() - backrefStart;
+ } else if (reg && ref == '<') {
+ // Named back reference
+ if (!hasNamedCaptures) {
+ substitutedReplacement.append(replacement.substring(i, 2));
+ offset = i + 1;
+ continue;
+ }
+
+ size_t closingBracket = replacement.find('>', i + 2);
+ if (closingBracket == WTF::notFound) {
+ // FIXME: https://bugs.webkit.org/show_bug.cgi?id=176434
+ // Current proposed spec change throws a syntax error in this case.
+ // We have made the case that it makes more sense to treat this as a literal
+ // If throwSyntaxError(exec, scope, "Missing closing '>' in replacement text");
+ continue;
+ }
+
+ unsigned nameLength = closingBracket - i - 2;
+ unsigned backrefIndex = reg->subpatternForName(replacement.substring(i + 2, nameLength).toString());
+
+ if (!backrefIndex || backrefIndex > reg->numSubpatterns()) {
+ // FIXME: https://bugs.webkit.org/show_bug.cgi?id=176434
+ // Proposed spec change throws a syntax error in this case.
+ // We have made the case that a non-existent back reference should be replaced with
+ // an empty string.
+ // throwSyntaxError(exec, scope, makeString("Replacement text references non-existent backreference \"" + replacement.substring(i + 2, nameLength).toString()));
+ backrefStart = 0;
+ backrefLength = 0;
+ } else {
+ backrefStart = ovector[2 * backrefIndex];
+ backrefLength = ovector[2 * backrefIndex + 1] - backrefStart;
+ }
+ advance = nameLength + 1;
} else if (reg && isASCIIDigit(ref)) {
// 1- and 2-digit back references are allowed
unsigned backrefIndex = ref - '0';
@@ -487,6 +521,7 @@
RegExpObject* regExpObject = asRegExpObject(searchValue);
RegExp* regExp = regExpObject->regExp();
bool global = regExp->global();
+ bool hasNamedCaptures = regExp->hasNamedCaptures();
if (global) {
// ES5.1 15.5.4.10 step 8.a.
@@ -528,18 +563,38 @@
unsigned i = 0;
cachedCall.clearArguments();
+
+ JSObject* groups = nullptr;
+
+ if (hasNamedCaptures) {
+ JSGlobalObject* globalObject = exec->lexicalGlobalObject();
+ groups = JSFinalObject::create(vm, JSFinalObject::createStructure(vm, globalObject, globalObject->objectPrototype(), 0));
+ }
+
for (; i < regExp->numSubpatterns() + 1; ++i) {
int matchStart = ovector[i * 2];
int matchLen = ovector[i * 2 + 1] - matchStart;
+ JSValue patternValue;
+
if (matchStart < 0)
- cachedCall.appendArgument(jsUndefined());
+ patternValue = jsUndefined();
else
- cachedCall.appendArgument(jsSubstring(&vm, source, matchStart, matchLen));
+ patternValue = jsSubstring(&vm, source, matchStart, matchLen);
+
+ cachedCall.appendArgument(patternValue);
+
+ if (i && hasNamedCaptures) {
+ String groupName = regExp->getCaptureGroupName(i);
+ if (!groupName.isEmpty())
+ groups->putDirect(vm, Identifier::fromString(&vm, groupName), patternValue);
+ }
}
cachedCall.appendArgument(jsNumber(result.start));
cachedCall.appendArgument(string);
+ if (hasNamedCaptures)
+ cachedCall.appendArgument(groups);
cachedCall.setThis(jsUndefined());
JSValue jsResult = cachedCall.call();
@@ -569,18 +624,38 @@
unsigned i = 0;
cachedCall.clearArguments();
+
+ JSObject* groups = nullptr;
+
+ if (hasNamedCaptures) {
+ JSGlobalObject* globalObject = exec->lexicalGlobalObject();
+ groups = JSFinalObject::create(vm, JSFinalObject::createStructure(vm, globalObject, globalObject->objectPrototype(), 0));
+ }
+
for (; i < regExp->numSubpatterns() + 1; ++i) {
int matchStart = ovector[i * 2];
int matchLen = ovector[i * 2 + 1] - matchStart;
+ JSValue patternValue;
+
if (matchStart < 0)
- cachedCall.appendArgument(jsUndefined());
+ patternValue = jsUndefined();
else
- cachedCall.appendArgument(jsSubstring(&vm, source, matchStart, matchLen));
+ patternValue = jsSubstring(&vm, source, matchStart, matchLen);
+
+ cachedCall.appendArgument(patternValue);
+
+ if (i && hasNamedCaptures) {
+ String groupName = regExp->getCaptureGroupName(i);
+ if (!groupName.isEmpty())
+ groups->putDirect(vm, Identifier::fromString(&vm, groupName), patternValue);
+ }
}
cachedCall.appendArgument(jsNumber(result.start));
cachedCall.appendArgument(string);
+ if (hasNamedCaptures)
+ cachedCall.appendArgument(groups);
cachedCall.setThis(jsUndefined());
JSValue jsResult = cachedCall.call();
@@ -611,19 +686,38 @@
OUT_OF_MEMORY(exec, scope);
MarkedArgumentBuffer args;
+ JSObject* groups = nullptr;
+ if (hasNamedCaptures) {
+ JSGlobalObject* globalObject = exec->lexicalGlobalObject();
+ groups = JSFinalObject::create(vm, JSFinalObject::createStructure(vm, globalObject, globalObject->objectPrototype(), 0));
+ }
+
for (unsigned i = 0; i < regExp->numSubpatterns() + 1; ++i) {
int matchStart = ovector[i * 2];
int matchLen = ovector[i * 2 + 1] - matchStart;
+ JSValue patternValue;
+
if (matchStart < 0)
- args.append(jsUndefined());
+ patternValue = jsUndefined();
else
- args.append(jsSubstring(exec, source, matchStart, matchLen));
+ patternValue = jsSubstring(exec, source, matchStart, matchLen);
+
+ args.append(patternValue);
+
+ if (i && hasNamedCaptures) {
+ String groupName = regExp->getCaptureGroupName(i);
+ if (!groupName.isEmpty())
+ groups->putDirect(vm, Identifier::fromString(&vm, groupName), patternValue);
+ }
+
}
args.append(jsNumber(result.start));
args.append(string);
+ if (hasNamedCaptures)
+ args.append(groups);
JSValue replacement = call(exec, replaceValue, callType, callData, jsUndefined(), args);
RETURN_IF_EXCEPTION(scope, encodedJSValue());
Modified: trunk/Source/_javascript_Core/yarr/YarrParser.h (221768 => 221769)
--- trunk/Source/_javascript_Core/yarr/YarrParser.h 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/_javascript_Core/yarr/YarrParser.h 2017-09-07 23:13:38 UTC (rev 221769)
@@ -28,6 +28,9 @@
#include "Yarr.h"
#include "YarrPattern.h"
#include <wtf/ASCIICType.h>
+#include <wtf/HashSet.h>
+#include <wtf/Optional.h>
+#include <wtf/text/StringBuilder.h>
#include <wtf/text/WTFString.h>
namespace JSC { namespace Yarr {
@@ -197,6 +200,7 @@
// invoked with inCharacterClass set.
NO_RETURN_DUE_TO_ASSERT void assertionWordBoundary(bool) { RELEASE_ASSERT_NOT_REACHED(); }
NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned) { RELEASE_ASSERT_NOT_REACHED(); }
+ // A named back reference must never be reported to this delegate; if it is, the
+ // release assert below fires.
+ NO_RETURN_DUE_TO_ASSERT void atomNamedBackReference(String) { RELEASE_ASSERT_NOT_REACHED(); }
private:
Delegate& m_delegate;
@@ -419,6 +423,28 @@
break;
}
+ // Named backreference
+ case 'k': {
+ consume();
+ ParseState state = saveState();
+ if (!atEndOfPattern() && !inCharacterClass) {
+ if (consume() == '<') {
+ auto groupName = tryConsumeGroupName();
+ if (groupName && m_captureGroupNames.contains(groupName.value())) {
+ delegate.atomNamedBackReference(groupName.value());
+ break;
+ }
+ if (m_isUnicode) {
+ m_err = YarrPattern::InvalidBackreference;
+ break;
+ }
+ }
+ }
+ restoreState(state);
+ delegate.atomPatternCharacter('k');
+ break;
+ }
+
// UnicodeEscape
case 'u': {
consume();
@@ -599,7 +625,21 @@
case '!':
m_delegate.atomParentheticalAssertionBegin(true);
break;
-
+
+ case '<': {
+ auto groupName = tryConsumeGroupName();
+ if (groupName) {
+ auto setAddResult = m_captureGroupNames.add(groupName.value());
+ if (setAddResult.isNewEntry)
+ m_delegate.atomParenthesesSubpatternBegin(true, groupName);
+ else
+ m_err = YarrPattern::DuplicateGroupName;
+ } else
+ m_err = YarrPattern::InvalidGroupName;
+
+ break;
+ }
+
default:
m_err = YarrPattern::ParenthesesTypeInvalid;
}
@@ -824,6 +864,82 @@
return peek() - '0';
}
+ // Parses a Unicode escape starting at its 'u'. Handles "uXXXX", and when m_isUnicode is
+ // set also "u{X...}" and a lead/trail surrogate pair written as "uXXXX\uXXXX".
+ // Returns the decoded value, or -1 when the text is not a valid escape. A malformed
+ // "u{...}" form also records YarrPattern::InvalidUnicodeEscape in m_err.
+ int tryConsumeUnicodeEscape()
+ {
+     if (!tryConsume('u'))
+         return -1;
+
+     if (m_isUnicode && tryConsume('{')) {
+         int value = 0;
+         for (;;) {
+             // Every character before the closing '}' must be a hex digit, and there
+             // must be at least one of them.
+             if (atEndOfPattern() || !isASCIIHexDigit(peek())) {
+                 m_err = YarrPattern::InvalidUnicodeEscape;
+                 return -1;
+             }
+
+             value = (value << 4) | toASCIIHexValue(consume());
+
+             // Reject as soon as the accumulated value exceeds UCHAR_MAX_VALUE.
+             if (value > UCHAR_MAX_VALUE) {
+                 m_err = YarrPattern::InvalidUnicodeEscape;
+                 return -1;
+             }
+
+             if (atEndOfPattern() || peek() == '}')
+                 break;
+         }
+
+         if (!atEndOfPattern() && peek() == '}')
+             consume();
+         else if (!m_err)
+             m_err = YarrPattern::InvalidUnicodeEscape;
+
+         return m_err ? -1 : value;
+     }
+
+     int codeUnit = tryConsumeHex(4);
+     if (codeUnit == -1)
+         return -1;
+
+     // Only a lead surrogate in unicode mode, followed by enough text for "\uXXXX",
+     // can be the first half of a surrogate pair.
+     bool mayHaveTrail = U16_IS_LEAD(codeUnit) && m_isUnicode && (patternRemaining() >= 6) && peek() == '\\';
+     if (!mayHaveTrail)
+         return codeUnit;
+
+     ParseState beforeTrail = saveState();
+     consume();
+
+     if (tryConsume('u')) {
+         int trail = tryConsumeHex(4);
+         if (U16_IS_TRAIL(trail))
+             return U16_GET_SUPPLEMENTARY(codeUnit, trail);
+     }
+
+     // Not a trail surrogate: rewind so the following escape is parsed on its own.
+     restoreState(beforeTrail);
+     return codeUnit;
+ }
+
+ // Consumes one character of a group name and returns it. A '\' is consumed and the
+ // result of tryConsumeUnicodeEscape() (the decoded value, or -1 on failure) is returned
+ // in its place. Returns -1 without consuming anything when the pattern is already
+ // exhausted, so callers cannot read past the end of the pattern.
+ int tryConsumeIdentifierCharacter()
+ {
+     if (atEndOfPattern())
+         return -1;
+
+     int ch = peek();
+     consume();
+
+     if (ch == '\\')
+         ch = tryConsumeUnicodeEscape();
+
+     return ch;
+ }
+
+ // Returns true if ch may begin a capture group name: an ASCII letter, '_' or '$',
+ // or any character whose general category is a letter (U_GC_L_MASK).
+ bool isIdentifierStart(int ch)
+ {
+     if (WTF::isASCII(ch) && (WTF::isASCIIAlpha(ch) || ch == '_' || ch == '$'))
+         return true;
+
+     return (U_GET_GC_MASK(ch) & U_GC_L_MASK) != 0;
+ }
+
+ // Returns true if ch may continue a capture group name: an ASCII letter, '_' or '$';
+ // a character whose general category is letter, non-spacing mark, spacing combining
+ // mark, decimal digit or connector punctuation; or U+200C / U+200D.
+ bool isIdentifierPart(int ch)
+ {
+     if (WTF::isASCII(ch) && (WTF::isASCIIAlpha(ch) || ch == '_' || ch == '$'))
+         return true;
+
+     const auto partCategories = U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK;
+     if (U_GET_GC_MASK(ch) & partCategories)
+         return true;
+
+     return ch == 0x200C || ch == 0x200D;
+ }
+
int consume()
{
ASSERT(m_index < m_size);
@@ -880,6 +996,32 @@
return n;
}
+ // Tries to consume a group name up to and including its terminating '>'.
+ // Returns the name on success. On any failure (pattern exhausted, invalid first or
+ // subsequent character, or no '>' before the end of the pattern) the parse position
+ // is restored and std::nullopt is returned.
+ std::optional<String> tryConsumeGroupName()
+ {
+     // Nothing left to read: bail out before tryConsumeIdentifierCharacter() is asked
+     // to look at a character that is not there.
+     if (atEndOfPattern())
+         return std::nullopt;
+
+     ParseState state = saveState();
+
+     int ch = tryConsumeIdentifierCharacter();
+
+     if (isIdentifierStart(ch)) {
+         StringBuilder identifierBuilder;
+         identifierBuilder.append(ch);
+
+         // The end-of-pattern check runs before every read, including the first one
+         // after the start character.
+         while (!atEndOfPattern()) {
+             ch = tryConsumeIdentifierCharacter();
+             if (ch == '>')
+                 return std::optional<String>(identifierBuilder.toString());
+
+             if (!isIdentifierPart(ch))
+                 break;
+
+             identifierBuilder.append(ch);
+         }
+     }
+
+     restoreState(state);
+
+     return std::nullopt;
+ }
+
Delegate& m_delegate;
unsigned m_backReferenceLimit;
YarrPattern::ErrorCode m_err;
@@ -888,6 +1030,7 @@
unsigned m_index;
bool m_isUnicode;
unsigned m_parenthesesNestingDepth;
+ HashSet<String> m_captureGroupNames;
// Derived by empirical testing of compile time in PCRE and WREC.
static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
@@ -914,10 +1057,11 @@
* void atomCharacterClassRange(UChar32 begin, UChar32 end)
* void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
* void atomCharacterClassEnd()
- * void atomParenthesesSubpatternBegin(bool capture = true);
+ * void atomParenthesesSubpatternBegin(bool capture = true, std::optional<String> groupName);
* void atomParentheticalAssertionBegin(bool invert = false);
* void atomParenthesesEnd();
* void atomBackReference(unsigned subpatternId);
+ * void atomNamedBackReference(String subpatternName);
*
* void quantifyAtom(unsigned min, unsigned max, bool greedy);
*
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (221768 => 221769)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -32,8 +32,10 @@
#include "YarrCanonicalize.h"
#include "YarrParser.h"
#include <wtf/DataLog.h>
+#include <wtf/Optional.h>
#include <wtf/Threading.h>
#include <wtf/Vector.h>
+#include <wtf/text/WTFString.h>
using namespace WTF;
@@ -430,11 +432,19 @@
m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass));
}
- void atomParenthesesSubpatternBegin(bool capture = true)
+ void atomParenthesesSubpatternBegin(bool capture = true, std::optional<String> optGroupName = std::nullopt)
{
unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
- if (capture)
+ if (capture) {
m_pattern.m_numSubpatterns++;
+ if (optGroupName) {
+ while (m_pattern.m_captureGroupNames.size() < subpatternId)
+ m_pattern.m_captureGroupNames.append(String());
+ m_pattern.m_captureGroupNames.append(optGroupName.value());
+ m_pattern.m_namedGroupToParenIndex.add(optGroupName.value(), subpatternId);
+ }
+ } else
+ ASSERT(!optGroupName);
auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative);
m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false));
@@ -509,7 +519,13 @@
m_alternative->m_terms.append(PatternTerm(subpatternId));
}
- // deep copy the argument disjunction. If filterStartsWithBOL is true,
+ // Looks up the subpattern index stored for subpatternName in m_namedGroupToParenIndex
+ // and emits it as an ordinary numbered back reference. The name is asserted to be present.
+ void atomNamedBackReference(String subpatternName)
+ {
+     auto& groupIndexByName = m_pattern.m_namedGroupToParenIndex;
+     ASSERT(groupIndexByName.contains(subpatternName));
+     atomBackReference(groupIndexByName.get(subpatternName));
+ }
+
+ // deep copy the argument disjunction. If filterStartsWithBOL is true,
// skip alternatives with m_startsWithBOL set true.
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
{
@@ -938,6 +954,8 @@
REGEXP_ERROR_PREFIX "missing )", // MissingParentheses
REGEXP_ERROR_PREFIX "unmatched parentheses", // ParenthesesUnmatched
REGEXP_ERROR_PREFIX "unrecognized character after (?", // ParenthesesTypeInvalid
+ REGEXP_ERROR_PREFIX "invalid group specifier name", // InvalidGroupName
+ REGEXP_ERROR_PREFIX "duplicate group specifier name", // DuplicateGroupName
REGEXP_ERROR_PREFIX "missing terminating ] for character class", // CharacterClassUnmatched
REGEXP_ERROR_PREFIX "range out of order in character class", // CharacterClassOutOfOrder
REGEXP_ERROR_PREFIX "\\ at end of pattern", // EscapeUnterminated
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (221768 => 221769)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.h 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h 2017-09-07 23:13:38 UTC (rev 221769)
@@ -28,6 +28,7 @@
#include "RegExpKey.h"
#include <wtf/CheckedArithmetic.h>
+#include <wtf/HashMap.h>
#include <wtf/PrintStream.h>
#include <wtf/Vector.h>
#include <wtf/text/WTFString.h>
@@ -339,6 +340,8 @@
MissingParentheses,
ParenthesesUnmatched,
ParenthesesTypeInvalid,
+ InvalidGroupName,
+ DuplicateGroupName,
CharacterClassUnmatched,
CharacterClassOutOfOrder,
EscapeUnterminated,
@@ -378,6 +381,7 @@
m_disjunctions.clear();
m_userCharacterClasses.clear();
+ m_captureGroupNames.shrink(0);
}
bool containsIllegalBackReference()
@@ -493,6 +497,8 @@
PatternDisjunction* m_body;
Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions;
Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
+ Vector<String> m_captureGroupNames;
+ HashMap<String, unsigned> m_namedGroupToParenIndex;
private:
const char* compile(const String& patternString, void* stackLimit);
Modified: trunk/Source/_javascript_Core/yarr/YarrSyntaxChecker.cpp (221768 => 221769)
--- trunk/Source/_javascript_Core/yarr/YarrSyntaxChecker.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/_javascript_Core/yarr/YarrSyntaxChecker.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -27,6 +27,8 @@
#include "YarrSyntaxChecker.h"
#include "YarrParser.h"
+#include <wtf/Optional.h>
+#include <wtf/text/WTFString.h>
namespace JSC { namespace Yarr {
@@ -42,10 +44,11 @@
void atomCharacterClassRange(UChar, UChar) {}
void atomCharacterClassBuiltIn(BuiltInCharacterClassID, bool) {}
void atomCharacterClassEnd() {}
- void atomParenthesesSubpatternBegin(bool = true) {}
+ void atomParenthesesSubpatternBegin(bool = true, std::optional<String> = std::nullopt) {}
void atomParentheticalAssertionBegin(bool = false) {}
void atomParenthesesEnd() {}
void atomBackReference(unsigned) {}
+ // Intentionally empty, like the other delegate callbacks here: nothing is built for a named back reference.
+ void atomNamedBackReference(String) {}
void quantifyAtom(unsigned, unsigned, bool) {}
void disjunction() {}
};
Modified: trunk/Source/WebCore/ChangeLog (221768 => 221769)
--- trunk/Source/WebCore/ChangeLog 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/WebCore/ChangeLog 2017-09-07 23:13:38 UTC (rev 221769)
@@ -1,3 +1,19 @@
+2017-09-07 Michael Saboff <msab...@apple.com>
+
+ Add support for RegExp named capture groups
+ https://bugs.webkit.org/show_bug.cgi?id=176435
+
+ Reviewed by Filip Pizlo.
+
+ Implemented stub routines to support named capture groups. These are no-ops
+ just like for numbered capture groups.
+
+ No new tests as this is covered by existing tests.
+
+ * contentextensions/URLFilterParser.cpp:
+ (WebCore::ContentExtensions::PatternParser::atomNamedBackReference):
+ (WebCore::ContentExtensions::PatternParser::atomParenthesesSubpatternBegin):
+
2017-09-07 Myles C. Maxfield <mmaxfi...@apple.com>
[PAL] Unify PlatformUserPreferredLanguages.h with Language.h
Modified: trunk/Source/WebCore/contentextensions/URLFilterParser.cpp (221768 => 221769)
--- trunk/Source/WebCore/contentextensions/URLFilterParser.cpp 2017-09-07 23:11:21 UTC (rev 221768)
+++ trunk/Source/WebCore/contentextensions/URLFilterParser.cpp 2017-09-07 23:13:38 UTC (rev 221769)
@@ -129,6 +129,11 @@
fail(URLFilterParser::BackReference);
}
+ // Named back references are rejected: parsing fails with URLFilterParser::BackReference.
+ void atomNamedBackReference(String)
+ {
+ fail(URLFilterParser::BackReference);
+ }
+
void assertionBOL()
{
if (hasError())
@@ -203,7 +208,7 @@
fail(URLFilterParser::AtomCharacter);
}
- void atomParenthesesSubpatternBegin(bool = true)
+ void atomParenthesesSubpatternBegin(bool = true, std::optional<String> = std::nullopt)
{
if (hasError())
return;