Title: [201451] trunk/Source/_javascript_Core
Revision
201451
Author
[email protected]
Date
2016-05-27 07:59:46 -0700 (Fri, 27 May 2016)

Log Message

Bogus uses of regexp matching should realize that they will OOM before they start swapping
https://bugs.webkit.org/show_bug.cgi?id=158142

Reviewed by Michael Saboff.
        
Refactored the RegExpObject::matchGlobal() code so that there is less duplication. Took
advantage of this to make the code more resilient in case of absurd situations: if the
result array gets large, it proceeds with a dry run to detect how many matches there will
be. This allows it to OOM before it starts swapping.
        
This also improves the overall performance of the code by using lightweight substrings and
skipping the whole intermediate argument array.
        
This makes some jsfunfuzz tests run a lot faster and use a lot less memory.
        
* builtins/RegExpPrototype.js:
* CMakeLists.txt:
* _javascript_Core.xcodeproj/project.pbxproj:
* runtime/MatchResult.cpp: Added.
(JSC::MatchResult::dump):
* runtime/MatchResult.h:
(JSC::MatchResult::empty):
(MatchResult::empty): Deleted.
* runtime/RegExpObject.cpp:
(JSC::RegExpObject::match):
(JSC::collectMatches):
(JSC::RegExpObject::matchGlobal):
* runtime/StringObject.h:
(JSC::jsStringWithReuse):
(JSC::jsSubstring):
* tests/stress/big-match.js: Added. Make sure that this optimization doesn't break big matches.

Modified Paths

Added Paths

Diff

Modified: trunk/Source/_javascript_Core/CMakeLists.txt (201450 => 201451)


--- trunk/Source/_javascript_Core/CMakeLists.txt	2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/CMakeLists.txt	2016-05-27 14:59:46 UTC (rev 201451)
@@ -744,6 +744,7 @@
     runtime/MapConstructor.cpp
     runtime/MapIteratorPrototype.cpp
     runtime/MapPrototype.cpp
+    runtime/MatchResult.cpp
     runtime/MathCommon.cpp
     runtime/MathObject.cpp
     runtime/MemoryStatistics.cpp

Modified: trunk/Source/_javascript_Core/ChangeLog (201450 => 201451)


--- trunk/Source/_javascript_Core/ChangeLog	2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/ChangeLog	2016-05-27 14:59:46 UTC (rev 201451)
@@ -1,3 +1,37 @@
+2016-05-26  Filip Pizlo  <[email protected]>
+
+        Bogus uses of regexp matching should realize that they will OOM before they start swapping
+        https://bugs.webkit.org/show_bug.cgi?id=158142
+
+        Reviewed by Michael Saboff.
+        
+        Refactored the RegExpObject::matchGlobal() code so that there is less duplication. Took
+        advantage of this to make the code more resilient in case of absurd situations: if the
+        result array gets large, it proceeds with a dry run to detect how many matches there will
+        be. This allows it to OOM before it starts swapping.
+        
+        This also improves the overall performance of the code by using lightweight substrings and
+        skipping the whole intermediate argument array.
+        
+        This makes some jsfunfuzz tests run a lot faster and use a lot less memory.
+        
+        * builtins/RegExpPrototype.js:
+        * CMakeLists.txt:
+        * _javascript_Core.xcodeproj/project.pbxproj:
+        * runtime/MatchResult.cpp: Added.
+        (JSC::MatchResult::dump):
+        * runtime/MatchResult.h:
+        (JSC::MatchResult::empty):
+        (MatchResult::empty): Deleted.
+        * runtime/RegExpObject.cpp:
+        (JSC::RegExpObject::match):
+        (JSC::collectMatches):
+        (JSC::RegExpObject::matchGlobal):
+        * runtime/StringObject.h:
+        (JSC::jsStringWithReuse):
+        (JSC::jsSubstring):
+        * tests/stress/big-match.js: Added. Make sure that this optimization doesn't break big matches.
+
 2016-05-26  Gavin & Ellie Barraclough  <[email protected]>
 
         Static table property lookup should not require getOwnPropertySlot override.

Modified: trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj (201450 => 201451)


--- trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj	2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj	2016-05-27 14:59:46 UTC (rev 201451)
@@ -1987,6 +1987,7 @@
 		DC605B5E1CE26EA200593718 /* ProfilerEvent.h in Headers */ = {isa = PBXBuildFile; fileRef = DC605B5A1CE26E9800593718 /* ProfilerEvent.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		DC605B5F1CE26EA500593718 /* ProfilerUID.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DC605B5B1CE26E9800593718 /* ProfilerUID.cpp */; };
 		DC605B601CE26EA700593718 /* ProfilerUID.h in Headers */ = {isa = PBXBuildFile; fileRef = DC605B5C1CE26E9800593718 /* ProfilerUID.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		DC69AA661CF7A1F200C6272F /* MatchResult.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DC69AA651CF7A1EF00C6272F /* MatchResult.cpp */; };
 		DC7997831CDE9FA0004D4A09 /* TagRegistersMode.h in Headers */ = {isa = PBXBuildFile; fileRef = DC7997821CDE9F9E004D4A09 /* TagRegistersMode.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		DC7997841CDE9FA2004D4A09 /* TagRegistersMode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DC7997811CDE9F9E004D4A09 /* TagRegistersMode.cpp */; };
 		DCEE22091CEB9895000C2396 /* DFGBackwardsCFG.h in Headers */ = {isa = PBXBuildFile; fileRef = DCEE22061CEB9890000C2396 /* DFGBackwardsCFG.h */; };
@@ -4200,6 +4201,7 @@
 		DC605B5A1CE26E9800593718 /* ProfilerEvent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProfilerEvent.h; path = profiler/ProfilerEvent.h; sourceTree = "<group>"; };
 		DC605B5B1CE26E9800593718 /* ProfilerUID.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ProfilerUID.cpp; path = profiler/ProfilerUID.cpp; sourceTree = "<group>"; };
 		DC605B5C1CE26E9800593718 /* ProfilerUID.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProfilerUID.h; path = profiler/ProfilerUID.h; sourceTree = "<group>"; };
+		DC69AA651CF7A1EF00C6272F /* MatchResult.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MatchResult.cpp; sourceTree = "<group>"; };
 		DC7997811CDE9F9E004D4A09 /* TagRegistersMode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TagRegistersMode.cpp; sourceTree = "<group>"; };
 		DC7997821CDE9F9E004D4A09 /* TagRegistersMode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TagRegistersMode.h; sourceTree = "<group>"; };
 		DCEE22061CEB9890000C2396 /* DFGBackwardsCFG.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DFGBackwardsCFG.h; path = dfg/DFGBackwardsCFG.h; sourceTree = "<group>"; };
@@ -5504,12 +5506,6 @@
 		7EF6E0BB0EB7A1EC0079AFAF /* runtime */ = {
 			isa = PBXGroup;
 			children = (
-				708EBE231CE8F35000453146 /* IntlObjectInlines.h */,
-				DCF3D5641CD29468003D5C65 /* LazyClassStructure.cpp */,
-				DCF3D5651CD29468003D5C65 /* LazyClassStructure.h */,
-				DCF3D5661CD29468003D5C65 /* LazyClassStructureInlines.h */,
-				DCF3D5671CD29468003D5C65 /* LazyProperty.h */,
-				DCF3D5681CD29468003D5C65 /* LazyPropertyInlines.h */,
 				BCF605110E203EF800B9A64D /* ArgList.cpp */,
 				BCF605120E203EF800B9A64D /* ArgList.h */,
 				0FE0500C1AA9091100D33B33 /* ArgumentsMode.h */,
@@ -5684,6 +5680,7 @@
 				A1D792FB1B43864B004516F5 /* IntlNumberFormatPrototype.h */,
 				A12BBFF31B044A9800664B69 /* IntlObject.cpp */,
 				A12BBFF11B044A8B00664B69 /* IntlObject.h */,
+				708EBE231CE8F35000453146 /* IntlObjectInlines.h */,
 				86BF642A148DB2B5004DE36A /* Intrinsic.h */,
 				FE4D55B71AE716CA0052E459 /* IterationStatus.h */,
 				70113D491A8DB093003848C4 /* IteratorOperations.cpp */,
@@ -5839,6 +5836,11 @@
 				1442566015EDE98D0066A49B /* JSWithScope.h */,
 				65C7A1710A8EAACB00FA37EA /* JSWrapperObject.cpp */,
 				65C7A1720A8EAACB00FA37EA /* JSWrapperObject.h */,
+				DCF3D5641CD29468003D5C65 /* LazyClassStructure.cpp */,
+				DCF3D5651CD29468003D5C65 /* LazyClassStructure.h */,
+				DCF3D5661CD29468003D5C65 /* LazyClassStructureInlines.h */,
+				DCF3D5671CD29468003D5C65 /* LazyProperty.h */,
+				DCF3D5681CD29468003D5C65 /* LazyPropertyInlines.h */,
 				A7E2EA6A0FB460CF00601F06 /* LiteralParser.cpp */,
 				A7E2EA690FB460CF00601F06 /* LiteralParser.h */,
 				F692A8680255597D01FF60F7 /* Lookup.cpp */,
@@ -5851,6 +5853,7 @@
 				A74DEF8E182D991400522C22 /* MapIteratorPrototype.h */,
 				A700873B17CBE8D300C3E643 /* MapPrototype.cpp */,
 				A700873C17CBE8D300C3E643 /* MapPrototype.h */,
+				DC69AA651CF7A1EF00C6272F /* MatchResult.cpp */,
 				8612E4CB1522918400C836BE /* MatchResult.h */,
 				4340A4821A9051AF00D73CCA /* MathCommon.cpp */,
 				4340A4831A9051AF00D73CCA /* MathCommon.h */,
@@ -9223,6 +9226,7 @@
 				14469DE1107EC7E700650446 /* NativeErrorPrototype.cpp in Sources */,
 				E33E8D201B9013DE00346B52 /* NativeStdFunctionCell.cpp in Sources */,
 				148F21B7107EC5470042EC2C /* Nodes.cpp in Sources */,
+				DC69AA661CF7A1F200C6272F /* MatchResult.cpp in Sources */,
 				E3963CEE1B73F75000EB4CE5 /* NodesAnalyzeModule.cpp in Sources */,
 				655EB29B10CE2581001A990E /* NodesCodegen.cpp in Sources */,
 				6546F5211A32B313006F07D5 /* NullGetterFunction.cpp in Sources */,

Modified: trunk/Source/_javascript_Core/builtins/RegExpPrototype.js (201450 => 201451)


--- trunk/Source/_javascript_Core/builtins/RegExpPrototype.js	2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/builtins/RegExpPrototype.js	2016-05-27 14:59:46 UTC (rev 201451)
@@ -98,6 +98,9 @@
     regexp.lastIndex = 0;
     let resultList = [];
 
+    // FIXME: It would be great to implement a solution similar to what we do in
+    // RegExpObject::matchGlobal(). It's not clear if this is possible, since this loop has
+    // effects. https://bugs.webkit.org/show_bug.cgi?id=158145
     const maximumReasonableMatchSize = 100000000;
 
     while (true) {

Added: trunk/Source/_javascript_Core/runtime/MatchResult.cpp (0 => 201451)


--- trunk/Source/_javascript_Core/runtime/MatchResult.cpp	                        (rev 0)
+++ trunk/Source/_javascript_Core/runtime/MatchResult.cpp	2016-05-27 14:59:46 UTC (rev 201451)
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "MatchResult.h"
+
+namespace JSC {
+
+void MatchResult::dump(PrintStream& out) const
+{
+    if (start == WTF::notFound)
+        out.print("notFound");
+    else
+        out.print(start, "...", end);
+}
+
+} // namespace JSC
+

Modified: trunk/Source/_javascript_Core/runtime/MatchResult.h (201450 => 201451)


--- trunk/Source/_javascript_Core/runtime/MatchResult.h	2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/runtime/MatchResult.h	2016-05-27 14:59:46 UTC (rev 201451)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
+ * Copyright (C) 2012, 2016 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,6 +26,11 @@
 #ifndef MatchResult_h
 #define MatchResult_h
 
+#include <wtf/PrintStream.h>
+#include <wtf/Vector.h> // for notFound
+
+namespace JSC {
+
 typedef uint64_t EncodedMatchResult;
 
 struct MatchResult {
@@ -69,9 +74,13 @@
     {
         return start == end;
     }
+    
+    void dump(PrintStream&) const;
 
     size_t start;
     size_t end;
 };
 
+} // namespace JSC
+
 #endif

Modified: trunk/Source/_javascript_Core/runtime/RegExpObject.cpp (201450 => 201451)


--- trunk/Source/_javascript_Core/runtime/RegExpObject.cpp	2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/runtime/RegExpObject.cpp	2016-05-27 14:59:46 UTC (rev 201451)
@@ -169,6 +169,65 @@
     return matchInline(exec, globalObject, string);
 }
 
+template<typename FixEndFunc>
+JSValue collectMatches(VM& vm, ExecState* exec, JSString* string, const String& s, RegExpConstructor* constructor, RegExp* regExp, const FixEndFunc& fixEnd)
+{
+    MatchResult result = constructor->performMatch(vm, regExp, string, s, 0);
+    if (!result)
+        return jsNull();
+    
+    static unsigned maxSizeForDirectPath = 100000;
+    
+    JSArray* array = constructEmptyArray(exec, nullptr);
+
+    auto iterate = [&] () {
+        size_t end = result.end;
+        size_t length = end - result.start;
+        array->push(exec, JSRopeString::createSubstringOfResolved(vm, string, result.start, length));
+        if (!length)
+            end = fixEnd(end);
+        result = constructor->performMatch(vm, regExp, string, s, end);
+    };
+    
+    do {
+        if (array->length() >= maxSizeForDirectPath) {
+            // First do a throw-away match to see how many matches we'll get.
+            unsigned matchCount = 0;
+            MatchResult savedResult = result;
+            do {
+                if (array->length() + matchCount >= MAX_STORAGE_VECTOR_LENGTH) {
+                    throwOutOfMemoryError(exec);
+                    return jsUndefined();
+                }
+                
+                size_t end = result.end;
+                matchCount++;
+                if (result.empty())
+                    end = fixEnd(end);
+                
+                // Using RegExpConstructor::performMatch() instead of calling RegExp::match()
+                // directly is a surprising but profitable choice: it means that when we do OOM, we
+                // will leave the cached result in the state it ought to have had just before the
+                // OOM! On the other hand, if this loop concludes that the result is small enough,
+                // then the iterate() loop below will overwrite the cached result anyway.
+                result = constructor->performMatch(vm, regExp, string, s, end);
+            } while (result);
+            
+            // OK, we have a sensible number of matches. Now we can create them for reals.
+            result = savedResult;
+            do
+                iterate();
+            while (result);
+            
+            return array;
+        }
+        
+        iterate();
+    } while (result);
+    
+    return array;
+}
+
 JSValue RegExpObject::matchGlobal(ExecState* exec, JSGlobalObject* globalObject, JSString* string)
 {
     RegExp* regExp = this->regExp();
@@ -183,52 +242,21 @@
 
     String s = string->value(exec);
     RegExpConstructor* regExpConstructor = globalObject->regExpConstructor();
-    MatchResult result = regExpConstructor->performMatch(*vm, regExp, string, s, 0);
-
-    // return array of matches
-    MarkedArgumentBuffer list;
-    // We defend ourselves from crazy.
-    const size_t maximumReasonableMatchSize = 1000000000;
-
+    
     if (regExp->unicode()) {
         unsigned stringLength = s.length();
-        while (result) {
-            if (list.size() > maximumReasonableMatchSize) {
-                throwOutOfMemoryError(exec);
-                return jsUndefined();
-            }
-
-            size_t end = result.end;
-            size_t length = end - result.start;
-            list.append(jsSubstring(exec, s, result.start, length));
-            if (!length)
-                end = advanceStringUnicode(s, stringLength, end);
-            result = regExpConstructor->performMatch(*vm, regExp, string, s, end);
-        }
-    } else {
-        while (result) {
-            if (list.size() > maximumReasonableMatchSize) {
-                throwOutOfMemoryError(exec);
-                return jsUndefined();
-            }
-
-            size_t end = result.end;
-            size_t length = end - result.start;
-            list.append(jsSubstring(exec, s, result.start, length));
-            if (!length)
-                ++end;
-            result = regExpConstructor->performMatch(*vm, regExp, string, s, end);
-        }
+        return collectMatches(
+            *vm, exec, string, s, regExpConstructor, regExp,
+            [&] (size_t end) -> size_t {
+                return advanceStringUnicode(s, stringLength, end);
+            });
     }
-
-    if (list.isEmpty()) {
-        // if there are no matches at all, it's important to return
-        // Null instead of an empty array, because this matches
-        // other browsers and because Null is a false value.
-        return jsNull();
-    }
-
-    return constructArray(exec, static_cast<ArrayAllocationProfile*>(0), list);
+    
+    return collectMatches(
+        *vm, exec, string, s, regExpConstructor, regExp,
+        [&] (size_t end) -> size_t {
+            return end + 1;
+        });
 }
 
 } // namespace JSC

Modified: trunk/Source/_javascript_Core/runtime/StringObject.h (201450 => 201451)


--- trunk/Source/_javascript_Core/runtime/StringObject.h	2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/runtime/StringObject.h	2016-05-27 14:59:46 UTC (rev 201451)
@@ -94,6 +94,11 @@
 }
 
 // Helper that tries to use the JSString substring sharing mechanism if 'originalValue' is a JSString.
+// FIXME: It would be even better if toString returned a JSString*, or if anyone who called
+// toString with the intent of later calling this functon first created a jsString from the String
+// that toString returned. That way, we'd get the substring optimization even when the input was
+// not a JSString.
+// https://bugs.webkit.org/show_bug.cgi?id=158140
 static inline JSString* jsSubstring(ExecState* exec, JSValue originalValue, const String& string, unsigned offset, unsigned length)
 {
     if (originalValue.isString()) {

Added: trunk/Source/_javascript_Core/tests/stress/big-match.js (0 => 201451)


--- trunk/Source/_javascript_Core/tests/stress/big-match.js	                        (rev 0)
+++ trunk/Source/_javascript_Core/tests/stress/big-match.js	2016-05-27 14:59:46 UTC (rev 201451)
@@ -0,0 +1,20 @@
+"use strict";
+
+var bigString = "x";
+while (bigString.length < 200000)
+    bigString = bigString + bigString;
+
+if (bigString.length != 262144)
+    throw "Error: bad string length: " + bigString.length;
+
+var result = /x/g[Symbol.match](bigString);
+
+if (result.length != 262144)
+    throw "Error: bad result array length: " + result.length;
+
+for (var i = 0; i < result.length; ++i) {
+    if (result[i] != "x")
+        throw "Error: array does not contain \"x\" at i = " + i + ": " + result[i];
+}
+
+
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to