Title: [286062] trunk/Source/WTF
Revision
286062
Author
commit-qu...@webkit.org
Date
2021-11-19 07:57:39 -0800 (Fri, 19 Nov 2021)

Log Message

Remove allocation in JSON::Value::parseJSON
https://bugs.webkit.org/show_bug.cgi?id=233346

Patch by Alex Christensen <achristen...@webkit.org> on 2021-11-19
Reviewed by Yusuke Suzuki.

Parse the characters as Latin1 characters if we have an 8-bit string rather than converting them to UTF-16 just to be parsed.

* wtf/JSONValues.cpp:
(WTF::JSONImpl::Value::parseJSON):

Modified Paths

Diff

Modified: trunk/Source/WTF/ChangeLog (286061 => 286062)


--- trunk/Source/WTF/ChangeLog	2021-11-19 15:37:03 UTC (rev 286061)
+++ trunk/Source/WTF/ChangeLog	2021-11-19 15:57:39 UTC (rev 286062)
@@ -1,3 +1,15 @@
+2021-11-19  Alex Christensen  <achristen...@webkit.org>
+
+        Remove allocation in JSON::Value::parseJSON
+        https://bugs.webkit.org/show_bug.cgi?id=233346
+
+        Reviewed by Yusuke Suzuki.
+
+        Parse the characters as Latin1 characters if we have an 8-bit string rather than converting them to UTF-16 just to be parsed.
+
+        * wtf/JSONValues.cpp:
+        (WTF::JSONImpl::Value::parseJSON):
+
 2021-11-18  Antoine Quint  <grao...@webkit.org>
 
         [Model] add support for pausing and resuming animations

Modified: trunk/Source/WTF/wtf/JSONValues.cpp (286061 => 286062)


--- trunk/Source/WTF/wtf/JSONValues.cpp	2021-11-19 15:37:03 UTC (rev 286061)
+++ trunk/Source/WTF/wtf/JSONValues.cpp	2021-11-19 15:57:39 UTC (rev 286062)
@@ -61,7 +61,8 @@
 const char* const trueToken = "true";
 const char* const falseToken = "false";
 
-bool parseConstToken(const UChar* start, const UChar* end, const UChar** tokenEnd, const char* token)
+template<typename CodeUnit>
+bool parseConstToken(const CodeUnit* start, const CodeUnit* end, const CodeUnit** tokenEnd, const char* token)
 {
     while (start < end && *token != '\0' && *start++ == *token++) { }
 
@@ -72,7 +73,8 @@
     return true;
 }
 
-bool readInt(const UChar* start, const UChar* end, const UChar** tokenEnd, bool canHaveLeadingZeros)
+template<typename CodeUnit>
+bool readInt(const CodeUnit* start, const CodeUnit* end, const CodeUnit** tokenEnd, bool canHaveLeadingZeros)
 {
     if (start == end)
         return false;
@@ -94,7 +96,8 @@
     return true;
 }
 
-bool parseNumberToken(const UChar* start, const UChar* end, const UChar** tokenEnd)
+template<typename CodeUnit>
+bool parseNumberToken(const CodeUnit* start, const CodeUnit* end, const CodeUnit** tokenEnd)
 {
     // We just grab the number here. We validate the size in DecodeNumber.
     // According to RFC 4627, a valid number is: [minus] int [frac] [exp]
@@ -101,7 +104,7 @@
     if (start == end)
         return false;
 
-    UChar c = *start;
+    CodeUnit c = *start;
     if ('-' == c)
         ++start;
 
@@ -145,7 +148,8 @@
     return true;
 }
 
-bool readHexDigits(const UChar* start, const UChar* end, const UChar** tokenEnd, int digits)
+template<typename CodeUnit>
+bool readHexDigits(const CodeUnit* start, const CodeUnit* end, const CodeUnit** tokenEnd, int digits)
 {
     if (end - start < digits)
         return false;
@@ -159,10 +163,11 @@
     return true;
 }
 
-bool parseStringToken(const UChar* start, const UChar* end, const UChar** tokenEnd)
+template<typename CodeUnit>
+bool parseStringToken(const CodeUnit* start, const CodeUnit* end, const CodeUnit** tokenEnd)
 {
     while (start < end) {
-        UChar c = *start++;
+        CodeUnit c = *start++;
         if ('\\' == c && start < end) {
             c = *start++;
             // Make sure the escaped char is valid.
@@ -197,7 +202,8 @@
     return false;
 }
 
-Token parseToken(const UChar* start, const UChar* end, const UChar** tokenStart, const UChar** tokenEnd)
+template<typename CodeUnit>
+Token parseToken(const CodeUnit* start, const CodeUnit* end, const CodeUnit** tokenStart, const CodeUnit** tokenEnd)
 {
     while (start < end && isSpaceOrNewline(*start))
         ++start;
@@ -261,7 +267,8 @@
     return Token::Invalid;
 }
 
-bool decodeString(const UChar* start, const UChar* end, StringBuilder& output)
+template<typename CodeUnit>
+bool decodeString(const CodeUnit* start, const CodeUnit* end, StringBuilder& output)
 {
     while (start < end) {
         UChar c = *start++;
@@ -316,7 +323,8 @@
     return true;
 }
 
-bool decodeString(const UChar* start, const UChar* end, String& output)
+template<typename CodeUnit>
+bool decodeString(const CodeUnit* start, const CodeUnit* end, String& output)
 {
     if (start == end) {
         output = emptyString();
@@ -335,14 +343,15 @@
     return true;
 }
 
-RefPtr<JSON::Value> buildValue(const UChar* start, const UChar* end, const UChar** valueTokenEnd, int depth)
+template<typename CodeUnit>
+RefPtr<JSON::Value> buildValue(const CodeUnit* start, const CodeUnit* end, const CodeUnit** valueTokenEnd, int depth)
 {
     if (depth > stackLimit)
         return nullptr;
 
     RefPtr<JSON::Value> result;
-    const UChar* tokenStart;
-    const UChar* tokenEnd;
+    const CodeUnit* tokenStart;
+    const CodeUnit* tokenEnd;
     Token token = parseToken(start, end, &tokenStart, &tokenEnd);
     switch (token) {
     case Token::Invalid:
@@ -506,20 +515,32 @@
 
 RefPtr<Value> Value::parseJSON(const String& json)
 {
-    // FIXME: This whole file should just use StringView instead of UChar/length and avoid upconverting.
-    auto characters = StringView(json).upconvertedCharacters();
-    const UChar* start = characters;
-    const UChar* end = start + json.length();
-    const UChar* tokenEnd;
-    auto result = buildValue(start, end, &tokenEnd, 0);
-    if (!result)
-        return nullptr;
+    auto containsNonSpace = [] (const auto* begin, const auto* end) {
+        if (!begin)
+            return false;
+        for (const auto* it = begin; it < end; it++) {
+            if (!isSpaceOrNewline(*it))
+                return true;
+        }
+        return false;
+    };
 
-    for (const UChar* valueEnd = tokenEnd; valueEnd < end; ++valueEnd) {
-        if (!isSpaceOrNewline(*valueEnd))
+    RefPtr<Value> result;
+    if (json.is8Bit()) {
+        const LChar* start = json.characters8();
+        const LChar* end = start + json.length();
+        const LChar* tokenEnd { nullptr };
+        result = buildValue(start, end, &tokenEnd, 0);
+        if (containsNonSpace(tokenEnd, end))
             return nullptr;
+    } else {
+        const UChar* start = json.characters16();
+        const UChar* end = start + json.length();
+        const UChar* tokenEnd { nullptr };
+        result = buildValue(start, end, &tokenEnd, 0);
+        if (containsNonSpace(tokenEnd, end))
+            return nullptr;
     }
-
     return result;
 }
 
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to