Title: [292486] branches/safari-613-branch/Source/WebCore
Revision
292486
Author
[email protected]
Date
2022-04-06 12:04:07 -0700 (Wed, 06 Apr 2022)

Log Message

Cherry-pick r292417. rdar://problem/91311691

    Implement faster lookup of HTML tags in the HTML parser
    https://bugs.webkit.org/show_bug.cgi?id=238804

    Reviewed by Geoffrey Garen.

    Implement faster lookup of HTML tags in the HTML parser by having make_names.pl generate
    a findHTMLTag function that very efficiently returns well-known HTML tag names.

    It is a little more efficient than our current HTMLNameCache (0.4% progression on
    Speedometer on MacBookAir10,1, neutral on iMac20,1). Unlike the HTMLNameCache, it doesn't
    require any hashing or additional storage since the set of well-known HTML tags is known
    at compile time.

    * dom/make_names.pl:
    (printNamesHeaderFile):
    (findMaxTagLength):
    (tagsWithLength):
    (generateFindTagForLength):
    (printNamesCppFile):
    * html/parser/AtomHTMLToken.h:
    (WebCore::AtomHTMLToken::AtomHTMLToken):
    * html/parser/HTMLNameCache.cpp:
    (WebCore::HTMLNameCache::atomStringCache):
    * html/parser/HTMLNameCache.h:
    (WebCore::HTMLNameCache::makeAttributeValue):
    (WebCore::HTMLNameCache::clear):
    (WebCore::HTMLNameCache::makeAtomString):
    (WebCore::HTMLNameCache::atomStringCacheSlot):
    (WebCore::HTMLNameCache::makeTagName): Deleted.

    git-svn-id: https://svn.webkit.org/repository/webkit/trunk@292417 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Modified Paths

Diff

Modified: branches/safari-613-branch/Source/WebCore/ChangeLog (292485 => 292486)


--- branches/safari-613-branch/Source/WebCore/ChangeLog	2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/ChangeLog	2022-04-06 19:04:07 UTC (rev 292486)
@@ -1,3 +1,72 @@
+2022-04-06  Alan Coon  <[email protected]>
+
+        Cherry-pick r292417. rdar://problem/91311691
+
+    Implement faster lookup of HTML tags in the HTML parser
+    https://bugs.webkit.org/show_bug.cgi?id=238804
+    
+    Reviewed by Geoffrey Garen.
+    
+    Implement faster lookup of HTML tags in the HTML parser by having make_names.pl generate
+    a findHTMLTag function that very efficiently returns well-known HTML tag names.
+    
+    It is a little more efficient than our current HTMLNameCache (0.4% progression on
+    Speedometer on MacBookAir10,1, neutral on iMac20,1). Unlike the HTMLNameCache, it doesn't
+    require any hashing or additional storage since the set of well-known HTML tags is known
+    at compile time.
+    
+    * dom/make_names.pl:
+    (printNamesHeaderFile):
+    (findMaxTagLength):
+    (tagsWithLength):
+    (generateFindTagForLength):
+    (printNamesCppFile):
+    * html/parser/AtomHTMLToken.h:
+    (WebCore::AtomHTMLToken::AtomHTMLToken):
+    * html/parser/HTMLNameCache.cpp:
+    (WebCore::HTMLNameCache::atomStringCache):
+    * html/parser/HTMLNameCache.h:
+    (WebCore::HTMLNameCache::makeAttributeValue):
+    (WebCore::HTMLNameCache::clear):
+    (WebCore::HTMLNameCache::makeAtomString):
+    (WebCore::HTMLNameCache::atomStringCacheSlot):
+    (WebCore::HTMLNameCache::makeTagName): Deleted.
+    
+    
+    git-svn-id: https://svn.webkit.org/repository/webkit/trunk@292417 268f45cc-cd09-0410-ab3c-d52691b4dbfc
+
+    2022-04-05  Chris Dumez  <[email protected]>
+
+            Implement faster lookup of HTML tags in the HTML parser
+            https://bugs.webkit.org/show_bug.cgi?id=238804
+
+            Reviewed by Geoffrey Garen.
+
+            Implement faster lookup of HTML tags in the HTML parser by having make_names.pl generate
+            a findHTMLTag function that very efficiently returns well-known HTML tag names.
+
+            It is a little more efficient than our current HTMLNameCache (0.4% progression on
+            Speedometer on MacBookAir10,1, neutral on iMac20,1). Unlike the HTMLNameCache, it doesn't
+            require any hashing or additional storage since the set of well-known HTML tags is known
+            at compile time.
+
+            * dom/make_names.pl:
+            (printNamesHeaderFile):
+            (findMaxTagLength):
+            (tagsWithLength):
+            (generateFindTagForLength):
+            (printNamesCppFile):
+            * html/parser/AtomHTMLToken.h:
+            (WebCore::AtomHTMLToken::AtomHTMLToken):
+            * html/parser/HTMLNameCache.cpp:
+            (WebCore::HTMLNameCache::atomStringCache):
+            * html/parser/HTMLNameCache.h:
+            (WebCore::HTMLNameCache::makeAttributeValue):
+            (WebCore::HTMLNameCache::clear):
+            (WebCore::HTMLNameCache::makeAtomString):
+            (WebCore::HTMLNameCache::atomStringCacheSlot):
+            (WebCore::HTMLNameCache::makeTagName): Deleted.
+
 2022-03-31  Alan Coon  <[email protected]>
 
         Apply patch. rdar://problem/90957317

Modified: branches/safari-613-branch/Source/WebCore/dom/make_names.pl (292485 => 292486)


--- branches/safari-613-branch/Source/WebCore/dom/make_names.pl	2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/dom/make_names.pl	2022-04-06 19:04:07 UTC (rev 292486)
@@ -747,6 +747,9 @@
     if (keys %allTags) {
         print F "const unsigned $parameters{namespace}TagsCount = ", scalar(keys %allTags), ";\n";
         print F "const WebCore::$parameters{namespace}QualifiedName* const* get$parameters{namespace}Tags();\n";
+        if ($parameters{namespace} eq "HTML") {
+            print F "AtomString find$parameters{namespace}Tag(Span<const UChar>);\n"
+        }
     }
 
     if (keys %allAttrs) {
@@ -758,6 +761,86 @@
     close F;
 }
 
+sub findMaxTagLength
+{
+    my $allTags = shift;
+
+    my $maxLength = 0;
+    foreach my $tagName (keys %{$allTags}) {
+        my $tagLength = length($tagName);
+        $maxLength = $tagLength if $tagLength > $maxLength;
+    }
+    return $maxLength;
+}
+
+sub tagsWithLength
+{
+    my $allAttrs = shift;
+    my $expectedLength = shift;
+
+    my @tags = (); 
+    foreach my $tagName (sort keys %{$allAttrs}) {
+        push(@tags, $tagName) if length($tagName) == $expectedLength;
+    }
+    return @tags;
+}
+
+sub generateFindTagForLength
+{
+    my $indent = shift;
+    my $tagsRef = shift;
+    my $length = shift;
+    my $currentIndex = shift;
+
+    my @tags = @{$tagsRef};
+    my $tagCount = @tags;
+    if ($tagCount == 1) {
+        my $tag = $tags[0];
+        my $needsIfCheck = $currentIndex < $length;
+        if ($needsIfCheck) {
+            my $lengthToCompare = $length - $currentIndex;
+            if ($lengthToCompare == 1) {
+                my $letter = substr($tag, $currentIndex, 1);
+                print F "${indent}if (buffer[$currentIndex] == '$letter') {\n";
+            } else {
+                my $bufferStart = $currentIndex > 0 ? "buffer.data() + $currentIndex" : "buffer.data()";
+                print F "${indent}static constexpr UChar ${tag}Rest[] = { ";
+                for (my $index = $currentIndex; $index < $length; $index = $index + 1) {
+                    my $letter = substr($tag, $index, 1);
+                    print F "'$letter', ";
+                }
+                print F "};\n";
+                print F "${indent}if (!memcmp($bufferStart, ${tag}Rest, $lengthToCompare * sizeof(UChar))) {\n";
+            }
+            print F "$indent    return ${tag}Tag->localName();\n";
+            print F "$indent}\n";
+            print F "${indent}return { };\n";
+        } else {
+            print F "${indent}return ${tag}Tag->localName();\n";
+        }
+        return;
+    }
+    for (my $i = 0; $i < $tagCount;) {
+        my $tag = $tags[$i];
+        my $letterAtIndex = substr($tag, $currentIndex, 1);
+        print F "${indent}if (buffer[$currentIndex] == '$letterAtIndex') {\n";
+        my @tagsWithPrefix = ($tag);
+        for ($i = $i + 1; $i < $tagCount; $i = $i + 1) {
+            my $nextTag = $tags[$i];
+            if (substr($nextTag, $currentIndex, 1) eq $letterAtIndex) {
+                push(@tagsWithPrefix, $nextTag);
+            } else {
+                last;
+            }
+        }
+        generateFindTagForLength($indent . "    ", \@tagsWithPrefix, $length, $currentIndex + 1);
+        if (scalar @tagsWithPrefix > 1) {
+            print F "${indent}    return { };\n";
+        }
+        print F "$indent}\n";
+    }
+}
+
 sub printNamesCppFile
 {
     my $cppPath = shift;
@@ -787,6 +870,25 @@
         print F "    };\n";
         print F "    return $parameters{namespace}Tags;\n";
         print F "}\n";
+
+        if ($parameters{namespace} eq "HTML") {
+            print F "\nAtomString find$parameters{namespace}Tag(Span<const UChar> buffer)\n{\n";
+            my $maxTagLength = findMaxTagLength(\%allTags);
+            print F "    switch (buffer.size()) {\n";
+            for (my $length = 1; $length <= $maxTagLength; $length = $length + 1) {
+                my @tags = tagsWithLength(\%allTags, $length);
+                next unless scalar @tags > 0;
+                print F "    case $length: {\n";
+                generateFindTagForLength("        ", \@tags, $length, 0);
+                print F "        break;\n";
+                print F "    }\n";
+            }
+            print F "    default:\n";
+            print F "        break;\n";
+            print F "    };\n";
+            print F "    return { };\n";
+            print F "}\n";
+        }
     }
 
     if (keys %allAttrs) {

Modified: branches/safari-613-branch/Source/WebCore/html/parser/AtomHTMLToken.h (292485 => 292486)


--- branches/safari-613-branch/Source/WebCore/html/parser/AtomHTMLToken.h	2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/html/parser/AtomHTMLToken.h	2022-04-06 19:04:07 UTC (rev 292486)
@@ -27,6 +27,7 @@
 #pragma once
 
 #include "HTMLNameCache.h"
+#include "HTMLNames.h"
 #include "HTMLToken.h"
 
 namespace WebCore {
@@ -228,7 +229,10 @@
         ASSERT_NOT_REACHED();
         return;
     case HTMLToken::DOCTYPE:
-        m_name = HTMLNameCache::makeTagName(token.name());
+        if (LIKELY(token.name().size() == 4 && equal(HTMLNames::htmlTag->localName().impl(), token.name().data(), 4)))
+            m_name = HTMLNames::htmlTag->localName();
+        else
+            m_name = AtomString(token.name().data(), token.name().size());
         m_doctypeData = token.releaseDoctypeData();
         return;
     case HTMLToken::EndOfFile:
@@ -236,7 +240,9 @@
     case HTMLToken::StartTag:
     case HTMLToken::EndTag:
         m_selfClosing = token.selfClosing();
-        m_name = HTMLNameCache::makeTagName(token.name());
+        m_name = HTMLNames::findHTMLTag(token.name());
+        if (UNLIKELY(m_name.isNull()))
+            m_name = AtomString(token.name().data(), token.name().size());
         initializeAttributes(token.attributes());
         return;
     case HTMLToken::Comment:

Modified: branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.cpp (292485 => 292486)


--- branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.cpp	2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.cpp	2022-04-06 19:04:07 UTC (rev 292486)
@@ -28,10 +28,10 @@
 
 namespace WebCore {
 
-HTMLNameCache::AtomStringCache& HTMLNameCache::atomStringCache(AtomStringType type)
+HTMLNameCache::AtomStringCache& HTMLNameCache::atomStringCache()
 {
-    static MainThreadNeverDestroyed<AtomStringCache> caches[2];
-    return caches[static_cast<size_t>(type)].get();
+    static MainThreadNeverDestroyed<AtomStringCache> cache;
+    return cache.get();
 }
 
 HTMLNameCache::QualifiedNameCache& HTMLNameCache::qualifiedNameCache()

Modified: branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.h (292485 => 292486)


--- branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.h	2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.h	2022-04-06 19:04:07 UTC (rev 292486)
@@ -34,11 +34,6 @@
 
 class HTMLNameCache {
 public:
-    ALWAYS_INLINE static AtomString makeTagName(Span<const UChar> string)
-    {
-        return makeAtomString<AtomStringType::TagName>(string);
-    }
-
     ALWAYS_INLINE static QualifiedName makeAttributeQualifiedName(Span<const UChar> string)
     {
         return makeQualifiedName(string);
@@ -46,21 +41,17 @@
 
     ALWAYS_INLINE static AtomString makeAttributeValue(Span<const UChar> string)
     {
-        return makeAtomString<AtomStringType::AttributeValue>(string);
+        return makeAtomString(string);
     }
 
     ALWAYS_INLINE static void clear()
     {
         // FIXME (webkit.org/b/230019): We should try to find more opportunities to clear this cache without hindering this performance optimization.
-        atomStringCache(AtomStringType::TagName).fill({ });
-        atomStringCache(AtomStringType::AttributeValue).fill({ });
+        atomStringCache().fill({ });
         qualifiedNameCache().fill({ });
     }
 
 private:
-    enum class AtomStringType : bool { TagName, AttributeValue };
-
-    template<HTMLNameCache::AtomStringType type>
     ALWAYS_INLINE static AtomString makeAtomString(Span<const UChar> string)
     {
         if (string.empty())
@@ -72,7 +63,7 @@
 
         auto firstCharacter = string[0];
         auto lastCharacter = string[length - 1];
-        auto& slot = atomStringCacheSlot(type, firstCharacter, lastCharacter, length);
+        auto& slot = atomStringCacheSlot(firstCharacter, lastCharacter, length);
         if (!equal(slot.impl(), string.data(), length)) {
             AtomString result(string.data(), length);
             slot = result;
@@ -111,10 +102,10 @@
         return (hash + (hash >> 6)) % capacity;
     }
 
-    ALWAYS_INLINE static AtomString& atomStringCacheSlot(AtomStringType type, UChar firstCharacter, UChar lastCharacter, UChar length)
+    ALWAYS_INLINE static AtomString& atomStringCacheSlot(UChar firstCharacter, UChar lastCharacter, UChar length)
     {
         auto index = slotIndex(firstCharacter, lastCharacter, length);
-        return atomStringCache(type)[index];
+        return atomStringCache()[index];
     }
 
     ALWAYS_INLINE static RefPtr<QualifiedName::QualifiedNameImpl>& qualifiedNameCacheSlot(UChar firstCharacter, UChar lastCharacter, UChar length)
@@ -129,7 +120,7 @@
     using AtomStringCache = std::array<AtomString, capacity>;
     using QualifiedNameCache = std::array<RefPtr<QualifiedName::QualifiedNameImpl>, capacity>;
 
-    static AtomStringCache& atomStringCache(AtomStringType);
+    static AtomStringCache& atomStringCache();
     static QualifiedNameCache& qualifiedNameCache();
 };
 
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to