- Revision
- 292486
- Author
- [email protected]
- Date
- 2022-04-06 12:04:07 -0700 (Wed, 06 Apr 2022)
Log Message
Cherry-pick r292417. rdar://problem/91311691
Implement faster lookup of HTML tags in the HTML parser
https://bugs.webkit.org/show_bug.cgi?id=238804
Reviewed by Geoffrey Garen.
Implement faster lookup of HTML tags in the HTML parser by having make_names.pl generate
a findHTMLTag function that very efficiently returns well-known HTML tag names.
It is a little more efficient than our current HTMLNameCache (0.4% progression on
Speedometer on MacBookAir10,1, neutral on iMac20,1). Unlike the HTMLNameCache, it doesn't
require any hashing or additional storage since the set of well-known HTML tags is known
at compile time.
* dom/make_names.pl:
(printNamesHeaderFile):
(findMaxTagLength):
(tagsWithLength):
(generateFindTagForLength):
(printNamesCppFile):
* html/parser/AtomHTMLToken.h:
(WebCore::AtomHTMLToken::AtomHTMLToken):
* html/parser/HTMLNameCache.cpp:
(WebCore::HTMLNameCache::atomStringCache):
* html/parser/HTMLNameCache.h:
(WebCore::HTMLNameCache::makeAttributeValue):
(WebCore::HTMLNameCache::clear):
(WebCore::HTMLNameCache::makeAtomString):
(WebCore::HTMLNameCache::atomStringCacheSlot):
(WebCore::HTMLNameCache::makeTagName): Deleted.
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@292417 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Modified Paths
Diff
Modified: branches/safari-613-branch/Source/WebCore/ChangeLog (292485 => 292486)
--- branches/safari-613-branch/Source/WebCore/ChangeLog 2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/ChangeLog 2022-04-06 19:04:07 UTC (rev 292486)
@@ -1,3 +1,72 @@
+2022-04-06 Alan Coon <[email protected]>
+
+ Cherry-pick r292417. rdar://problem/91311691
+
+ Implement faster lookup of HTML tags in the HTML parser
+ https://bugs.webkit.org/show_bug.cgi?id=238804
+
+ Reviewed by Geoffrey Garen.
+
+ Implement faster lookup of HTML tags in the HTML parser by having make_names.pl generate
+ a findHTMLTag function that returns very efficient well-known HTML tag names.
+
+ It is a little more efficient than our current HTMLNameCache (0.4% progression on
+ Speedometer on MacBookAir 10,1, neutral on iMac20,1). Unlike the HTMLNameCache, it doesn't
+ require any hashing or additional storage since the set of well-known HTML tags is known
+ at compile time.
+
+ * dom/make_names.pl:
+ (printNamesHeaderFile):
+ (findMaxTagLength):
+ (tagsWithLength):
+ (generateFindTagForLength):
+ (printNamesCppFile):
+ * html/parser/AtomHTMLToken.h:
+ (WebCore::AtomHTMLToken::AtomHTMLToken):
+ * html/parser/HTMLNameCache.cpp:
+ (WebCore::HTMLNameCache::atomStringCache):
+ * html/parser/HTMLNameCache.h:
+ (WebCore::HTMLNameCache::makeAttributeValue):
+ (WebCore::HTMLNameCache::clear):
+ (WebCore::HTMLNameCache::makeAtomString):
+ (WebCore::HTMLNameCache::atomStringCacheSlot):
+ (WebCore::HTMLNameCache::makeTagName): Deleted.
+
+
+ git-svn-id: https://svn.webkit.org/repository/webkit/trunk@292417 268f45cc-cd09-0410-ab3c-d52691b4dbfc
+
+ 2022-04-05 Chris Dumez <[email protected]>
+
+ Implement faster lookup of HTML tags in the HTML parser
+ https://bugs.webkit.org/show_bug.cgi?id=238804
+
+ Reviewed by Geoffrey Garen.
+
+ Implement faster lookup of HTML tags in the HTML parser by having make_names.pl generate
+ a findHTMLTag function that returns very efficient well-known HTML tag names.
+
+ It is a little more efficient than our current HTMLNameCache (0.4% progression on
+ Speedometer on MacBookAir 10,1, neutral on iMac20,1). Unlike the HTMLNameCache, it doesn't
+ require any hashing or additional storage since the set of well-known HTML tags is known
+ at compile time.
+
+ * dom/make_names.pl:
+ (printNamesHeaderFile):
+ (findMaxTagLength):
+ (tagsWithLength):
+ (generateFindTagForLength):
+ (printNamesCppFile):
+ * html/parser/AtomHTMLToken.h:
+ (WebCore::AtomHTMLToken::AtomHTMLToken):
+ * html/parser/HTMLNameCache.cpp:
+ (WebCore::HTMLNameCache::atomStringCache):
+ * html/parser/HTMLNameCache.h:
+ (WebCore::HTMLNameCache::makeAttributeValue):
+ (WebCore::HTMLNameCache::clear):
+ (WebCore::HTMLNameCache::makeAtomString):
+ (WebCore::HTMLNameCache::atomStringCacheSlot):
+ (WebCore::HTMLNameCache::makeTagName): Deleted.
+
2022-03-31 Alan Coon <[email protected]>
Apply patch. rdar://problem/90957317
Modified: branches/safari-613-branch/Source/WebCore/dom/make_names.pl (292485 => 292486)
--- branches/safari-613-branch/Source/WebCore/dom/make_names.pl 2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/dom/make_names.pl 2022-04-06 19:04:07 UTC (rev 292486)
@@ -747,6 +747,9 @@
if (keys %allTags) {
print F "const unsigned $parameters{namespace}TagsCount = ", scalar(keys %allTags), ";\n";
print F "const WebCore::$parameters{namespace}QualifiedName* const* get$parameters{namespace}Tags();\n";
+ if ($parameters{namespace} eq "HTML") {
+ print F "AtomString find$parameters{namespace}Tag(Span<const UChar>);\n"
+ }
}
if (keys %allAttrs) {
@@ -758,6 +761,86 @@
close F;
}
+sub findMaxTagLength
+{
+ my $allTags = shift;
+
+ my $maxLength = 0;
+ foreach my $tagName (keys %{$allTags}) {
+ my $tagLength = length($tagName);
+ $maxLength = $tagLength if $tagLength > $maxLength;
+ }
+ return $maxLength;
+}
+
+sub tagsWithLength
+{
+ my $allAttrs = shift;
+ my $expectedLength = shift;
+
+ my @tags = ();
+ foreach my $tagName (sort keys %{$allAttrs}) {
+ push(@tags, $tagName) if length($tagName) == $expectedLength;
+ }
+ return @tags;
+}
+
+sub generateFindTagForLength
+{
+ my $indent = shift;
+ my $tagsRef = shift;
+ my $length = shift;
+ my $currentIndex = shift;
+
+ my @tags = @{$tagsRef};
+ my $tagCount = @tags;
+ if ($tagCount == 1) {
+ my $tag = $tags[0];
+ my $needsIfCheck = $currentIndex < $length;
+ if ($needsIfCheck) {
+ my $lengthToCompare = $length - $currentIndex;
+ if ($lengthToCompare == 1) {
+ my $letter = substr($tag, $currentIndex, 1);
+ print F "${indent}if (buffer[$currentIndex] == '$letter') {\n";
+ } else {
+ my $bufferStart = $currentIndex > 0 ? "buffer.data() + $currentIndex" : "buffer.data()";
+ print F "${indent}static constexpr UChar ${tag}Rest[] = { ";
+ for (my $index = $currentIndex; $index < $length; $index = $index + 1) {
+ my $letter = substr($tag, $index, 1);
+ print F "'$letter', ";
+ }
+ print F "};\n";
+ print F "${indent}if (!memcmp($bufferStart, ${tag}Rest, $lengthToCompare * sizeof(UChar))) {\n";
+ }
+ print F "$indent return ${tag}Tag->localName();\n";
+ print F "$indent}\n";
+ print F "${indent}return { };\n";
+ } else {
+ print F "${indent}return ${tag}Tag->localName();\n";
+ }
+ return;
+ }
+ for (my $i = 0; $i < $tagCount;) {
+ my $tag = $tags[$i];
+ my $letterAtIndex = substr($tag, $currentIndex, 1);
+ print F "${indent}if (buffer[$currentIndex] == '$letterAtIndex') {\n";
+ my @tagsWithPrefix = ($tag);
+ for ($i = $i + 1; $i < $tagCount; $i = $i + 1) {
+ my $nextTag = $tags[$i];
+ if (substr($nextTag, $currentIndex, 1) eq $letterAtIndex) {
+ push(@tagsWithPrefix, $nextTag);
+ } else {
+ last;
+ }
+ }
+ generateFindTagForLength($indent . " ", \@tagsWithPrefix, $length, $currentIndex + 1);
+ if (scalar @tagsWithPrefix > 1) {
+ print F "${indent} return { };\n";
+ }
+ print F "$indent}\n";
+ }
+}
+
sub printNamesCppFile
{
my $cppPath = shift;
@@ -787,6 +870,25 @@
print F " };\n";
print F " return $parameters{namespace}Tags;\n";
print F "}\n";
+
+ if ($parameters{namespace} eq "HTML") {
+ print F "\nAtomString find$parameters{namespace}Tag(Span<const UChar> buffer)\n{\n";
+ my $maxTagLength = findMaxTagLength(\%allTags);
+ print F " switch (buffer.size()) {\n";
+ for (my $length = 1; $length <= $maxTagLength; $length = $length + 1) {
+ my @tags = tagsWithLength(\%allTags, $length);
+ next unless scalar @tags > 0;
+ print F " case $length: {\n";
+ generateFindTagForLength(" ", \@tags, $length, 0);
+ print F " break;\n";
+ print F " }\n";
+ }
+ print F " default:\n";
+ print F " break;\n";
+ print F " };\n";
+ print F " return { };\n";
+ print F "}\n";
+ }
}
if (keys %allAttrs) {
Modified: branches/safari-613-branch/Source/WebCore/html/parser/AtomHTMLToken.h (292485 => 292486)
--- branches/safari-613-branch/Source/WebCore/html/parser/AtomHTMLToken.h 2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/html/parser/AtomHTMLToken.h 2022-04-06 19:04:07 UTC (rev 292486)
@@ -27,6 +27,7 @@
#pragma once
#include "HTMLNameCache.h"
+#include "HTMLNames.h"
#include "HTMLToken.h"
namespace WebCore {
@@ -228,7 +229,10 @@
ASSERT_NOT_REACHED();
return;
case HTMLToken::DOCTYPE:
- m_name = HTMLNameCache::makeTagName(token.name());
+ if (LIKELY(token.name().size() == 4 && equal(HTMLNames::htmlTag->localName().impl(), token.name().data(), 4)))
+ m_name = HTMLNames::htmlTag->localName();
+ else
+ m_name = AtomString(token.name().data(), token.name().size());
m_doctypeData = token.releaseDoctypeData();
return;
case HTMLToken::EndOfFile:
@@ -236,7 +240,9 @@
case HTMLToken::StartTag:
case HTMLToken::EndTag:
m_selfClosing = token.selfClosing();
- m_name = HTMLNameCache::makeTagName(token.name());
+ m_name = HTMLNames::findHTMLTag(token.name());
+ if (UNLIKELY(m_name.isNull()))
+ m_name = AtomString(token.name().data(), token.name().size());
initializeAttributes(token.attributes());
return;
case HTMLToken::Comment:
Modified: branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.cpp (292485 => 292486)
--- branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.cpp 2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.cpp 2022-04-06 19:04:07 UTC (rev 292486)
@@ -28,10 +28,10 @@
namespace WebCore {
-HTMLNameCache::AtomStringCache& HTMLNameCache::atomStringCache(AtomStringType type)
+HTMLNameCache::AtomStringCache& HTMLNameCache::atomStringCache()
{
- static MainThreadNeverDestroyed<AtomStringCache> caches[2];
- return caches[static_cast<size_t>(type)].get();
+ static MainThreadNeverDestroyed<AtomStringCache> cache;
+ return cache.get();
}
HTMLNameCache::QualifiedNameCache& HTMLNameCache::qualifiedNameCache()
Modified: branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.h (292485 => 292486)
--- branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.h 2022-04-06 18:51:37 UTC (rev 292485)
+++ branches/safari-613-branch/Source/WebCore/html/parser/HTMLNameCache.h 2022-04-06 19:04:07 UTC (rev 292486)
@@ -34,11 +34,6 @@
class HTMLNameCache {
public:
- ALWAYS_INLINE static AtomString makeTagName(Span<const UChar> string)
- {
- return makeAtomString<AtomStringType::TagName>(string);
- }
-
ALWAYS_INLINE static QualifiedName makeAttributeQualifiedName(Span<const UChar> string)
{
return makeQualifiedName(string);
@@ -46,21 +41,17 @@
ALWAYS_INLINE static AtomString makeAttributeValue(Span<const UChar> string)
{
- return makeAtomString<AtomStringType::AttributeValue>(string);
+ return makeAtomString(string);
}
ALWAYS_INLINE static void clear()
{
// FIXME (webkit.org/b/230019): We should try to find more opportunities to clear this cache without hindering this performance optimization.
- atomStringCache(AtomStringType::TagName).fill({ });
- atomStringCache(AtomStringType::AttributeValue).fill({ });
+ atomStringCache().fill({ });
qualifiedNameCache().fill({ });
}
private:
- enum class AtomStringType : bool { TagName, AttributeValue };
-
- template<HTMLNameCache::AtomStringType type>
ALWAYS_INLINE static AtomString makeAtomString(Span<const UChar> string)
{
if (string.empty())
@@ -72,7 +63,7 @@
auto firstCharacter = string[0];
auto lastCharacter = string[length - 1];
- auto& slot = atomStringCacheSlot(type, firstCharacter, lastCharacter, length);
+ auto& slot = atomStringCacheSlot(firstCharacter, lastCharacter, length);
if (!equal(slot.impl(), string.data(), length)) {
AtomString result(string.data(), length);
slot = result;
@@ -111,10 +102,10 @@
return (hash + (hash >> 6)) % capacity;
}
- ALWAYS_INLINE static AtomString& atomStringCacheSlot(AtomStringType type, UChar firstCharacter, UChar lastCharacter, UChar length)
+ ALWAYS_INLINE static AtomString& atomStringCacheSlot(UChar firstCharacter, UChar lastCharacter, UChar length)
{
auto index = slotIndex(firstCharacter, lastCharacter, length);
- return atomStringCache(type)[index];
+ return atomStringCache()[index];
}
ALWAYS_INLINE static RefPtr<QualifiedName::QualifiedNameImpl>& qualifiedNameCacheSlot(UChar firstCharacter, UChar lastCharacter, UChar length)
@@ -129,7 +120,7 @@
using AtomStringCache = std::array<AtomString, capacity>;
using QualifiedNameCache = std::array<RefPtr<QualifiedName::QualifiedNameImpl>, capacity>;
- static AtomStringCache& atomStringCache(AtomStringType);
+ static AtomStringCache& atomStringCache();
static QualifiedNameCache& qualifiedNameCache();
};