Diff
Modified: trunk/Source/WebCore/ChangeLog (287085 => 287086)
--- trunk/Source/WebCore/ChangeLog 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Source/WebCore/ChangeLog 2021-12-15 18:49:18 UTC (rev 287086)
@@ -1,3 +1,32 @@
+2021-12-15 Alex Christensen <achristen...@webkit.org>
+
+ Avoid unnecessary allocation and UTF-8 conversion when calling DFABytecodeInterpreter::interpret
+ https://bugs.webkit.org/show_bug.cgi?id=234351
+
+ Reviewed by Tim Hatcher.
+
+ A valid URL, the only input into DFABytecodeInterpreter::interpret, contains only ASCII characters.
+ In the overwhelming majority of cases, we have an 8-bit string. There is no need to allocate, copy, and convert it.
+ In the rare case that we somehow get a UTF-16 encoded ASCII string, just do what we did before and UTF-8 encode it.
+
+ Regular expressions allow matching the end of the string, which we currently implement by checking for the
+ null character, so I had to keep the parts that read the null character at the end of a string by checking
+ to see if we are at the end of the string when reading a character and returning the null character if we are.
+
+ Covered by many API tests.
+
+ * contentextensions/ContentExtension.cpp:
+ (WebCore::ContentExtensions::ContentExtension::populateTopURLActionCacheIfNeeded const):
+ (WebCore::ContentExtensions::ContentExtension::populateFrameURLActionCacheIfNeeded const):
+ * contentextensions/ContentExtensionsBackend.cpp:
+ (WebCore::ContentExtensions::ContentExtensionsBackend::actionsFromContentRuleList const):
+ (WebCore::ContentExtensions::ContentExtensionsBackend::actionsForResourceLoad const):
+ * contentextensions/ContentExtensionsBackend.h:
+ * contentextensions/DFABytecodeInterpreter.cpp:
+ (WebCore::ContentExtensions::DFABytecodeInterpreter::interpetJumpTable):
+ (WebCore::ContentExtensions::DFABytecodeInterpreter::interpret):
+ * contentextensions/DFABytecodeInterpreter.h:
+
2021-12-15 Alan Bujtas <za...@apple.com>
[LFC][IFC] Use the physical margin/border/padding values for inline boxes when generating the display content
Modified: trunk/Source/WebCore/contentextensions/ContentExtension.cpp (287085 => 287086)
--- trunk/Source/WebCore/contentextensions/ContentExtension.cpp 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Source/WebCore/contentextensions/ContentExtension.cpp 2021-12-15 18:49:18 UTC (rev 287086)
@@ -114,7 +114,7 @@
return;
DFABytecodeInterpreter interpreter(m_compiledExtension->topURLFiltersBytecode());
- auto topURLActions = interpreter.interpret(topURL.string().utf8(), AllResourceFlags);
+ auto topURLActions = interpreter.interpret(topURL.string(), AllResourceFlags);
m_cachedTopURLActions.clear();
for (uint64_t action : topURLActions)
@@ -131,7 +131,7 @@
return;
DFABytecodeInterpreter interpreter(m_compiledExtension->frameURLFiltersBytecode());
- auto frameURLActions = interpreter.interpret(frameURL.string().utf8(), AllResourceFlags);
+ auto frameURLActions = interpreter.interpret(frameURL.string(), AllResourceFlags);
m_cachedFrameURLActions.clear();
for (uint64_t action : frameURLActions)
Modified: trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp (287085 => 287086)
--- trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp 2021-12-15 18:49:18 UTC (rev 287086)
@@ -96,7 +96,7 @@
m_contentExtensions.clear();
}
-auto ContentExtensionsBackend::actionsFromContentRuleList(const ContentExtension& contentExtension, const CString& urlString, const ResourceLoadInfo& resourceLoadInfo, ResourceFlags flags) const -> ActionsFromContentRuleList
+auto ContentExtensionsBackend::actionsFromContentRuleList(const ContentExtension& contentExtension, const String& urlString, const ResourceLoadInfo& resourceLoadInfo, ResourceFlags flags) const -> ActionsFromContentRuleList
{
ActionsFromContentRuleList actionsStruct;
actionsStruct.contentRuleListIdentifier = contentExtension.identifier();
@@ -162,9 +162,6 @@
const String& urlString = resourceLoadInfo.resourceURL.string();
ASSERT_WITH_MESSAGE(urlString.isAllASCII(), "A decoded URL should only contain ASCII characters. The matching algorithm assumes the input is ASCII.");
- // FIXME: UTF-8 conversion should only be necessary with UTF-16 String based URLs, which are rare.
- // DFABytecodeInterpreter::interpret should take a Span<char> instead of a CString and we can avoid this allocation almost all of the time.
- const auto urlCString = urlString.utf8();
Vector<ActionsFromContentRuleList> actionsVector;
actionsVector.reserveInitialCapacity(m_contentExtensions.size());
@@ -171,7 +168,7 @@
ASSERT(!(resourceLoadInfo.getResourceFlags() & ActionConditionMask));
const ResourceFlags flags = resourceLoadInfo.getResourceFlags() | ActionConditionMask;
for (auto& contentExtension : m_contentExtensions.values())
- actionsVector.uncheckedAppend(actionsFromContentRuleList(contentExtension.get(), urlCString, resourceLoadInfo, flags));
+ actionsVector.uncheckedAppend(actionsFromContentRuleList(contentExtension.get(), urlString, resourceLoadInfo, flags));
#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
MonotonicTime addedTimeEnd = MonotonicTime::now();
dataLogF("Time added: %f microseconds %s \n", (addedTimeEnd - addedTimeStart).microseconds(), resourceLoadInfo.resourceURL.string().utf8().data());
Modified: trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.h (287085 => 287086)
--- trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.h 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.h 2021-12-15 18:49:18 UTC (rev 287086)
@@ -80,7 +80,7 @@
WEBCORE_EXPORT static bool shouldBeMadeSecure(const URL&);
private:
- ActionsFromContentRuleList actionsFromContentRuleList(const ContentExtension&, const CString& urlString, const ResourceLoadInfo&, ResourceFlags) const;
+ ActionsFromContentRuleList actionsFromContentRuleList(const ContentExtension&, const String& urlString, const ResourceLoadInfo&, ResourceFlags) const;
HashMap<String, Ref<ContentExtension>> m_contentExtensions;
};
Modified: trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp (287085 => 287086)
--- trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp 2021-12-15 18:49:18 UTC (rev 287086)
@@ -203,11 +203,12 @@
}
template<bool caseSensitive>
-inline void DFABytecodeInterpreter::interpetJumpTable(const char* url, uint32_t& urlIndex, uint32_t& programCounter, bool& urlIndexIsAfterEndOfString)
+inline void DFABytecodeInterpreter::interpetJumpTable(Span<const char> url, uint32_t& urlIndex, uint32_t& programCounter)
{
DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, programCounter);
- char character = caseSensitive ? url[urlIndex] : toASCIILower(url[urlIndex]);
+ char c = urlIndex < url.size() ? url[urlIndex] : 0;
+ char character = caseSensitive ? c : toASCIILower(c);
uint8_t firstCharacter = getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction));
uint8_t lastCharacter = getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t));
if (character >= firstCharacter && character <= lastCharacter) {
@@ -214,8 +215,6 @@
uint32_t startOffset = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
uint32_t jumpLocation = startOffset + (character - firstCharacter) * jumpSizeInBytes(jumpSize);
programCounter += getJumpDistance(m_bytecode, jumpLocation, jumpSize);
- if (!character)
- urlIndexIsAfterEndOfString = true;
urlIndex++; // This represents an edge in the DFA.
} else
programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize) * (lastCharacter - firstCharacter + 1);
@@ -243,10 +242,17 @@
return actions;
}
-auto DFABytecodeInterpreter::interpret(const CString& urlCString, ResourceFlags flags) -> Actions
+auto DFABytecodeInterpreter::interpret(const String& urlString, ResourceFlags flags) -> Actions
{
- const char* url = ""
- ASSERT(url);
+ CString urlCString;
+ Span<const char> url;
+ if (LIKELY(urlString.is8Bit()))
+ url = { reinterpret_cast<const char*>(urlString.characters8()), urlString.length() };
+ else {
+ urlCString = urlString.utf8();
+ url = { urlCString.data(), urlCString.length() };
+ }
+ ASSERT(url.data());
Actions actions;
@@ -281,7 +287,6 @@
// Interpret the bytecode from this DFA.
// This should always terminate if interpreting correctly compiled bytecode.
uint32_t urlIndex = 0;
- bool urlIndexIsAfterEndOfString = false;
while (true) {
ASSERT(programCounter <= m_bytecode.size());
switch (getInstruction(m_bytecode, programCounter)) {
@@ -290,17 +295,15 @@
goto nextDFA;
case DFABytecodeInstruction::CheckValueCaseSensitive: {
- if (urlIndexIsAfterEndOfString)
+ if (urlIndex > url.size())
goto nextDFA;
// Check to see if the next character in the url is the value stored with the bytecode.
- char character = url[urlIndex];
+ char character = urlIndex < url.size() ? url[urlIndex] : 0;
DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, programCounter);
if (character == getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction))) {
uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t);
programCounter += getJumpDistance(m_bytecode, jumpLocation, jumpSize);
- if (!character)
- urlIndexIsAfterEndOfString = true;
urlIndex++; // This represents an edge in the DFA.
} else
programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
@@ -308,17 +311,15 @@
}
case DFABytecodeInstruction::CheckValueCaseInsensitive: {
- if (urlIndexIsAfterEndOfString)
+ if (urlIndex > url.size())
goto nextDFA;
// Check to see if the next character in the url is the value stored with the bytecode.
- char character = toASCIILower(url[urlIndex]);
+ char character = urlIndex < url.size() ? toASCIILower(url[urlIndex]) : 0;
DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, programCounter);
if (character == getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction))) {
uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t);
programCounter += getJumpDistance(m_bytecode, jumpLocation, jumpSize);
- if (!character)
- urlIndexIsAfterEndOfString = true;
urlIndex++; // This represents an edge in the DFA.
} else
programCounter += sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
@@ -326,30 +327,28 @@
}
case DFABytecodeInstruction::JumpTableCaseInsensitive:
- if (urlIndexIsAfterEndOfString)
+ if (urlIndex > url.size())
goto nextDFA;
- interpetJumpTable<false>(url, urlIndex, programCounter, urlIndexIsAfterEndOfString);
+ interpetJumpTable<false>(url, urlIndex, programCounter);
break;
case DFABytecodeInstruction::JumpTableCaseSensitive:
- if (urlIndexIsAfterEndOfString)
+ if (urlIndex > url.size())
goto nextDFA;
- interpetJumpTable<true>(url, urlIndex, programCounter, urlIndexIsAfterEndOfString);
+ interpetJumpTable<true>(url, urlIndex, programCounter);
break;
case DFABytecodeInstruction::CheckValueRangeCaseSensitive: {
- if (urlIndexIsAfterEndOfString)
+ if (urlIndex > url.size())
goto nextDFA;
- char character = url[urlIndex];
+ char character = urlIndex < url.size() ? url[urlIndex] : 0;
DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, programCounter);
if (character >= getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction))
&& character <= getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t))) {
uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
programCounter += getJumpDistance(m_bytecode, jumpLocation, jumpSize);
- if (!character)
- urlIndexIsAfterEndOfString = true;
urlIndex++; // This represents an edge in the DFA.
} else
programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
@@ -357,17 +356,15 @@
}
case DFABytecodeInstruction::CheckValueRangeCaseInsensitive: {
- if (urlIndexIsAfterEndOfString)
+ if (urlIndex > url.size())
goto nextDFA;
- char character = toASCIILower(url[urlIndex]);
+ char character = urlIndex < url.size() ? toASCIILower(url[urlIndex]) : 0;
DFABytecodeJumpSize jumpSize = getJumpSize(m_bytecode, programCounter);
if (character >= getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction))
&& character <= getBits<uint8_t>(m_bytecode, programCounter + sizeof(DFABytecodeInstruction) + sizeof(uint8_t))) {
uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t);
programCounter += getJumpDistance(m_bytecode, jumpLocation, jumpSize);
- if (!character)
- urlIndexIsAfterEndOfString = true;
urlIndex++; // This represents an edge in the DFA.
} else
programCounter += sizeof(DFABytecodeInstruction) + 2 * sizeof(uint8_t) + jumpSizeInBytes(jumpSize);
@@ -375,7 +372,7 @@
}
case DFABytecodeInstruction::Jump: {
- if (!url[urlIndex] || urlIndexIsAfterEndOfString)
+ if (urlIndex >= url.size())
goto nextDFA;
uint32_t jumpLocation = programCounter + sizeof(DFABytecodeInstruction);
@@ -396,8 +393,8 @@
default:
RELEASE_ASSERT_NOT_REACHED(); // Invalid bytecode.
}
- // We should always terminate before or at a null character at the end of a String.
- ASSERT(urlIndex <= urlCString.length() || (urlIndexIsAfterEndOfString && urlIndex <= urlCString.length() + 1));
+ // We should always terminate before or at an imaginary null character at the end of a String.
+ ASSERT(urlIndex <= url.size() + 1);
}
RELEASE_ASSERT_NOT_REACHED(); // The while loop can only be exited using goto nextDFA.
nextDFA:
Modified: trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.h (287085 => 287086)
--- trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.h 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.h 2021-12-15 18:49:18 UTC (rev 287086)
@@ -42,7 +42,7 @@
using Actions = HashSet<uint64_t, DefaultHash<uint64_t>, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>>;
- WEBCORE_EXPORT Actions interpret(const CString&, ResourceFlags);
+ WEBCORE_EXPORT Actions interpret(const String&, ResourceFlags);
Actions actionsMatchingEverything();
private:
@@ -50,7 +50,7 @@
void interpretTestFlagsAndAppendAction(unsigned& programCounter, ResourceFlags, Actions&);
template<bool caseSensitive>
- void interpetJumpTable(const char* url, uint32_t& urlIndex, uint32_t& programCounter, bool& urlIndexIsAfterEndOfString);
+ void interpetJumpTable(Span<const char> url, uint32_t& urlIndex, uint32_t& programCounter);
const Span<const uint8_t> m_bytecode;
};
Modified: trunk/Tools/ChangeLog (287085 => 287086)
--- trunk/Tools/ChangeLog 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Tools/ChangeLog 2021-12-15 18:49:18 UTC (rev 287086)
@@ -1,3 +1,13 @@
+2021-12-15 Alex Christensen <achristen...@webkit.org>
+
+ Avoid unnecessary allocation and UTF-8 conversion when calling DFABytecodeInterpreter::interpret
+ https://bugs.webkit.org/show_bug.cgi?id=234351
+
+ Reviewed by Tim Hatcher.
+
+ * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+ (TestWebKitAPI::TEST_F):
+
2021-12-15 Carlos Garcia Campos <cgar...@igalia.com>
[GTK][a11y] Add support for loading events when building with ATSPI
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp (287085 => 287086)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp 2021-12-15 18:32:26 UTC (rev 287085)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp 2021-12-15 18:49:18 UTC (rev 287086)
@@ -1316,7 +1316,7 @@
pattern.append('x');
break;
}
- auto matches = interpreter.interpret(pattern.toString().utf8(), 0);
+ auto matches = interpreter.interpret(pattern.toString(), 0);
switch ((c1 + c2 + c3 + c4) % 4) {
case 0:
compareContents(matches, { });