Modified: trunk/Source/WebCore/contentextensions/DFABytecode.h (181979 => 181980)
--- trunk/Source/WebCore/contentextensions/DFABytecode.h 2015-03-25 22:56:50 UTC (rev 181979)
+++ trunk/Source/WebCore/contentextensions/DFABytecode.h 2015-03-25 23:05:36 UTC (rev 181980)
@@ -39,13 +39,15 @@
// CheckValue has two arguments:
// The value to check (1 byte),
// The index to jump to if the values are equal (4 bytes).
- CheckValue,
+ CheckValueCaseInsensitive,
+ CheckValueCaseSensitive,
// Jump to an offset if the input value is within a certain range.
// The lower value (1 byte).
// The higher value (1 byte).
// The index to jump to if the value is in the range (4 bytes).
- CheckValueRange,
+ CheckValueRangeCaseInsensitive,
+ CheckValueRangeCaseSensitive,
// AppendAction has one argument:
// The action to append (4 bytes).
@@ -67,9 +69,11 @@
static inline size_t instructionSizeWithArguments(DFABytecodeInstruction instruction)
{
switch (instruction) {
- case DFABytecodeInstruction::CheckValue:
+ case DFABytecodeInstruction::CheckValueCaseSensitive:
+ case DFABytecodeInstruction::CheckValueCaseInsensitive:
return sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + sizeof(unsigned);
- case DFABytecodeInstruction::CheckValueRange:
+ case DFABytecodeInstruction::CheckValueRangeCaseSensitive:
+ case DFABytecodeInstruction::CheckValueRangeCaseInsensitive:
return sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + sizeof(uint8_t) + sizeof(unsigned);
case DFABytecodeInstruction::AppendAction:
return sizeof(DFABytecodeInstruction) + sizeof(unsigned);
Modified: trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp (181979 => 181980)
--- trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp 2015-03-25 22:56:50 UTC (rev 181979)
+++ trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp 2015-03-25 23:05:36 UTC (rev 181980)
@@ -69,24 +69,23 @@
append<unsigned>(m_bytecode, 0); // This value will be set when linking.
}
-void DFABytecodeCompiler::emitCheckValue(uint8_t value, unsigned destinationNodeIndex)
+void DFABytecodeCompiler::emitCheckValue(uint8_t value, unsigned destinationNodeIndex, bool caseSensitive)
{
- append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::CheckValue);
+ append<DFABytecodeInstruction>(m_bytecode, caseSensitive ? DFABytecodeInstruction::CheckValueCaseSensitive : DFABytecodeInstruction::CheckValueCaseInsensitive);
append<uint8_t>(m_bytecode, value);
m_linkRecords.append(std::make_pair(m_bytecode.size(), destinationNodeIndex));
append<unsigned>(m_bytecode, 0); // This value will be set when linking.
}
-void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, unsigned destinationNodeIndex)
+void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, unsigned destinationNodeIndex, bool caseSensitive)
{
- ASSERT_WITH_MESSAGE(lowValue != highValue, "A single value check should be emitted for single values.");
- ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is smaller than highValue.");
+ ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is strictly less than highValue.");
- append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::CheckValueRange);
+ append<DFABytecodeInstruction>(m_bytecode, caseSensitive ? DFABytecodeInstruction::CheckValueRangeCaseSensitive : DFABytecodeInstruction::CheckValueRangeCaseInsensitive);
append<uint8_t>(m_bytecode, lowValue);
append<uint8_t>(m_bytecode, highValue);
m_linkRecords.append(std::make_pair(m_bytecode.size(), destinationNodeIndex));
- append<unsigned>(m_bytecode, 0);
+ append<unsigned>(m_bytecode, 0); // This value will be set when linking.
}
void DFABytecodeCompiler::emitTerminate()
@@ -113,56 +112,84 @@
void DFABytecodeCompiler::compileNodeTransitions(const DFANode& node)
{
+ unsigned destinations[128];
+ const unsigned noDestination = std::numeric_limits<unsigned>::max();
+ for (uint16_t i = 0; i < 128; i++) {
+ auto it = node.transitions.find(i);
+ if (it == node.transitions.end())
+ destinations[i] = noDestination;
+ else
+ destinations[i] = it->value;
+ }
+
+ Vector<Range> ranges;
+ uint8_t rangeMin;
bool hasRangeMin = false;
- uint16_t rangeMin;
- unsigned rangeDestination = 0;
+ for (uint8_t i = 0; i < 128; i++) {
+ if (hasRangeMin) {
+ ASSERT_WITH_MESSAGE(!(node.hasFallbackTransition && node.fallbackTransition == destinations[rangeMin]), "Individual transitions to the fallback transitions should have been eliminated by the optimizer.");
+ if (destinations[i] != destinations[rangeMin]) {
- for (unsigned char i = 0; i < 128; ++i) {
- auto transitionIterator = node.transitions.find(i);
- if (transitionIterator == node.transitions.end()) {
- if (hasRangeMin) {
- ASSERT_WITH_MESSAGE(!(node.hasFallbackTransition && node.fallbackTransition == rangeDestination), "Individual transitions to the fallback transitions should have been eliminated by the optimizer.");
+ // This is the end of a range. Check if it can be case insensitive.
+ uint8_t rangeMax = i - 1;
+ bool caseSensitive = true;
+ if (rangeMin >= 'A' && rangeMax <= 'Z') {
+ caseSensitive = false;
+ for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
+ if (destinations[rangeMin] != destinations[toASCIILower(rangeIndex)]) {
+ caseSensitive = true;
+ break;
+ }
+ }
+ }
- unsigned char lastHighValue = i - 1;
- compileCheckForRange(rangeMin, lastHighValue, rangeDestination);
- hasRangeMin = false;
+ if (!caseSensitive) {
+ // If all the lower-case destinations are the same as the upper-case destinations,
+ // then they will be covered by a case-insensitive range and will not need their own range.
+ for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
+ ASSERT(destinations[rangeMin] == destinations[toASCIILower(rangeIndex)]);
+ destinations[toASCIILower(rangeIndex)] = noDestination;
+ }
+ ranges.append(Range(toASCIILower(rangeMin), toASCIILower(rangeMax), destinations[rangeMin], caseSensitive));
+ } else
+ ranges.append(Range(rangeMin, rangeMax, destinations[rangeMin], caseSensitive));
+
+ if (destinations[i] == noDestination)
+ hasRangeMin = false;
+ else
+ rangeMin = i;
}
- continue;
- }
-
- if (!hasRangeMin) {
- hasRangeMin = true;
- rangeMin = transitionIterator->key;
- rangeDestination = transitionIterator->value;
} else {
- if (transitionIterator->value == rangeDestination)
- continue;
-
- unsigned char lastHighValue = i - 1;
- compileCheckForRange(rangeMin, lastHighValue, rangeDestination);
- rangeMin = i;
- rangeDestination = transitionIterator->value;
+ if (destinations[i] != noDestination) {
+ rangeMin = i;
+ hasRangeMin = true;
+ }
}
}
- if (hasRangeMin)
- compileCheckForRange(rangeMin, 127, rangeDestination);
+ if (hasRangeMin) {
+ // Ranges are appended after passing the end of them.
+ // If a range goes to 127, we will have an uncommitted rangeMin because the loop does not check 128.
+ // If a range goes to 127, there will never be values higher than it, so checking for case-insensitive ranges would always fail.
+ ranges.append(Range(rangeMin, 127, destinations[rangeMin], true));
+ }
+ for (const auto& range : ranges)
+ compileCheckForRange(range);
if (node.hasFallbackTransition)
emitJump(node.fallbackTransition);
else
emitTerminate();
}
-void DFABytecodeCompiler::compileCheckForRange(uint16_t lowValue, uint16_t highValue, unsigned destinationNodeIndex)
+void DFABytecodeCompiler::compileCheckForRange(const Range& range)
{
- ASSERT_WITH_MESSAGE(lowValue < 128, "The DFA engine only supports the ASCII alphabet.");
- ASSERT_WITH_MESSAGE(highValue < 128, "The DFA engine only supports the ASCII alphabet.");
- ASSERT(lowValue <= highValue);
+ ASSERT_WITH_MESSAGE(range.max < 128, "The DFA engine only supports the ASCII alphabet.");
+ ASSERT(range.min <= range.max);
- if (lowValue == highValue)
- emitCheckValue(lowValue, destinationNodeIndex);
+ if (range.min == range.max)
+ emitCheckValue(range.min, range.destination, range.caseSensitive);
else
- emitCheckValueRange(lowValue, highValue, destinationNodeIndex);
+ emitCheckValueRange(range.min, range.max, range.destination, range.caseSensitive);
}
void DFABytecodeCompiler::compile()
Modified: trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.h (181979 => 181980)
--- trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.h 2015-03-25 22:56:50 UTC (rev 181979)
+++ trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.h 2015-03-25 23:05:36 UTC (rev 181980)
@@ -49,15 +49,28 @@
void compile();
private:
+ struct Range {
+ Range(uint8_t min, uint8_t max, unsigned destination, bool caseSensitive)
+ : min(min)
+ , max(max)
+ , destination(destination)
+ , caseSensitive(caseSensitive)
+ {
+ }
+ uint8_t min;
+ uint8_t max;
+ unsigned destination;
+ bool caseSensitive;
+ };
void compileNode(unsigned);
void compileNodeTransitions(const DFANode&);
- void compileCheckForRange(uint16_t lowValue, uint16_t highValue, unsigned destinationNodeIndex);
+ void compileCheckForRange(const Range&);
void emitAppendAction(unsigned);
void emitTestFlagsAndAppendAction(uint16_t flags, unsigned);
void emitJump(unsigned destinationNodeIndex);
- void emitCheckValue(uint8_t value, unsigned destinationNodeIndex);
- void emitCheckValueRange(uint8_t lowValue, uint8_t highValue, unsigned destinationNodeIndex);
+ void emitCheckValue(uint8_t value, unsigned destinationNodeIndex, bool caseSensitive);
+ void emitCheckValueRange(uint8_t lowValue, uint8_t highValue, unsigned destinationNodeIndex, bool caseSensitive);
void emitTerminate();
Vector<DFABytecode>& m_bytecode;
Modified: trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp (181979 => 181980)
--- trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp 2015-03-25 22:56:50 UTC (rev 181979)
+++ trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp 2015-03-25 23:05:36 UTC (rev 181980)
@@ -85,33 +85,46 @@
case DFABytecodeInstruction::Terminate:
goto nextDFA;
- case DFABytecodeInstruction::CheckValue:
+ case DFABytecodeInstruction::CheckValueCaseSensitive:
+ case DFABytecodeInstruction::CheckValueCaseInsensitive: {
if (urlIndexIsAfterEndOfString)
goto nextDFA;
+ char character = url[urlIndex];
+ if (static_cast<DFABytecodeInstruction>(m_bytecode[programCounter]) == DFABytecodeInstruction::CheckValueCaseInsensitive)
+ character = toASCIILower(character);
+
// Check to see if the next character in the url is the value stored with the bytecode.
- if (url[urlIndex] == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode))) {
+ if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode))) {
programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t));
- if (!url[urlIndex])
+ if (!character)
urlIndexIsAfterEndOfString = true;
urlIndex++; // This represents an edge in the DFA.
- } else
- programCounter += instructionSizeWithArguments(DFABytecodeInstruction::CheckValue);
+ } else {
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::CheckValueCaseSensitive);
+ ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::CheckValueCaseSensitive) == instructionSizeWithArguments(DFABytecodeInstruction::CheckValueCaseInsensitive));
+ }
break;
+ }
- case DFABytecodeInstruction::CheckValueRange: {
+ case DFABytecodeInstruction::CheckValueRangeCaseSensitive:
+ case DFABytecodeInstruction::CheckValueRangeCaseInsensitive: {
if (urlIndexIsAfterEndOfString)
goto nextDFA;
char character = url[urlIndex];
+ if (static_cast<DFABytecodeInstruction>(m_bytecode[programCounter]) == DFABytecodeInstruction::CheckValueRangeCaseInsensitive)
+ character = toASCIILower(character);
if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode))
&& character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t))) {
programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t) + sizeof(uint8_t));
if (!character)
urlIndexIsAfterEndOfString = true;
urlIndex++; // This represents an edge in the DFA.
- } else
- programCounter += instructionSizeWithArguments(DFABytecodeInstruction::CheckValueRange);
+ } else {
+ programCounter += instructionSizeWithArguments(DFABytecodeInstruction::CheckValueRangeCaseSensitive);
+ ASSERT(instructionSizeWithArguments(DFABytecodeInstruction::CheckValueRangeCaseSensitive) == instructionSizeWithArguments(DFABytecodeInstruction::CheckValueRangeCaseInsensitive));
+ }
break;
}