Title: [183518] trunk
Revision
183518
Author
wei...@apple.com
Date
2015-04-28 17:22:33 -0700 (Tue, 28 Apr 2015)

Log Message

[Content Extensions] Process NFAs individually to avoid having all NFAs live at the same time
https://bugs.webkit.org/show_bug.cgi?id=144363

Reviewed by Alex Christensen.

Source/WebCore:

This brings dirty memory use when compiling our test content extension down from ~300MB to ~100MB.

* contentextensions/CombinedURLFilters.cpp:
(WebCore::ContentExtensions::CombinedURLFilters::processNFAs):
(WebCore::ContentExtensions::CombinedURLFilters::createNFAs): Deleted.
* contentextensions/CombinedURLFilters.h:
Replace function that creates a Vector of all the NFAs with one that allows incremental processing
as they are created.

* contentextensions/ContentExtensionCompiler.cpp:
(WebCore::ContentExtensions::addUniversalActionsToDFA):
Extract code to add universal actions into a helper, since we need to call it in two places now.

(WebCore::ContentExtensions::compileRuleList):
Adopt CombinedURLFilters::processNFAs. Now that we don't have a Vector of NFAs, we need to keep track
of whether or not any NFAs were processed and if we are currently processing the first NFA so we can
ensure that we have some bytecode generated event for empty rule sets, and that universal actions are
placed on the first DFA.

Tools:

* TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
(TestWebKitAPI::createNFAs):
(TestWebKitAPI::TEST_F):
* TestWebKitAPI/Tests/WebCore/DFAMinimizer.cpp:
(TestWebKitAPI::countLiveNodes):
(TestWebKitAPI::createNFAs):
(TestWebKitAPI::buildDFAFromPatterns):
Update tests to use a hand rolled createNFAs function on top of CombinedURLFilters::processNFAs.

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (183517 => 183518)


--- trunk/Source/WebCore/ChangeLog	2015-04-28 23:27:21 UTC (rev 183517)
+++ trunk/Source/WebCore/ChangeLog	2015-04-29 00:22:33 UTC (rev 183518)
@@ -1,3 +1,29 @@
+2015-04-28  Sam Weinig  <s...@webkit.org>
+
+        [Content Extensions] Process NFAs individually to avoid having all NFAs live at the same time
+        https://bugs.webkit.org/show_bug.cgi?id=144363
+
+        Reviewed by Alex Christensen.
+
+        This brings dirty memory use when compiling our test content extension down from ~300MB to ~100MB.
+
+        * contentextensions/CombinedURLFilters.cpp:
+        (WebCore::ContentExtensions::CombinedURLFilters::processNFAs):
+        (WebCore::ContentExtensions::CombinedURLFilters::createNFAs): Deleted.
+        * contentextensions/CombinedURLFilters.h:
+        Replace function that creates a Vector of all the NFAs with one that allows incremental processing
+        as they are created.
+
+        * contentextensions/ContentExtensionCompiler.cpp:
+        (WebCore::ContentExtensions::addUniversalActionsToDFA):
+        Extract code to add universal actions into a helper, since we need to call it in two places now.
+
+        (WebCore::ContentExtensions::compileRuleList):
+        Adopt CombinedURLFilters::processNFAs. Now that we don't have a Vector of NFAs, we need to keep track
+        of whether or not any NFAs were processed and if we are currently processing the first NFA so we can
+        ensure that we have some bytecode generated event for empty rule sets, and that universal actions are
+        placed on the first DFA.
+
 2015-04-28  Timothy Horton  <timothy_hor...@apple.com>
 
         [TextIndicator] Yellow highlight takes too long to fade out on scroll

Modified: trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp (183517 => 183518)


--- trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp	2015-04-28 23:27:21 UTC (rev 183517)
+++ trunk/Source/WebCore/contentextensions/CombinedURLFilters.cpp	2015-04-29 00:22:33 UTC (rev 183518)
@@ -169,13 +169,11 @@
     }
 }
 
-Vector<NFA> CombinedURLFilters::createNFAs() const
+void CombinedURLFilters::processNFAs(std::function<void(NFA&&)> handler) const
 {
     Vector<ActiveSubtree> activeStack;
     activeStack.append(ActiveSubtree({ m_prefixTreeRoot.get(), m_prefixTreeRoot->edges.begin() }));
 
-    Vector<NFA> nfas;
-
     while (true) {
     ProcessSubtree:
         ActiveSubtree& activeSubtree = activeStack.last();
@@ -202,23 +200,24 @@
         }
 
         if (needToGenerate) {
-            nfas.append(NFA());
-            NFA& generatingNFA = nfas.last();
+            NFA nfa;
 
-            unsigned prefixEnd = generatingNFA.root();
+            unsigned prefixEnd = nfa.root();
 
             for (unsigned i = 0; i < activeStack.size() - 1; ++i) {
                 const Term& term = activeStack[i].iterator->first;
-                prefixEnd = term.generateGraph(generatingNFA, prefixEnd, activeStack[i].iterator->second->finalActions);
+                prefixEnd = term.generateGraph(nfa, prefixEnd, activeStack[i].iterator->second->finalActions);
             }
 
             for (const auto& edge : activeSubtree.vertex->edges) {
                 if (!edge.second->inVariableLengthPrefix) {
                     const Term& term = edge.first;
-                    unsigned newSubtreeStart = term.generateGraph(generatingNFA, prefixEnd, edge.second->finalActions);
-                    generateNFAForSubtree(generatingNFA, newSubtreeStart, *edge.second);
+                    unsigned newSubtreeStart = term.generateGraph(nfa, prefixEnd, edge.second->finalActions);
+                    generateNFAForSubtree(nfa, newSubtreeStart, *edge.second);
                 }
             }
+            
+            handler(WTF::move(nfa));
         }
 
         // We have processed all the subtrees of this level, pop the stack and move on to the next sibling.
@@ -227,8 +226,6 @@
             break;
         ++activeStack.last().iterator;
     }
-
-    return nfas;
 }
 
 } // namespace ContentExtensions

Modified: trunk/Source/WebCore/contentextensions/CombinedURLFilters.h (183517 => 183518)


--- trunk/Source/WebCore/contentextensions/CombinedURLFilters.h	2015-04-28 23:27:21 UTC (rev 183517)
+++ trunk/Source/WebCore/contentextensions/CombinedURLFilters.h	2015-04-29 00:22:33 UTC (rev 183518)
@@ -42,9 +42,10 @@
 public:
     CombinedURLFilters();
     ~CombinedURLFilters();
+
     void addPattern(uint64_t patternId, const Vector<Term>& pattern);
 
-    Vector<NFA> createNFAs() const;
+    void processNFAs(std::function<void(NFA&&)> handler) const;
     void clear();
 
     size_t memoryUsed() const;

Modified: trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp (183517 => 183518)


--- trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp	2015-04-28 23:27:21 UTC (rev 183517)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp	2015-04-29 00:22:33 UTC (rev 183518)
@@ -119,7 +119,23 @@
     return actionLocations;
 }
 
+typedef HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> UniversalActionLocationsSet;
 
+static void addUniversalActionsToDFA(DFA& dfa, const UniversalActionLocationsSet& universalActionLocations)
+{
+    if (universalActionLocations.isEmpty())
+        return;
+
+    unsigned actionsStart = dfa.actions.size();
+    dfa.actions.reserveCapacity(dfa.actions.size() + universalActionLocations.size());
+    for (uint64_t actionLocation : universalActionLocations)
+        dfa.actions.uncheckedAppend(actionLocation);
+    unsigned actionsEnd = dfa.actions.size();
+    unsigned actionsLength = actionsEnd - actionsStart;
+    RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything");
+    dfa.nodes[dfa.root].setActions(actionsStart, static_cast<uint16_t>(actionsLength));
+}
+
 std::error_code compileRuleList(ContentExtensionCompilationClient& client, const String& ruleList)
 {
     Vector<ContentExtensionRule> parsedRuleList;
@@ -137,7 +153,7 @@
     LOG_LARGE_STRUCTURES(actions, actions.capacity() * sizeof(SerializedActionByte));
     actions.clear();
 
-    HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> universalActionLocations;
+    UniversalActionLocationsSet universalActionLocations;
 
     CombinedURLFilters combinedURLFilters;
     URLFilterParser urlFilterParser(combinedURLFilters);
@@ -175,40 +191,27 @@
     dataLogF("    Time spent partitioning the rules into groups: %f\n", (patternPartitioningEnd - patternPartitioningStart));
 #endif
 
-#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
-    double nfaBuildTimeStart = monotonicallyIncreasingTime();
-#endif
-
-    Vector<NFA> nfas = combinedURLFilters.createNFAs();
     LOG_LARGE_STRUCTURES(combinedURLFilters, combinedURLFilters.memoryUsed());
-    combinedURLFilters.clear();
-    if (!nfas.size() && universalActionLocations.size())
-        nfas.append(NFA());
 
 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
-    double nfaBuildTimeEnd = monotonicallyIncreasingTime();
-    dataLogF("    Time spent building the NFAs: %f\n", (nfaBuildTimeEnd - nfaBuildTimeStart));
+    double totalNFAToByteCodeBuildTimeStart = monotonicallyIncreasingTime();
 #endif
 
+    Vector<DFABytecode> bytecode;
+    bool firstNFASeen = false;
+    combinedURLFilters.processNFAs([&](NFA&& nfa) {
 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
-    for (size_t i = 0; i < nfas.size(); ++i) {
-        WTFLogAlways("NFA %zu", i);
-        const NFA& nfa = nfas[i];
         nfa.debugPrintDot();
-    }
 #endif
 
-#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
-    double totalNFAToByteCodeBuildTimeStart = monotonicallyIncreasingTime();
-#endif
+        LOG_LARGE_STRUCTURES(nfa, nfa.memoryUsed());
 
-    Vector<DFABytecode> bytecode;
-    for (size_t i = 0; i < nfas.size(); ++i) {
 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
         double dfaBuildTimeStart = monotonicallyIncreasingTime();
 #endif
-        DFA dfa = NFAToDFA::convert(nfas[i]);
+        DFA dfa = NFAToDFA::convert(nfa);
         LOG_LARGE_STRUCTURES(dfa, dfa.memoryUsed());
+
 #if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
         double dfaBuildTimeEnd = monotonicallyIncreasingTime();
         dataLogF("    Time spent building the DFA %zu: %f\n", i, (dfaBuildTimeEnd - dfaBuildTimeStart));
@@ -228,20 +231,34 @@
         dfa.debugPrintDot();
 #endif
         ASSERT_WITH_MESSAGE(!dfa.nodes[dfa.root].hasActions(), "All actions on the DFA root should come from regular expressions that match everything.");
-        if (!i) {
+
+        if (!firstNFASeen) {
             // Put all the universal actions on the first DFA.
-            unsigned actionsStart = dfa.actions.size();
-            dfa.actions.reserveCapacity(dfa.actions.size() + universalActionLocations.size());
-            for (uint64_t actionLocation : universalActionLocations)
-                dfa.actions.uncheckedAppend(actionLocation);
-            unsigned actionsEnd = dfa.actions.size();
-            unsigned actionsLength = actionsEnd - actionsStart;
-            RELEASE_ASSERT_WITH_MESSAGE(actionsLength < std::numeric_limits<uint16_t>::max(), "Too many uncombined actions that match everything");
-            dfa.nodes[dfa.root].setActions(actionsStart, static_cast<uint16_t>(actionsLength));
+            addUniversalActionsToDFA(dfa, universalActionLocations);
         }
+
         DFABytecodeCompiler compiler(dfa, bytecode);
         compiler.compile();
+
+        firstNFASeen = true;
+    });
+
+    if (!firstNFASeen) {
+        // Our bytecode interpreter expects to have at least one DFA, so if we haven't seen any
+        // create a dummy one and add any universal actions.
+
+        NFA dummyNFA;
+        DFA dummyDFA = NFAToDFA::convert(dummyNFA);
+
+        addUniversalActionsToDFA(dummyDFA, universalActionLocations);
+
+        DFABytecodeCompiler compiler(dummyDFA, bytecode);
+        compiler.compile();
     }
+
+    // FIXME: combinedURLFilters should be cleared incrementally as it is processing NFAs. 
+    combinedURLFilters.clear();
+
     LOG_LARGE_STRUCTURES(universalActionLocations, universalActionLocations.capacity() * sizeof(unsigned));
     universalActionLocations.clear();
 
@@ -251,11 +268,6 @@
     dataLogF("    Bytecode size %zu\n", bytecode.size());
     dataLogF("    DFA count %zu\n", nfas.size());
 #endif
-    size_t nfaMemoryUsed = 0;
-    for (const NFA& nfa : nfas)
-        nfaMemoryUsed += sizeof(NFA) + nfa.memoryUsed();
-    LOG_LARGE_STRUCTURES(nfas, nfaMemoryUsed);
-    nfas.clear();
 
     LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
     client.writeBytecode(WTF::move(bytecode));

Modified: trunk/Tools/ChangeLog (183517 => 183518)


--- trunk/Tools/ChangeLog	2015-04-28 23:27:21 UTC (rev 183517)
+++ trunk/Tools/ChangeLog	2015-04-29 00:22:33 UTC (rev 183518)
@@ -1,3 +1,19 @@
+2015-04-28  Sam Weinig  <s...@webkit.org>
+
+        [Content Extensions] Process NFAs individually to avoid having all NFAs live at the same time
+        https://bugs.webkit.org/show_bug.cgi?id=144363
+
+        Reviewed by Alex Christensen.
+
+        * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+        (TestWebKitAPI::createNFAs):
+        (TestWebKitAPI::TEST_F):
+        * TestWebKitAPI/Tests/WebCore/DFAMinimizer.cpp:
+        (TestWebKitAPI::countLiveNodes):
+        (TestWebKitAPI::createNFAs):
+        (TestWebKitAPI::buildDFAFromPatterns):
+        Update tests to use a hand rolled createNFAs function on top of CombinedURLFilters::processNFAs.
+
 2015-04-28  Michael Catanzaro  <mcatanz...@igalia.com>
 
         Fully replace ENABLE_LLINT_C_LOOP with ENABLE_JIT

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp (183517 => 183518)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp	2015-04-28 23:27:21 UTC (rev 183517)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp	2015-04-29 00:22:33 UTC (rev 183518)
@@ -163,6 +163,17 @@
     return backend;
 }
 
+static Vector<ContentExtensions::NFA> createNFAs(ContentExtensions::CombinedURLFilters& combinedURLFilters)
+{
+    Vector<ContentExtensions::NFA> nfas;
+
+    combinedURLFilters.processNFAs([&](ContentExtensions::NFA&& nfa) {
+        nfas.append(WTF::move(nfa));
+    });
+
+    return nfas;
+}
+
 TEST_F(ContentExtensionTest, Basic)
 {
     auto backend = makeBackend("[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"webkit.org\"}}]");
@@ -599,7 +610,7 @@
     // Not this one.
     EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("^[ab]+bang", false, 0));
 
-    EXPECT_EQ(2ul, combinedURLFilters.createNFAs().size());
+    EXPECT_EQ(2ul, createNFAs(combinedURLFilters).size());
 }
 
 TEST_F(ContentExtensionTest, StrictPrefixSeparatedMachines2)
@@ -632,7 +643,7 @@
     EXPECT_EQ(ContentExtensions::URLFilterParser::ParseStatus::Ok, parser.addPattern("[a-c]+b+oom", false, 3));
 
     // "^foo" and "^webkit:" can be grouped, the other two have a variable prefix.
-    EXPECT_EQ(3ul, combinedURLFilters.createNFAs().size());
+    EXPECT_EQ(3ul, createNFAs(combinedURLFilters).size());
 }
 
 static void testPatternStatus(String pattern, ContentExtensions::URLFilterParser::ParseStatus status)

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/DFAMinimizer.cpp (183517 => 183518)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/DFAMinimizer.cpp	2015-04-28 23:27:21 UTC (rev 183517)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/DFAMinimizer.cpp	2015-04-29 00:22:33 UTC (rev 183518)
@@ -43,7 +43,7 @@
     }
 };
 
-unsigned countLiveNodes(const ContentExtensions::DFA& dfa)
+static unsigned countLiveNodes(const ContentExtensions::DFA& dfa)
 {
     unsigned counter = 0;
     for (const auto& node : dfa.nodes) {
@@ -53,6 +53,17 @@
     return counter;
 }
 
+static Vector<ContentExtensions::NFA> createNFAs(ContentExtensions::CombinedURLFilters& combinedURLFilters)
+{
+    Vector<ContentExtensions::NFA> nfas;
+
+    combinedURLFilters.processNFAs([&](ContentExtensions::NFA&& nfa) {
+        nfas.append(WTF::move(nfa));
+    });
+
+    return nfas;
+}
+
 ContentExtensions::DFA buildDFAFromPatterns(Vector<const char*> patterns)
 {
     ContentExtensions::CombinedURLFilters combinedURLFilters;
@@ -60,7 +71,7 @@
 
     for (const char* pattern : patterns)
         parser.addPattern(pattern, false, 0);
-    Vector<ContentExtensions::NFA> nfas = combinedURLFilters.createNFAs();
+    Vector<ContentExtensions::NFA> nfas = createNFAs(combinedURLFilters);
     return ContentExtensions::NFAToDFA::convert(nfas[0]);
 }
 
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to