Title: [183817] trunk
Revision
183817
Author
achristen...@apple.com
Date
2015-05-05 10:12:36 -0700 (Tue, 05 May 2015)

Log Message

[Content Extensions] Use less memory when writing byte code to file
https://bugs.webkit.org/show_bug.cgi?id=144602

Reviewed by Darin Adler.

Source/WebCore:

* contentextensions/ContentExtensionCompiler.cpp:
(WebCore::ContentExtensions::compileRuleList):
* contentextensions/ContentExtensionCompiler.h:
Compile one DFA at a time so we don't need to keep all the bytecode in memory at the same time.
* contentextensions/DFABytecodeInterpreter.cpp:
(WebCore::ContentExtensions::DFABytecodeInterpreter::interpret):
Jumps are now relative to the current DFA because we don't know about other DFAs that
have been compiling when linking the DFA bytecode.  This will also make the DFA bytecode
easier to minimize because more of the values are small in the DFAs after the first DFA.
* platform/FileSystem.h:

Source/WebKit2:

* UIProcess/API/APIUserContentExtensionStore.cpp:
(API::decodeContentExtensionMetaData):
(API::writeDataToFile):
(API::compiledToFile):
(API::createExtension):
Compile and write each DFA to file, then come back and write the header when finalizing.
Also don't copy the DFA bytecode. This way, we don't need to keep more than one DFA's
bytecode in memory at a time.

Tools:

* TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (183816 => 183817)


--- trunk/Source/WebCore/ChangeLog	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Source/WebCore/ChangeLog	2015-05-05 17:12:36 UTC (rev 183817)
@@ -1,3 +1,21 @@
+2015-05-04  Alex Christensen  <achristen...@webkit.org>
+
+        [Content Extensions] Use less memory when writing byte code to file
+        https://bugs.webkit.org/show_bug.cgi?id=144602
+
+        Reviewed by Darin Adler.
+
+        * contentextensions/ContentExtensionCompiler.cpp:
+        (WebCore::ContentExtensions::compileRuleList):
+        * contentextensions/ContentExtensionCompiler.h:
+        Compile one DFA at a time so we don't need to keep all the bytecode in memory at the same time.
+        * contentextensions/DFABytecodeInterpreter.cpp:
+        (WebCore::ContentExtensions::DFABytecodeInterpreter::interpret):
+        Jumps are now relative to the current DFA because we don't know about other DFAs that
+        have been compiling when linking the DFA bytecode.  This will also make the DFA bytecode
+        easier to minimize because more of the values are small in the DFAs after the first DFA.
+        * platform/FileSystem.h:
+
 2015-05-05  Csaba Osztrogonác  <o...@webkit.org>
 
         Unreviewed, speculative WinCairo buildfix after r183807.

Modified: trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp (183816 => 183817)


--- trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.cpp	2015-05-05 17:12:36 UTC (rev 183817)
@@ -197,7 +197,6 @@
     double totalNFAToByteCodeBuildTimeStart = monotonicallyIncreasingTime();
 #endif
 
-    Vector<DFABytecode> bytecode;
     bool firstNFASeen = false;
     combinedURLFilters.processNFAs([&](NFA&& nfa) {
 #if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
@@ -237,8 +236,11 @@
             addUniversalActionsToDFA(dfa, universalActionLocations);
         }
 
+        Vector<DFABytecode> bytecode;
         DFABytecodeCompiler compiler(dfa, bytecode);
         compiler.compile();
+        LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
+        client.writeBytecode(WTF::move(bytecode));
 
         firstNFASeen = true;
     });
@@ -252,8 +254,11 @@
 
         addUniversalActionsToDFA(dummyDFA, universalActionLocations);
 
+        Vector<DFABytecode> bytecode;
         DFABytecodeCompiler compiler(dummyDFA, bytecode);
         compiler.compile();
+        LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
+        client.writeBytecode(WTF::move(bytecode));
     }
 
     // FIXME: combinedURLFilters should be cleared incrementally as it is processing NFAs. 
@@ -268,9 +273,7 @@
     dataLogF("    Bytecode size %zu\n", bytecode.size());
 #endif
 
-    LOG_LARGE_STRUCTURES(bytecode, bytecode.capacity() * sizeof(uint8_t));
-    client.writeBytecode(WTF::move(bytecode));
-    bytecode.clear();
+    client.finalize();
 
     return { };
 }

Modified: trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.h (183816 => 183817)


--- trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.h	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionCompiler.h	2015-05-05 17:12:36 UTC (rev 183817)
@@ -47,6 +47,7 @@
     
     virtual void writeBytecode(Vector<DFABytecode>&&) = 0;
     virtual void writeActions(Vector<SerializedActionByte>&&) = 0;
+    virtual void finalize() = 0;
 };
 
 WEBCORE_EXPORT std::error_code compileRuleList(ContentExtensionCompilationClient&, const String&);

Modified: trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp (183816 => 183817)


--- trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp	2015-05-05 17:12:36 UTC (rev 183817)
@@ -114,7 +114,7 @@
             if (programCounter >= m_bytecodeLength)
                 return actions;
         } else {
-            ASSERT_WITH_MESSAGE(static_cast<DFABytecodeInstruction>(m_bytecode[programCounter]) != DFABytecodeInstruction::AppendAction 
+            ASSERT_WITH_MESSAGE(static_cast<DFABytecodeInstruction>(m_bytecode[programCounter]) != DFABytecodeInstruction::AppendAction
                 && static_cast<DFABytecodeInstruction>(m_bytecode[programCounter]) != DFABytecodeInstruction::TestFlagsAndAppendAction, 
                 "Triggers that match everything should only be in the first DFA.");
         }
@@ -137,7 +137,7 @@
                 // Check to see if the next character in the url is the value stored with the bytecode.
                 char character = url[urlIndex];
                 if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode), m_pagesUsed)) {
-                    programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t), m_pagesUsed);
+                    programCounter = dfaStart + getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t), m_pagesUsed);
                     if (!character)
                         urlIndexIsAfterEndOfString = true;
                     urlIndex++; // This represents an edge in the DFA.
@@ -153,7 +153,7 @@
                 // Check to see if the next character in the url is the value stored with the bytecode.
                 char character = toASCIILower(url[urlIndex]);
                 if (character == getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode), m_pagesUsed)) {
-                    programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t), m_pagesUsed);
+                    programCounter = dfaStart + getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t), m_pagesUsed);
                     if (!character)
                         urlIndexIsAfterEndOfString = true;
                     urlIndex++; // This represents an edge in the DFA.
@@ -169,7 +169,7 @@
                 char character = url[urlIndex];
                 if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode), m_pagesUsed)
                     && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t), m_pagesUsed)) {
-                    programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t) + sizeof(uint8_t), m_pagesUsed);
+                    programCounter = dfaStart + getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t) + sizeof(uint8_t), m_pagesUsed);
                     if (!character)
                         urlIndexIsAfterEndOfString = true;
                     urlIndex++; // This represents an edge in the DFA.
@@ -185,7 +185,7 @@
                 char character = toASCIILower(url[urlIndex]);
                 if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode), m_pagesUsed)
                     && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t), m_pagesUsed)) {
-                    programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t) + sizeof(uint8_t), m_pagesUsed);
+                    programCounter = dfaStart + getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t) + sizeof(uint8_t), m_pagesUsed);
                     if (!character)
                         urlIndexIsAfterEndOfString = true;
                     urlIndex++; // This represents an edge in the DFA.
@@ -198,7 +198,7 @@
                 if (!url[urlIndex] || urlIndexIsAfterEndOfString)
                     goto nextDFA;
                 
-                programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode), m_pagesUsed);
+                programCounter = dfaStart + getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode), m_pagesUsed);
                 urlIndex++; // This represents an edge in the DFA.
                 break;
                     

Modified: trunk/Source/WebCore/platform/FileSystem.h (183816 => 183817)


--- trunk/Source/WebCore/platform/FileSystem.h	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Source/WebCore/platform/FileSystem.h	2015-05-05 17:12:36 UTC (rev 183817)
@@ -168,7 +168,7 @@
 WEBCORE_EXPORT PlatformFileHandle openFile(const String& path, FileOpenMode);
 WEBCORE_EXPORT void closeFile(PlatformFileHandle&);
 // Returns the resulting offset from the beginning of the file if successful, -1 otherwise.
-long long seekFile(PlatformFileHandle, long long offset, FileSeekOrigin);
+WEBCORE_EXPORT long long seekFile(PlatformFileHandle, long long offset, FileSeekOrigin);
 bool truncateFile(PlatformFileHandle, long long offset);
 // Returns number of bytes actually read if successful, -1 otherwise.
 WEBCORE_EXPORT int writeToFile(PlatformFileHandle, const char* data, int length);

Modified: trunk/Source/WebKit2/ChangeLog (183816 => 183817)


--- trunk/Source/WebKit2/ChangeLog	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Source/WebKit2/ChangeLog	2015-05-05 17:12:36 UTC (rev 183817)
@@ -1,3 +1,19 @@
+2015-05-04  Alex Christensen  <achristen...@webkit.org>
+
+        [Content Extensions] Use less memory when writing byte code to file
+        https://bugs.webkit.org/show_bug.cgi?id=144602
+
+        Reviewed by Darin Adler.
+
+        * UIProcess/API/APIUserContentExtensionStore.cpp:
+        (API::decodeContentExtensionMetaData):
+        (API::writeDataToFile):
+        (API::compiledToFile):
+        (API::createExtension):
+        Compile and write each DFA to file, then come back and write the header when finalizing.
+        Also don't copy the DFA bytecode. This way, we don't need to keep more than one DFA's
+        bytecode in memory at a time.
+
 2015-05-05  Sungmann Cho  <sungmann....@navercorp.com>
 
         Make all FrameLoadState data members private.

Modified: trunk/Source/WebKit2/UIProcess/API/APIUserContentExtensionStore.cpp (183816 => 183817)


--- trunk/Source/WebKit2/UIProcess/API/APIUserContentExtensionStore.cpp	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Source/WebKit2/UIProcess/API/APIUserContentExtensionStore.cpp	2015-05-05 17:12:36 UTC (rev 183817)
@@ -74,12 +74,11 @@
     return WebCore::pathByAppendingComponent(base, "ContentExtension-" + WebCore::encodeForFileName(identifier));
 }
 
+const size_t ContentExtensionFileHeaderSize = sizeof(uint32_t) + 2 * sizeof(uint64_t);
 struct ContentExtensionMetaData {
-    uint32_t version;
-    uint64_t actionsOffset;
-    uint64_t actionsSize;
-    uint64_t bytecodeOffset;
-    uint64_t bytecodeSize;
+    uint32_t version { 1 };
+    uint64_t actionsSize { 0 };
+    uint64_t bytecodeSize { 0 };
 };
 
 static Data encodeContentExtensionMetaData(const ContentExtensionMetaData& metaData)
@@ -90,6 +89,7 @@
     encoder << metaData.actionsSize;
     encoder << metaData.bytecodeSize;
 
+    ASSERT(encoder.bufferSize() == ContentExtensionFileHeaderSize);
     return Data(encoder.buffer(), encoder.bufferSize());
 }
 
@@ -109,8 +109,6 @@
             return false;
         if (!decoder.decode(metaData.bytecodeSize))
             return false;
-        metaData.actionsOffset = decoder.currentOffset();
-        metaData.bytecodeOffset = metaData.actionsOffset + metaData.actionsSize;
         success = true;
         return false;
     });
@@ -141,7 +139,7 @@
     return true;
 }
 
-static bool writeDataToFile(Data& fileData, WebCore::PlatformFileHandle fd)
+static bool writeDataToFile(const Data& fileData, WebCore::PlatformFileHandle fd)
 {
     bool success = true;
     fileData.apply([fd, &success](const uint8_t* data, size_t size) {
@@ -161,65 +159,76 @@
 
     class CompilationClient final : public ContentExtensionCompilationClient {
     public:
-        CompilationClient(Data& bytecodeData, Data& actionsData)
-            : m_bytecodeData(bytecodeData)
-            , m_actionsData(actionsData)
+        CompilationClient(WebCore::PlatformFileHandle fileHandle, ContentExtensionMetaData& metaData)
+            : m_fileHandle(fileHandle)
+            , m_metaData(metaData)
         {
         }
 
         virtual void writeBytecode(Vector<DFABytecode>&& bytecode) override
         {
-            m_bytecodeData = Data(bytecode.data(), bytecode.size());
+            m_bytecodeWritten += bytecode.size();
+            write(Data(bytecode.data(), bytecode.size()));
         }
 
         virtual void writeActions(Vector<SerializedActionByte>&& actions) override
         {
-            m_actionsData = Data(actions.data(), actions.size());
+            ASSERT(!m_bytecodeWritten);
+            ASSERT(!m_actionsWritten);
+            m_actionsWritten += actions.size();
+            write(Data(actions.data(), actions.size()));
         }
+        
+        virtual void finalize() override
+        {
+            m_metaData.actionsSize = m_actionsWritten;
+            m_metaData.bytecodeSize = m_bytecodeWritten;
+            
+            Data header = encodeContentExtensionMetaData(m_metaData);
+            if (!m_fileError && WebCore::seekFile(m_fileHandle, 0ll, WebCore::FileSeekOrigin::SeekFromBeginning) == -1) {
+                WebCore::closeFile(m_fileHandle);
+                m_fileError = true;
+            }
+            write(header);
+        }
+        
+        bool hadErrorWhileWritingToFile() { return m_fileError; }
 
     private:
-        Data& m_bytecodeData;
-        Data& m_actionsData;
+        void write(const Data& data)
+        {
+            if (!m_fileError && !writeDataToFile(data, m_fileHandle)) {
+                WebCore::closeFile(m_fileHandle);
+                m_fileError = true;
+            }
+        }
+        
+        WebCore::PlatformFileHandle m_fileHandle;
+        ContentExtensionMetaData& m_metaData;
+        size_t m_bytecodeWritten { 0 };
+        size_t m_actionsWritten { 0 };
+        bool m_fileError { false };
     };
 
-    Data bytecode;
-    Data actions;
-    CompilationClient compilationClient(bytecode, actions);
-
-    // FIXME: This copies the data. Instead, we should be passing an interface
-    // to the compiler that can write directly to a file.
-
-    auto compilerError = compileRuleList(compilationClient, json);
-    if (compilerError)
-        return compilerError;
-
-    auto actionsAndBytecode = concatenate(actions, bytecode);
-
-    metaData.version = 1;
-    metaData.actionsSize = actions.size();
-    metaData.bytecodeSize = bytecode.size();
-
-    auto encodedMetaData = encodeContentExtensionMetaData(metaData);
-
-    metaData.actionsOffset = encodedMetaData.size();
-    metaData.bytecodeOffset = encodedMetaData.size() + metaData.actionsSize;
-
-    auto data = "" actionsAndBytecode);
-    auto dataSize = data.size();
-
-    ASSERT(metaData.actionsOffset + metaData.actionsSize + metaData.bytecodeSize == dataSize);
-
     auto temporaryFileHandle = WebCore::invalidPlatformFileHandle;
     String temporaryFilePath = WebCore::openTemporaryFile("ContentExtension", temporaryFileHandle);
     if (temporaryFileHandle == WebCore::invalidPlatformFileHandle)
         return UserContentExtensionStore::Error::CompileFailed;
+    
+    char invalidHeader[ContentExtensionFileHeaderSize];
+    memset(invalidHeader, 0xFF, sizeof(invalidHeader));
+    // This header will be rewritten in CompilationClient::finalize.
+    if (WebCore::writeToFile(temporaryFileHandle, invalidHeader, sizeof(invalidHeader)) == -1)
+        return UserContentExtensionStore::Error::CompileFailed;
 
-    if (!writeDataToFile(data, temporaryFileHandle)) {
-        WebCore::closeFile(temporaryFileHandle);
+    CompilationClient compilationClient(temporaryFileHandle, metaData);
+    
+    if (auto compilerError = compileRuleList(compilationClient, json))
+        return compilerError;
+    if (compilationClient.hadErrorWhileWritingToFile())
         return UserContentExtensionStore::Error::CompileFailed;
-    }
-
-    mappedData = mapFile(temporaryFileHandle, 0, dataSize);
+    
+    mappedData = mapFile(temporaryFileHandle, 0, ContentExtensionFileHeaderSize + metaData.actionsSize + metaData.bytecodeSize);
     WebCore::closeFile(temporaryFileHandle);
 
     if (mappedData.isNull())
@@ -237,9 +246,9 @@
     auto compiledContentExtensionData = WebKit::WebCompiledContentExtensionData(
         WTF::move(sharedMemory),
         fileData,
-        metaData.actionsOffset,
+        ContentExtensionFileHeaderSize,
         metaData.actionsSize,
-        metaData.bytecodeOffset,
+        ContentExtensionFileHeaderSize + metaData.actionsSize,
         metaData.bytecodeSize
     );
     auto compiledContentExtension = WebKit::WebCompiledContentExtension::create(WTF::move(compiledContentExtensionData));

Modified: trunk/Tools/ChangeLog (183816 => 183817)


--- trunk/Tools/ChangeLog	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Tools/ChangeLog	2015-05-05 17:12:36 UTC (rev 183817)
@@ -1,3 +1,12 @@
+2015-05-04  Alex Christensen  <achristen...@webkit.org>
+
+        [Content Extensions] Use less memory when writing byte code to file
+        https://bugs.webkit.org/show_bug.cgi?id=144602
+
+        Reviewed by Darin Adler.
+
+        * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+
 2015-05-05  Carlos Garcia Campos  <cgar...@igalia.com>
 
         [GTK] Async operations running in the WorkQueue thread should schedule their sources to the WorkQueue main lopp

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp (183816 => 183817)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp	2015-05-05 16:59:31 UTC (rev 183816)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp	2015-05-05 17:12:36 UTC (rev 183817)
@@ -81,20 +81,30 @@
     InMemoryContentExtensionCompilationClient(WebCore::ContentExtensions::CompiledContentExtensionData& data)
         : m_data(data)
     {
+        EXPECT_EQ(data.bytecode.size(), 0ull);
+        EXPECT_EQ(data.actions.size(), 0ull);
     }
 
     virtual void writeBytecode(Vector<WebCore::ContentExtensions::DFABytecode>&& bytecode) override
     {
-        m_data.bytecode = WTF::move(bytecode);
+        EXPECT_FALSE(finalized);
+        m_data.bytecode.appendVector(bytecode);
     }
     
     virtual void writeActions(Vector<WebCore::ContentExtensions::SerializedActionByte>&& actions) override
     {
-        m_data.actions = WTF::move(actions);
+        EXPECT_FALSE(finalized);
+        m_data.actions.appendVector(actions);
     }
+    
+    virtual void finalize() override
+    {
+        finalized = true;
+    }
 
 private:
     WebCore::ContentExtensions::CompiledContentExtensionData& m_data;
+    bool finalized { false };
 };
 
 class InMemoryCompiledContentExtension : public ContentExtensions::CompiledContentExtension {
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to