[254788] trunk

Revision: 254788
Author: sbar...@apple.com
Date: 2020-01-17 19:24:31 -0800 (Fri, 17 Jan 2020)

Log Message

Air O0 should have better stack allocation
https://bugs.webkit.org/show_bug.cgi?id=206436

Reviewed by Tadeu Zagallo.

JSTests:

* wasm/stress/dont-stack-overflow-in-air.js: Added.

Source/JavaScriptCore:

This patch adds a simple stack slot allocator to Air O0 so that the generated
code uses smaller stack frames. The huge stack frames from the old stack
allocator were leading to stack overflows in some programs. Before,
each Tmp got its own stack slot. The new allocator works similarly to O0's
register allocator: it linearizes the program and uses the end of a Tmp's live
range as an opportunity to place that Tmp's stack slot on a free list of
available stack slots. This patch also fixes an issue in our linearization code
where the head of one block and the tail of another block would share the
same linearization index. This didn't matter for register allocation, but it
does matter for the stack allocator. So "live at head" and "live at tail"
now each get their own linearization index.
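
To make the scheme concrete, here is a minimal, self-contained C++ sketch of
the two passes described above. It is illustrative only, not JSC code: the
Program type, slotOf, and the one-character Tmps are invented for the example,
stack slots are modeled as plain integers, and the liveAtHead/liveAtTail sets
that the real buildLiveRanges also consumes are ignored.

    // Minimal sketch of the O0 stack-slot free-list scheme (not JSC code):
    // Program and the one-character "Tmps" are invented for this example.
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct Program {
        // Each inner vector is a basic block; each string lists the Tmps an
        // instruction touches, one character per Tmp.
        std::vector<std::vector<std::string>> blocks;
    };

    int main()
    {
        Program program { { { "ab", "bc" }, { "c", "cd" } } };

        // Pass 1 (in the spirit of buildLiveRanges): linearize the program and
        // record the last index at which each Tmp appears. The head and tail
        // of every block occupy their own index, mirroring the fix above.
        std::map<char, unsigned> liveRangeEnd;
        unsigned index = 0;
        for (auto& block : program.blocks) {
            ++index; // "live at head" gets its own index
            for (auto& inst : block) {
                for (char tmp : inst)
                    liveRangeEnd[tmp] = index;
                ++index;
            }
            ++index; // "live at tail" gets its own index
        }

        // Pass 2: walk the same order, assigning a slot the first time a Tmp
        // is seen and recycling the slot once the Tmp's live range ends.
        std::map<char, unsigned> slotOf;
        std::vector<unsigned> freeSlots;
        unsigned nextSlot = 0;
        index = 0;
        for (auto& block : program.blocks) {
            ++index;
            for (auto& inst : block) {
                for (char tmp : inst) {
                    if (slotOf.count(tmp))
                        continue;
                    if (freeSlots.empty())
                        slotOf[tmp] = nextSlot++;
                    else {
                        slotOf[tmp] = freeSlots.back();
                        freeSlots.pop_back();
                    }
                }
                // Only recycle after the whole instruction has been handled,
                // so two Tmps in the same instruction never share a slot.
                for (char tmp : inst) {
                    if (liveRangeEnd[tmp] == index)
                        freeSlots.push_back(slotOf[tmp]);
                }
                ++index;
            }
            ++index;
        }

        for (auto& [tmp, slot] : slotOf)
            std::printf("tmp %c -> slot %u\n", tmp, slot);
        std::printf("%u slots used for %zu Tmps\n", nextSlot, slotOf.size());
        return 0;
    }

With this input the four Tmps share two slots, where a one-slot-per-Tmp scheme
would have used four.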

* b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
(JSC::B3::Air::GenerateAndAllocateRegisters::buildLiveRanges):
(JSC::B3::Air::GenerateAndAllocateRegisters::prepareForGeneration):
(JSC::B3::Air::GenerateAndAllocateRegisters::generate):
* b3/air/AirAllocateRegistersAndStackAndGenerateCode.h:
* b3/air/AirLiveness.h:

Modified Paths

trunk/JSTests/ChangeLog
trunk/Source/JavaScriptCore/ChangeLog
trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp
trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h
trunk/Source/JavaScriptCore/b3/air/AirLiveness.h

Added Paths

trunk/JSTests/wasm/stress/dont-stack-overflow-in-air.js

Diff

Modified: trunk/JSTests/ChangeLog (254787 => 254788)


--- trunk/JSTests/ChangeLog	2020-01-18 02:49:33 UTC (rev 254787)
+++ trunk/JSTests/ChangeLog	2020-01-18 03:24:31 UTC (rev 254788)
@@ -1,3 +1,12 @@
+2020-01-17  Saam Barati  <sbar...@apple.com>
+
+        Air O0 should have better stack allocation
+        https://bugs.webkit.org/show_bug.cgi?id=206436
+
+        Reviewed by Tadeu Zagallo.
+
+        * wasm/stress/dont-stack-overflow-in-air.js: Added.
+
 2020-01-17  Mark Lam  <mark....@apple.com>
 
         JSModuleLoader's printableModuleKey() should never throw.

Added: trunk/JSTests/wasm/stress/dont-stack-overflow-in-air.js (0 => 254788)


--- trunk/JSTests/wasm/stress/dont-stack-overflow-in-air.js	                        (rev 0)
+++ trunk/JSTests/wasm/stress/dont-stack-overflow-in-air.js	2020-01-18 03:24:31 UTC (rev 254788)
@@ -0,0 +1,37 @@
+//@ requireOptions("--maxPerThreadStackUsage=512000")
+
+import Builder from '../Builder.js'
+import * as assert from '../assert.js'
+
+{
+    const locals = [];
+    const numLocals = 1000;
+    for (let i = 0; i < numLocals; ++i)
+        locals[i] = "f64";
+
+    const b = new Builder();
+    b.Type().End()
+        .Function().End()
+        .Export()
+            .Function("test")
+        .End()
+        .Code()
+        .Function("test", { params: ["i32"], ret: "void" }, locals)
+            .GetLocal(0)
+            .I32Const(0)
+            .I32Eq()
+            .BrIf(0)
+            .GetLocal(0)
+            .I32Const(1)
+            .I32Sub()
+            .Call(0)
+        .End()
+        .End();
+
+    const bin = b.WebAssembly().get();
+    const module = new WebAssembly.Module(bin);
+    const instance = new WebAssembly.Instance(module);
+
+    // This should not stack overflow
+    instance.exports.test(25);
+}

Modified: trunk/Source/JavaScriptCore/ChangeLog (254787 => 254788)


--- trunk/Source/JavaScriptCore/ChangeLog	2020-01-18 02:49:33 UTC (rev 254787)
+++ trunk/Source/JavaScriptCore/ChangeLog	2020-01-18 03:24:31 UTC (rev 254788)
@@ -1,3 +1,29 @@
+2020-01-17  Saam Barati  <sbar...@apple.com>
+
+        Air O0 should have better stack allocation
+        https://bugs.webkit.org/show_bug.cgi?id=206436
+
+        Reviewed by Tadeu Zagallo.
+
+        This patch adds a simple stack slot allocator to Air O0 so that the generated
+        code uses smaller stack frames. The huge stack frames from the old stack
+        allocator were leading to stack overflows in some programs. Before,
+        each Tmp got its own stack slot. The new allocator works similarly to O0's
+        register allocator: it linearizes the program and uses the end of a Tmp's live
+        range as an opportunity to place that Tmp's stack slot on a free list of
+        available stack slots. This patch also fixes an issue in our linearization code
+        where the head of one block and the tail of another block would share the
+        same linearization index. This didn't matter for register allocation, but it
+        does matter for the stack allocator. So "live at head" and "live at tail"
+        now each get their own linearization index.
+
+        * b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp:
+        (JSC::B3::Air::GenerateAndAllocateRegisters::buildLiveRanges):
+        (JSC::B3::Air::GenerateAndAllocateRegisters::prepareForGeneration):
+        (JSC::B3::Air::GenerateAndAllocateRegisters::generate):
+        * b3/air/AirAllocateRegistersAndStackAndGenerateCode.h:
+        * b3/air/AirLiveness.h:
+
 2020-01-17  David Kilzer  <ddkil...@apple.com>
 
         [JSC] Add missing header guards

Modified: trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp (254787 => 254788)


--- trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp	2020-01-18 02:49:33 UTC (rev 254787)
+++ trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.cpp	2020-01-18 03:24:31 UTC (rev 254788)
@@ -84,6 +84,7 @@
             if (!tmp.isReg())
                 m_liveRangeEnd[tmp] = m_globalInstIndex;
         }
+        ++m_globalInstIndex;
         for (Inst& inst : *block) {
             inst.forEachTmpFast([&] (Tmp tmp) {
                 if (!tmp.isReg())
@@ -95,6 +96,7 @@
             if (!tmp.isReg())
                 m_liveRangeEnd[tmp] = m_globalInstIndex;
         }
+        ++m_globalInstIndex;
     }
 }
 
@@ -280,22 +282,75 @@
     handleCalleeSaves(m_code, RegisterSet::calleeSaveRegisters());
     allocateEscapedStackSlots(m_code);
 
-    // Each Tmp gets its own stack slot.
-    auto createStackSlot = [&] (const Tmp& tmp) {
-        TmpData data;
-        data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
-        data.reg = Reg();
-        m_map[tmp] = data;
+    insertBlocksForFlushAfterTerminalPatchpoints();
+
 #if ASSERT_ENABLED
-        m_allTmps[tmp.bank()].append(tmp);
-#endif
-    };
-
     m_code.forEachTmp([&] (Tmp tmp) {
         ASSERT(!tmp.isReg());
-        createStackSlot(tmp);
+        m_allTmps[tmp.bank()].append(tmp);
     });
+#endif
 
+    m_liveness = makeUnique<UnifiedTmpLiveness>(m_code);
+
+    {
+        buildLiveRanges(*m_liveness);
+
+        Vector<StackSlot*, 16> freeSlots;
+        Vector<StackSlot*, 4> toFree;
+        m_globalInstIndex = 0;
+        for (BasicBlock* block : m_code) {
+            auto assignStackSlotToTmp = [&] (Tmp tmp) {
+                if (tmp.isReg())
+                    return;
+
+                TmpData& data = ""
+                if (data.spillSlot) {
+                    if (m_liveRangeEnd[tmp] == m_globalInstIndex)
+                        toFree.append(data.spillSlot);
+                    return;
+                }
+
+                if (freeSlots.size())
+                    data.spillSlot = freeSlots.takeLast();
+                else
+                    data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
+                data.reg = Reg();
+            };
+
+            auto flushToFreeList = [&] {
+                for (auto* stackSlot : toFree)
+                    freeSlots.append(stackSlot);
+                toFree.clear();
+            };
+
+            for (Tmp tmp : m_liveness->liveAtHead(block))
+                assignStackSlotToTmp(tmp);
+            flushToFreeList();
+
+            ++m_globalInstIndex;
+
+            for (Inst& inst : *block) {
+                Vector<Tmp, 4> seenTmps;
+                inst.forEachTmpFast([&] (Tmp tmp) {
+                    if (seenTmps.contains(tmp))
+                        return;
+                    seenTmps.append(tmp);
+                    assignStackSlotToTmp(tmp);
+                });
+
+                flushToFreeList();
+                ++m_globalInstIndex;
+            }
+
+            for (Tmp tmp : m_liveness->liveAtTail(block))
+                assignStackSlotToTmp(tmp);
+            flushToFreeList();
+
+            ++m_globalInstIndex;
+        }
+    } 
+
     m_allowedRegisters = RegisterSet();
 
     forEachBank([&] (Bank bank) {
@@ -303,7 +358,9 @@
 
         for (Reg reg : m_registers[bank]) {
             m_allowedRegisters.set(reg);
-            createStackSlot(Tmp(reg));
+            TmpData& data = ""
+            data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
+            data.reg = Reg();
         }
     });
 
@@ -323,11 +380,37 @@
 
     lowerStackArgs(m_code);
 
+#if ASSERT_ENABLED
     // Verify none of these passes add any tmps.
-#if ASSERT_ENABLED
     forEachBank([&] (Bank bank) {
-        ASSERT(m_allTmps[bank].size() - m_registers[bank].size() == m_code.numTmps(bank));
+        ASSERT(m_allTmps[bank].size() == m_code.numTmps(bank));
     });
+
+    {
+        // Verify that lowerStackArgs didn't change Tmp liveness at the boundaries for the Tmps and Registers we model.
+        UnifiedTmpLiveness liveness(m_code);
+        for (BasicBlock* block : m_code) {
+            auto assertLivenessAreEqual = [&] (auto a, auto b) {
+                HashSet<Tmp> livenessA;
+                HashSet<Tmp> livenessB;
+                for (Tmp tmp : a) {
+                    if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
+                        continue;
+                    livenessA.add(tmp);
+                }
+                for (Tmp tmp : b) {
+                    if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
+                        continue;
+                    livenessB.add(tmp);
+                }
+
+                ASSERT(livenessA == livenessB);
+            };
+
+            assertLivenessAreEqual(m_liveness->liveAtHead(block), liveness.liveAtHead(block));
+            assertLivenessAreEqual(m_liveness->liveAtTail(block), liveness.liveAtTail(block));
+        }
+    }
 #endif
 }
 
@@ -337,12 +420,9 @@
 
     TimingScope timingScope("Air::generateAndAllocateRegisters");
 
-    insertBlocksForFlushAfterTerminalPatchpoints();
-
     DisallowMacroScratchRegisterUsage disallowScratch(*m_jit);
 
-    UnifiedTmpLiveness liveness(m_code);
-    buildLiveRanges(liveness);
+    buildLiveRanges(*m_liveness);
 
     IndexMap<BasicBlock*, IndexMap<Reg, Tmp>> currentAllocationMap(m_code.size());
     {
@@ -355,7 +435,7 @@
 
         for (unsigned i = m_code.numEntrypoints(); i--;) {
             BasicBlock* entrypoint = m_code.entrypoint(i).block();
-            for (Tmp tmp : liveness.liveAtHead(entrypoint)) {
+            for (Tmp tmp : m_liveness->liveAtHead(entrypoint)) {
                 if (tmp.isReg())
                     currentAllocationMap[entrypoint][tmp.reg()] = tmp;
             }
@@ -443,6 +523,8 @@
             m_availableRegs[tmp.bank()].clear(reg);
         }
 
+        ++m_globalInstIndex;
+
         bool isReplayingSameInst = false;
         for (size_t instIndex = 0; instIndex < block->size(); ++instIndex) {
             checkConsistency();
@@ -673,7 +755,7 @@
                         everySuccessorGetsOurRegisterState = false;
                 }
                 if (!everySuccessorGetsOurRegisterState) {
-                    for (Tmp tmp : liveness.liveAtTail(block)) {
+                    for (Tmp tmp : m_liveness->liveAtTail(block)) {
                         if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
                             continue;
                         if (Reg reg = m_map[tmp].reg)
@@ -752,6 +834,8 @@
             if (Tmp tmp = currentAllocation[i])
                 m_map[tmp].reg = Reg();
         }
+
+        ++m_globalInstIndex;
     }
 
     for (auto& entry : m_blocksAfterTerminalPatchForSpilling) {

Modified: trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h (254787 => 254788)


--- trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h	2020-01-18 02:49:33 UTC (rev 254787)
+++ trunk/Source/JavaScriptCore/b3/air/AirAllocateRegistersAndStackAndGenerateCode.h	2020-01-18 03:24:31 UTC (rev 254788)
@@ -43,7 +43,7 @@
     WTF_MAKE_NONMOVABLE(GenerateAndAllocateRegisters);
 
     struct TmpData {
-        StackSlot* spillSlot;
+        StackSlot* spillSlot { nullptr };
         Reg reg;
     };
 
@@ -84,6 +84,7 @@
     RegisterSet m_namedUsedRegs;
     RegisterSet m_namedDefdRegs;
     RegisterSet m_allowedRegisters;
+    std::unique_ptr<UnifiedTmpLiveness> m_liveness;
 
     struct PatchSpillData {
         CCallHelpers::Jump jump;

Modified: trunk/Source/JavaScriptCore/b3/air/AirLiveness.h (254787 => 254788)


--- trunk/Source/JavaScriptCore/b3/air/AirLiveness.h	2020-01-18 02:49:33 UTC (rev 254787)
+++ trunk/Source/JavaScriptCore/b3/air/AirLiveness.h	2020-01-18 03:24:31 UTC (rev 254788)
@@ -36,6 +36,7 @@
 
 template<typename Adapter>
 class Liveness : public WTF::Liveness<Adapter> {
+    WTF_MAKE_FAST_ALLOCATED;
 public:
     Liveness(Code& code)
         : WTF::Liveness<Adapter>(code.cfg(), code)