Title: [249677] trunk/Source/_javascript_Core
Revision
249677
Author
rmoris...@apple.com
Date
2019-09-09 17:23:50 -0700 (Mon, 09 Sep 2019)

Log Message

[Air] highOrderAdjacents in AbstractColoringAllocator::conservativeHeuristic should be some kind of array
https://bugs.webkit.org/show_bug.cgi?id=197305

Reviewed by Keith Miller.

Currently it is a HashSet, but it only ever holds at most registerCount() items. And linear search tends to be faster on such a small collection than hashing + searching in a HashSet.
Further benefits include avoiding the allocation of the HashSet, not actually adding the nodes adjacent to V (since there are no duplicates in the adjacency lists).

This patch also contains a trivial optimization: if the remaining number of nodes to consider + the number of highOrderAdjacents already seen is smaller than registerCount() we can return true directly.
Apart from that, the patch got some trivial cleanup of GraphColoringRegisterAllocation::allocateOnBank() (that for example was only logging the number of iterations for FP registers, and not the more interesting number for GP registers).

The time spent in the register allocator throughout JetStream2 on this MacBook Pro moves from 3767 / 3710 / 3785 ms to 3551 / 3454 / 3503 ms.
So about a 6% speedup for that phase, and between 1 and 1.5% speedup for FTL/OMG compilation overall.

No new tests as there is no intended change to the code being generated, and this was already tested by running testb3 + JetStream2.

* b3/air/AirAllocateRegistersByGraphColoring.cpp:

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (249676 => 249677)


--- trunk/Source/_javascript_Core/ChangeLog	2019-09-09 23:51:16 UTC (rev 249676)
+++ trunk/Source/_javascript_Core/ChangeLog	2019-09-10 00:23:50 UTC (rev 249677)
@@ -1,3 +1,23 @@
+2019-09-09  Robin Morisset  <rmoris...@apple.com>
+
+        [Air] highOrderAdjacents in AbstractColoringAllocator::conservativeHeuristic should be some kind of array
+        https://bugs.webkit.org/show_bug.cgi?id=197305
+
+        Reviewed by Keith Miller.
+
+        Currently it is a HashSet, but it only ever holds at most registerCount() items. And linear search tends to be faster on such a small collection than hashing + searching in a HashSet.
+        Further benefits include avoiding the allocation of the HashSet, not actually adding the nodes adjacent to V (since there are no duplicates in the adjacency lists).
+
+        This patch also contains a trivial optimization: if the remaining number of nodes to consider + the number of highOrderAdjacents already seen is smaller than registerCount() we can return true directly.
+        Apart from that, the patch got some trivial cleanup of GraphColoringRegisterAllocation::allocateOnBank() (that for example was only logging the number of iterations for FP registers, and not the more interesting number for GP registers).
+
+        The time spent in the register allocator throughout JetStream2 on this MacBook Pro moves from 3767 / 3710 / 3785 ms to 3551 / 3454 / 3503 ms.
+        So about a 6% speedup for that phase, and between 1 and 1.5% speedup for FTL/OMG compilation overall.
+
+        No new tests as there is no intended change to the code being generated, and this was already tested by running testb3 + JetStream2.
+
+        * b3/air/AirAllocateRegistersByGraphColoring.cpp:
+
 2019-09-09  Yusuke Suzuki  <ysuz...@apple.com>
 
         [JSC] Use metadata table to iterate specific bytecode metadata instead of propertyAccessInstructions vector

Modified: trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersByGraphColoring.cpp (249676 => 249677)


--- trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersByGraphColoring.cpp	2019-09-09 23:51:16 UTC (rev 249676)
+++ trunk/Source/_javascript_Core/b3/air/AirAllocateRegistersByGraphColoring.cpp	2019-09-10 00:23:50 UTC (rev 249677)
@@ -192,32 +192,45 @@
         const auto& adjacentsOfU = m_adjacencyList[u];
         const auto& adjacentsOfV = m_adjacencyList[v];
 
-        if (adjacentsOfU.size() + adjacentsOfV.size() < registerCount()) {
+        Vector<IndexType, MacroAssembler::numGPRs + MacroAssembler::numFPRs> highOrderAdjacents;
+        RELEASE_ASSERT(registerCount() <= MacroAssembler::numGPRs + MacroAssembler::numFPRs);
+        unsigned numCandidates = adjacentsOfU.size() + adjacentsOfV.size();
+        if (numCandidates < registerCount()) {
             // Shortcut: if the total number of adjacents is less than the number of register, the condition is always met.
             return true;
         }
 
-        HashSet<IndexType> highOrderAdjacents;
-
         for (IndexType adjacentTmpIndex : adjacentsOfU) {
             ASSERT(adjacentTmpIndex != v);
             ASSERT(adjacentTmpIndex != u);
+            numCandidates--;
             if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= registerCount()) {
-                auto addResult = highOrderAdjacents.add(adjacentTmpIndex);
-                if (addResult.isNewEntry && highOrderAdjacents.size() >= registerCount())
+                ASSERT(std::find(highOrderAdjacents.begin(), highOrderAdjacents.end(), adjacentTmpIndex) == highOrderAdjacents.end());
+                highOrderAdjacents.uncheckedAppend(adjacentTmpIndex);
+                if (highOrderAdjacents.size() >= registerCount())
                     return false;
-            }
+            } else if (highOrderAdjacents.size() + numCandidates < registerCount())
+                return true;
         }
+        ASSERT(numCandidates == adjacentsOfV.size());
+
+        auto iteratorEndHighOrderAdjacentsOfU = highOrderAdjacents.end();
         for (IndexType adjacentTmpIndex : adjacentsOfV) {
             ASSERT(adjacentTmpIndex != u);
             ASSERT(adjacentTmpIndex != v);
-            if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= registerCount()) {
-                auto addResult = highOrderAdjacents.add(adjacentTmpIndex);
-                if (addResult.isNewEntry && highOrderAdjacents.size() >= registerCount())
+            numCandidates--;
+            if (!hasBeenSimplified(adjacentTmpIndex)
+                && m_degrees[adjacentTmpIndex] >= registerCount()
+                && std::find(highOrderAdjacents.begin(), iteratorEndHighOrderAdjacentsOfU, adjacentTmpIndex) == iteratorEndHighOrderAdjacentsOfU) {
+                ASSERT(std::find(iteratorEndHighOrderAdjacentsOfU, highOrderAdjacents.end(), adjacentTmpIndex) == highOrderAdjacents.end());
+                highOrderAdjacents.uncheckedAppend(adjacentTmpIndex);
+                if (highOrderAdjacents.size() >= registerCount())
                     return false;
-            }
+            } else if (highOrderAdjacents.size() + numCandidates < registerCount())
+                return true;
         }
 
+        ASSERT(!numCandidates);
         ASSERT(highOrderAdjacents.size() < registerCount());
         return true;
     }
@@ -1021,7 +1034,7 @@
     }
 
     // Low-degree vertex can always be colored: just pick any of the color taken by any
-    // other adjacent verices.
+    // other adjacent vertices.
     // The "Simplify" phase takes a low-degree out of the interference graph to simplify it.
     void simplify()
     {
@@ -1789,13 +1802,9 @@
         padInterference(m_code);
 
         allocateOnBank<GP>();
-        m_numIterations = 0;
         allocateOnBank<FP>();
 
         fixSpillsAfterTerminals(m_code);
-
-        if (reportStats)
-            dataLog("Num iterations = ", m_numIterations, "\n");
     }
 
 private:
@@ -1808,12 +1817,15 @@
         // we should add the Tmp to unspillableTmps. That will help avoid relooping only to turn the
         // Tmp into an unspillable Tmp.
         // https://bugs.webkit.org/show_bug.cgi?id=152699
-        
-        while (true) {
-            ++m_numIterations;
 
+        unsigned numIterations = 0;
+        bool done = false;
+
+        while (!done) {
+            ++numIterations;
+
             if (traceDebug)
-                dataLog("Code at iteration ", m_numIterations, ":\n", m_code);
+                dataLog("Code at iteration ", numIterations, ":\n", m_code);
 
             // FIXME: One way to optimize this code is to remove the recomputation inside the fixpoint.
             // We need to recompute because spilling adds tmps, but we could just update tmpWidth when we
@@ -1822,7 +1834,7 @@
             // spill code we emit. Since we currently recompute TmpWidth after spilling, the newly
             // created Tmps may get narrower use/def widths. On the other hand, the spiller already
             // selects which move instruction to use based on the original Tmp's widths, so it may not
-            // matter than a subsequent iteration sees a coservative width for the new Tmps. Also, the
+            // matter than a subsequent iteration sees a conservative width for the new Tmps. Also, the
             // recomputation may not actually be a performance problem; it's likely that a better way to
             // improve performance of TmpWidth is to replace its HashMap with something else. It's
             // possible that most of the TmpWidth overhead is from queries of TmpWidth rather than the
@@ -1835,7 +1847,7 @@
                 if (!allocator.requiresSpilling()) {
                     this->assignRegistersToTmp<bank>(allocator);
                     if (traceDebug)
-                        dataLog("Successfull allocation at iteration ", m_numIterations, ":\n", m_code);
+                        dataLog("Successfull allocation at iteration ", numIterations, ":\n", m_code);
 
                     return true;
                 }
@@ -1843,8 +1855,7 @@
                 this->addSpillAndFill<bank>(allocator, unspillableTmps);
                 return false;
             };
-            
-            bool done;
+
             if (useIRC()) {
                 ColoringAllocator<bank, IRC> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps);
                 done = doAllocation(allocator);
@@ -1852,9 +1863,8 @@
                 ColoringAllocator<bank, Briggs> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps);
                 done = doAllocation(allocator);
             }
-            if (done)
-                return;
         }
+        dataLogLnIf(reportStats, "Num iterations = ", numIterations, " for bank: ", bank);
     }
 
     template<Bank bank>
@@ -2187,7 +2197,6 @@
     Code& m_code;
     TmpWidth m_tmpWidth;
     UseCounts<Tmp>& m_useCounts;
-    unsigned m_numIterations { 0 };
 };
 
 } // anonymous namespace
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to