Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (195502 => 195503)
--- trunk/Source/_javascript_Core/ChangeLog 2016-01-23 02:10:17 UTC (rev 195502)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-01-23 03:24:42 UTC (rev 195503)
@@ -1,3 +1,31 @@
+2016-01-22 Filip Pizlo <fpi...@apple.com>
+
+ B3 should strength-reduce division by a constant
+ https://bugs.webkit.org/show_bug.cgi?id=153386
+
+ Reviewed by Benjamin Poulain.
+
+ You can turn a 32-bit division by a constant into a 64-bit multiplication by a constant
+ plus some shifts. A book called "Hacker's Delight" has a bunch of math about this. The
+ hard part is finding the constant by which to multiply, and the amount by which to shift.
+ The book tells you some theroems, but you still have to turn that into code by thinking
+ deep thoughts. Luckily I was able to avoid that because it turns out that LLVM already
+ has code for this. It's called APInt::magic(), where APInt is their class for reasoning
+ about integers.
+
+ The code has a compatible license to ours and we have already in the past taken code from
+ LLVM. So, that's what this patch does. The LLVM code is localized in
+ B3ComputeDivisionMagic.h. Then reduceStrength() uses that to construct the multiply+shift
+ sequence.
+
+ This is an enormous speed-up on AsmBench-0.9/bigfib.cpp.js. It makes us as fast on that
+ test as LLVM. It reduces our deficit on AsmBench to 1.5%. Previously it was 4.5%.
+
+ * _javascript_Core.xcodeproj/project.pbxproj:
+ * b3/B3ComputeDivisionMagic.h: Added.
+ (JSC::B3::computeDivisionMagic):
+ * b3/B3ReduceStrength.cpp:
+
2016-01-22 Saam barati <sbar...@apple.com>
genericUnwind might overflow the instructions() vector when catching an FTL exception
Modified: trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj (195502 => 195503)
--- trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2016-01-23 02:10:17 UTC (rev 195502)
+++ trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2016-01-23 03:24:42 UTC (rev 195503)
@@ -492,6 +492,7 @@
0F8335B71639C1E6001443B5 /* ArrayAllocationProfile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F8335B41639C1E3001443B5 /* ArrayAllocationProfile.cpp */; };
0F8335B81639C1EA001443B5 /* ArrayAllocationProfile.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8335B51639C1E3001443B5 /* ArrayAllocationProfile.h */; settings = {ATTRIBUTES = (Private, ); }; };
0F8364B7164B0C110053329A /* DFGBranchDirection.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8364B5164B0C0E0053329A /* DFGBranchDirection.h */; };
+ 0F86AE201C5311C5006BE8EC /* B3ComputeDivisionMagic.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F86AE1F1C5311C5006BE8EC /* B3ComputeDivisionMagic.h */; };
0F885E111849A3BE00F1E3FA /* BytecodeUseDef.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F885E101849A3BE00F1E3FA /* BytecodeUseDef.h */; settings = {ATTRIBUTES = (Private, ); }; };
0F893BDB1936E23C001211F4 /* DFGStructureAbstractValue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F893BDA1936E23C001211F4 /* DFGStructureAbstractValue.cpp */; };
0F898F311B27689F0083A33C /* DFGIntegerRangeOptimizationPhase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F898F2F1B27689F0083A33C /* DFGIntegerRangeOptimizationPhase.cpp */; };
@@ -2674,6 +2675,7 @@
0F8335B41639C1E3001443B5 /* ArrayAllocationProfile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ArrayAllocationProfile.cpp; sourceTree = "<group>"; };
0F8335B51639C1E3001443B5 /* ArrayAllocationProfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ArrayAllocationProfile.h; sourceTree = "<group>"; };
0F8364B5164B0C0E0053329A /* DFGBranchDirection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DFGBranchDirection.h; path = dfg/DFGBranchDirection.h; sourceTree = "<group>"; };
+ 0F86AE1F1C5311C5006BE8EC /* B3ComputeDivisionMagic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = B3ComputeDivisionMagic.h; path = b3/B3ComputeDivisionMagic.h; sourceTree = "<group>"; };
0F885E101849A3BE00F1E3FA /* BytecodeUseDef.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BytecodeUseDef.h; sourceTree = "<group>"; };
0F893BDA1936E23C001211F4 /* DFGStructureAbstractValue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DFGStructureAbstractValue.cpp; path = dfg/DFGStructureAbstractValue.cpp; sourceTree = "<group>"; };
0F898F2F1B27689F0083A33C /* DFGIntegerRangeOptimizationPhase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DFGIntegerRangeOptimizationPhase.cpp; path = dfg/DFGIntegerRangeOptimizationPhase.cpp; sourceTree = "<group>"; };
@@ -4771,6 +4773,7 @@
0FEC84C21BDACDAC0080FF74 /* B3Commutativity.h */,
0F338DFF1BF0276C0013C88F /* B3Compilation.cpp */,
0F338E001BF0276C0013C88F /* B3Compilation.h */,
+ 0F86AE1F1C5311C5006BE8EC /* B3ComputeDivisionMagic.h */,
0FEC84C31BDACDAC0080FF74 /* B3Const32Value.cpp */,
0FEC84C41BDACDAC0080FF74 /* B3Const32Value.h */,
0FEC84C51BDACDAC0080FF74 /* B3Const64Value.cpp */,
@@ -7989,6 +7992,7 @@
BC18C4560E16F5CD00B34460 /* Protect.h in Headers */,
1474C33B16AA2D950062F01D /* PrototypeMap.h in Headers */,
0F5780A218FE1E98001E72D9 /* PureNaN.h in Headers */,
+ 0F86AE201C5311C5006BE8EC /* B3ComputeDivisionMagic.h in Headers */,
0F15CD231BA5F9860031FFD3 /* PutByIdFlags.h in Headers */,
0F9332A414CA7DD90085F3C6 /* PutByIdStatus.h in Headers */,
0F93275F1C21EF7F00CF6564 /* JSObjectInlines.h in Headers */,
Added: trunk/Source/_javascript_Core/b3/B3ComputeDivisionMagic.h (0 => 195503)
--- trunk/Source/_javascript_Core/b3/B3ComputeDivisionMagic.h (rev 0)
+++ trunk/Source/_javascript_Core/b3/B3ComputeDivisionMagic.h 2016-01-23 03:24:42 UTC (rev 195503)
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This contains code taken from LLVM's APInt class. That code implements finding the magic
+ * numbers for strength-reducing division. The LLVM code on which this code is based was
+ * implemented using "Hacker's Delight", Henry S. Warren, Jr., chapter 10.
+ *
+ * ==============================================================================
+ * LLVM Release License
+ * ==============================================================================
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ * LLVM Team
+ *
+ * University of Illinois at Urbana-Champaign
+ *
+ * http://llvm.org
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal with
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the names of the LLVM Team, University of Illinois at
+ * Urbana-Champaign, nor the names of its contributors may be used to
+ * endorse or promote products derived from this Software without specific
+ * prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+ * SOFTWARE.
+ */
+
+#ifndef B3ComputeDivisionMagic_h
+#define B3ComputeDivisionMagic_h
+
+#if ENABLE(B3_JIT)
+
+namespace JSC { namespace B3 {
+
+template<typename T>
+struct DivisionMagic {
+ T magicMultiplier;
+ unsigned shift;
+};
+
+// This contains code taken from LLVM's APInt::magic(). It's modestly adapted to our style, but
+// not completely, to make it easier to apply their changes in the future.
+template<typename T>
+DivisionMagic<T> computeDivisionMagic(T divisor)
+{
+ T d = divisor;
+ unsigned p;
+ T ad, anc, delta, q1, r1, q2, r2, t;
+ T signedMin = std::numeric_limits<T>::min();
+ DivisionMagic<T> mag;
+ unsigned bitWidth = sizeof(divisor) * 8;
+
+ // This code doesn't like to think of signedness as a type. Instead it likes to think that
+ // operations have signedness. This is how we generally do it in B3 as well. For this reason,
+ // this provides helpers for unsigned operations on the signed type (T).
+
+ auto zshr = [&] (T value, int amount) -> T {
+ return static_cast<typename std::make_unsigned<T>::type>(value) >> amount;
+ };
+
+ auto udiv = [&] (T left, T right) -> T {
+ return static_cast<T>(static_cast<typename std::make_unsigned<T>::type>(left) / static_cast<typename std::make_unsigned<T>::type>(right));
+ };
+
+ auto urem = [&] (T left, T right) -> T {
+ return static_cast<T>(static_cast<typename std::make_unsigned<T>::type>(left) % static_cast<typename std::make_unsigned<T>::type>(right));
+ };
+
+ auto aboveEqual = [&] (T left, T right) -> bool {
+ return static_cast<typename std::make_unsigned<T>::type>(left) >= static_cast<typename std::make_unsigned<T>::type>(right);
+ };
+
+ auto below = [&] (T left, T right) -> bool {
+ return static_cast<typename std::make_unsigned<T>::type>(left) < static_cast<typename std::make_unsigned<T>::type>(right);
+ };
+
+ ad = d < 0 ? -d : d;
+ t = signedMin + zshr(d, bitWidth - 1);
+ anc = t - 1 - urem(t, ad); // absolute value of nc
+ p = bitWidth - 1; // initialize p
+ q1 = udiv(signedMin, anc); // initialize q1 = 2p/abs(nc)
+ r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = udiv(signedMin, ad); // initialize q2 = 2p/abs(d)
+ r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = q1 << 1; // update q1 = 2p/abs(nc)
+ r1 = r1 << 1; // update r1 = rem(2p/abs(nc))
+ if (aboveEqual(r1, anc)) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = q2 << 1; // update q2 = 2p/abs(d)
+ r2 = r2 << 1; // update r2 = rem(2p/abs(d))
+ if (aboveEqual(r2,ad)) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (below(q1, delta) || (q1 == delta && r1 == 0));
+
+ mag.magicMultiplier = q2 + 1;
+ if (d < 0)
+ mag.magicMultiplier = -mag.magicMultiplier; // resulting magic number
+ mag.shift = p - bitWidth; // resulting shift
+
+ return mag;
+}
+
+} } // namespace JSC::B3
+
+#endif // ENABLE(B3_JIT)
+
+#endif // B3ComputeDivisionMagic_h
+
Modified: trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp (195502 => 195503)
--- trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp 2016-01-23 02:10:17 UTC (rev 195502)
+++ trunk/Source/_javascript_Core/b3/B3ReduceStrength.cpp 2016-01-23 03:24:42 UTC (rev 195503)
@@ -30,6 +30,7 @@
#include "B3BasicBlockInlines.h"
#include "B3BlockInsertionSet.h"
+#include "B3ComputeDivisionMagic.h"
#include "B3ControlValue.h"
#include "B3Dominators.h"
#include "B3IndexSet.h"
@@ -508,7 +509,91 @@
// Note that this uses ChillDiv semantics. That's fine, because the rules for Div
// are strictly weaker: it has corner cases where it's allowed to do anything it
// likes.
- replaceWithNewValue(m_value->child(0)->divConstant(m_proc, m_value->child(1)));
+ if (replaceWithNewValue(m_value->child(0)->divConstant(m_proc, m_value->child(1))))
+ break;
+
+ if (m_value->child(1)->hasInt()) {
+ switch (m_value->child(1)->asInt()) {
+ case -1:
+ // Turn this: Div(value, -1)
+ // Into this: Neg(value)
+ replaceWithNewValue(
+ m_proc.add<Value>(Neg, m_value->origin(), m_value->child(0)));
+ break;
+
+ case 0:
+ // Turn this: Div(value, 0)
+ // Into this: 0
+ // We can do this because it's precisely correct for ChillDiv and for Div we
+ // are allowed to do whatever we want.
+ m_value->replaceWithIdentity(m_value->child(1));
+ m_changed = true;
+ break;
+
+ case 1:
+ // Turn this: Div(value, 1)
+ // Into this: value
+ m_value->replaceWithIdentity(m_value->child(0));
+ m_changed = true;
+ break;
+
+ default:
+ // Perform super comprehensive strength reduction of division. Currently we
+ // only do this for 32-bit divisions, since we need a high multiply
+ // operation. We emulate it using 64-bit multiply. We can't emulate 64-bit
+ // high multiply with a 128-bit multiply because we don't have a 128-bit
+ // multiply. We could do it with a patchpoint if we cared badly enough.
+
+ if (m_value->type() != Int32)
+ break;
+
+ int32_t divisor = m_value->child(1)->asInt32();
+ DivisionMagic<int32_t> magic = computeDivisionMagic(divisor);
+
+ // Perform the "high" multiplication. We do it just to get the high bits.
+ // This is sort of like multiplying by the reciprocal, just more gnarly. It's
+ // from Hacker's Delight and I don't claim to understand it.
+ Value* magicQuotient = m_insertionSet.insert<Value>(
+ m_index, Trunc, m_value->origin(),
+ m_insertionSet.insert<Value>(
+ m_index, ZShr, m_value->origin(),
+ m_insertionSet.insert<Value>(
+ m_index, Mul, m_value->origin(),
+ m_insertionSet.insert<Value>(
+ m_index, SExt32, m_value->origin(), m_value->child(0)),
+ m_insertionSet.insert<Const64Value>(
+ m_index, m_value->origin(), magic.magicMultiplier)),
+ m_insertionSet.insert<Const32Value>(
+ m_index, m_value->origin(), 32)));
+
+ if (divisor > 0 && magic.magicMultiplier < 0) {
+ magicQuotient = m_insertionSet.insert<Value>(
+ m_index, Add, m_value->origin(), magicQuotient, m_value->child(0));
+ }
+ if (divisor < 0 && magic.magicMultiplier > 0) {
+ magicQuotient = m_insertionSet.insert<Value>(
+ m_index, Sub, m_value->origin(), magicQuotient, m_value->child(0));
+ }
+ if (magic.shift > 0) {
+ magicQuotient = m_insertionSet.insert<Value>(
+ m_index, SShr, m_value->origin(), magicQuotient,
+ m_insertionSet.insert<Const32Value>(
+ m_index, m_value->origin(), magic.shift));
+ }
+ m_value->replaceWithIdentity(
+ m_insertionSet.insert<Value>(
+ m_index, Add, m_value->origin(), magicQuotient,
+ m_insertionSet.insert<Value>(
+ m_index, ZShr, m_value->origin(), magicQuotient,
+ m_insertionSet.insert<Const32Value>(
+ m_index, m_value->origin(), 31))));
+ m_changed = true;
+ break;
+ }
+
+ if (m_value->opcode() != ChillDiv && m_value->opcode() != Div)
+ break;
+ }
break;
case Mod: