- Revision
- 278994
- Author
- commit-qu...@webkit.org
- Date
- 2021-06-17 10:16:54 -0700 (Thu, 17 Jun 2021)
Log Message
Add a new pattern to instruction selector to utilize UBFX supported by ARM64
https://bugs.webkit.org/show_bug.cgi?id=226984
Patch by Yijia Huang <yijia_hu...@apple.com> on 2021-06-17
Reviewed by Filip Pizlo.
UBFX, supported by ARM64, copies adjacent bits from the source register into
the least significant bits of a destination register in zero extension. The
instruction selector can utilize this to lower certain patterns in B3 IR
before further Air optimization.
ubfx dest, src, lsb, width
tmp, tmp, imm, imm
This is equivalent to "dest = (src >> lsb) & ((1 << width) - 1)". Since wasm
introduces constant folding, then the pattern would be:
dest = (src >> lsb) & mask
where the mask should have a binary format in contiguous ones starting from
the least significant bit. For example:
0b00111111
To make the pattern matching in instruction selection beneficial to JIT, these
constraints should be introduced:
1. lsb >= 0
2. width > 0
3. lsb + width <= bit field limit (32 or 64)
Given:
// B3 IR
Int @0 = ArgumentReg(%0)
Int @1 = lsb
Int @2 = 0b0011
Int @3 = ZShr(@0, @1)
Int @4 = BitAnd(@3, @2)
Void@5 = Return(@4, Terminal)
w/o UBFX Pattern:
// Old optimized AIR
Urshift %x0, lsb, %x0, @3
And 0b0011, %x0, %x0, @4
Ret %x0, @5
w/ UBFX Pattern:
// New optimized AIR
Ubfx %x0, lsb, 2, %x0, @4
Ret %x0, @5
Note:
Suppose a 32-bit version of (src >> 20) & 0x0FFF, it is equivalent to src >> 20.
In this case, Logical Shift Right should be utilized instead when:
lsb + width == bit field limit (32 or 64)
This case/pattern should be added and updated in a future patch.
* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::ubfx32):
(JSC::MacroAssemblerARM64::ubfx64):
* assembler/testmasm.cpp:
(JSC::testUbfx32):
(JSC::testUbfx64):
* b3/B3LowerToAir.cpp:
* b3/air/AirOpcode.opcodes:
* b3/testb3.h:
* b3/testb3_2.cpp:
(testUbfx64PatternMatch):
(testUbfx32PatternMatch):
(addBitTests):
Modified Paths
Diff
Modified: trunk/Source/JavaScriptCore/ChangeLog (278993 => 278994)
--- trunk/Source/JavaScriptCore/ChangeLog 2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/JavaScriptCore/ChangeLog 2021-06-17 17:16:54 UTC (rev 278994)
@@ -1,3 +1,77 @@
+2021-06-17 Yijia Huang <yijia_hu...@apple.com>
+
+ Add a new pattern to instruction selector to utilize UBFX supported by ARM64
+ https://bugs.webkit.org/show_bug.cgi?id=226984
+
+ Reviewed by Filip Pizlo.
+
+ UBFX, supported by ARM64, copies adjacent bits from the source register into
+ the least significant bits of a destination register in zero extension. The
+ instruction selector can utilize this to lower certain patterns in B3 IR
+ before further Air optimization.
+
+ ubfx dest, src, lsb, width
+ tmp, tmp, imm, imm
+
+ This is equivalent to "dest = (src >> lsb) & ((1 << width) - 1)". Since wasm
+ introduces constant folding, then the pattern would be:
+
+ dest = (src >> lsb) & mask
+
+ where the mask should have a binary format in contiguous ones starting from
+ the least significant bit. For example:
+
+ 0b00111111
+
+ To make the pattern matching in instruction selection beneficial to JIT, these
+ constraints should be introduced:
+
+ 1. lsb >= 0
+ 2. width > 0
+ 3. lsb + width <= bit field limit (32 or 64)
+
+ Given:
+ // B3 IR
+ Int @0 = ArgumentReg(%0)
+ Int @1 = lsb
+ Int @2 = 0b0011
+ Int @3 = ZShr(@0, @1)
+ Int @4 = BitAnd(@3, @2)
+ Void@5 = Return(@4, Terminal)
+
+ w/o UBFX Pattern:
+ // Old optimized AIR
+ Urshift %x0, lsb, %x0, @3
+ And 0b0011, %x0, %x0, @4
+ Ret %x0, @5
+
+ w/ UBFX Pattern:
+ // New optimized AIR
+ Ubfx %x0, lsb, 2, %x0, @4
+ Ret %x0, @5
+
+ Note:
+ Suppose a 32-bit version of (src >> 20) & 0x0FFF, it is equivalent to src >> 20.
+ In this case, Logical Shift Right should be utilized instead when:
+
+ lsb + width == bit field limit (32 or 64)
+
+ This case/pattern should be added and updated in a future patch.
+
+ * assembler/MacroAssemblerARM64.h:
+ (JSC::MacroAssemblerARM64::ubfx32):
+ (JSC::MacroAssemblerARM64::ubfx64):
+ * assembler/testmasm.cpp:
+ (JSC::testUbfx32):
+ (JSC::testUbfx64):
+ * b3/B3LowerToAir.cpp:
+ * b3/air/AirOpcode.opcodes:
+ * b3/testb3.h:
+ * b3/testb3_2.cpp:
+ (testUbfx64PatternMatch):
+ (testUbfx32PatternMatch):
+ (addBitTests):
+
2021-06-17 Angelos Oikonomopoulos <ange...@igalia.com>
[JSC] Work around apparent miscompilation on ARM/GCC >=8.4
Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h (278993 => 278994)
--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h 2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h 2021-06-17 17:16:54 UTC (rev 278994)
@@ -390,6 +390,16 @@
and32(dataTempRegister, dest);
}
+ void ubfx32(RegisterID src, TrustedImm32 lsb, TrustedImm32 width, RegisterID dest)
+ {
+ m_assembler.ubfx<32>(dest, src, lsb.m_value, width.m_value);
+ }
+
+ void ubfx64(RegisterID src, TrustedImm32 lsb, TrustedImm32 width, RegisterID dest)
+ {
+ m_assembler.ubfx<64>(dest, src, lsb.m_value, width.m_value);
+ }
+
void and64(RegisterID src1, RegisterID src2, RegisterID dest)
{
m_assembler.and_<64>(dest, src1, src2);
Modified: trunk/Source/JavaScriptCore/assembler/testmasm.cpp (278993 => 278994)
--- trunk/Source/JavaScriptCore/assembler/testmasm.cpp 2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/JavaScriptCore/assembler/testmasm.cpp 2021-06-17 17:16:54 UTC (rev 278994)
@@ -1042,6 +1042,54 @@
CHECK_EQ(invoke<int64_t>(sub, value), static_cast<int64_t>(value - immediate));
}
}
+
+void testUbfx32()
+{
+ uint32_t src = 0xffffffff;
+ Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+ for (auto lsb : imms) {
+ for (auto width : imms) {
+ if (lsb >= 0 && width > 0 && lsb + width < 32) {
+ auto ubfx32 = compile([=] (CCallHelpers& jit) {
+ emitFunctionPrologue(jit);
+
+ jit.ubfx32(GPRInfo::returnValueGPR,
+ CCallHelpers::TrustedImm32(lsb),
+ CCallHelpers::TrustedImm32(width),
+ GPRInfo::returnValueGPR);
+
+ emitFunctionEpilogue(jit);
+ jit.ret();
+ });
+ CHECK_EQ(invoke<uint32_t>(ubfx32, src), ((src >> lsb) & ((1U << width) - 1U)));
+ }
+ }
+ }
+}
+
+void testUbfx64()
+{
+ uint64_t src = 0xffffffffffffffff;
+ Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+ for (auto lsb : imms) {
+ for (auto width : imms) {
+ if (lsb >= 0 && width > 0 && lsb + width < 64) {
+ auto ubfx64 = compile([=] (CCallHelpers& jit) {
+ emitFunctionPrologue(jit);
+
+ jit.ubfx64(GPRInfo::returnValueGPR,
+ CCallHelpers::TrustedImm32(lsb),
+ CCallHelpers::TrustedImm32(width),
+ GPRInfo::returnValueGPR);
+
+ emitFunctionEpilogue(jit);
+ jit.ret();
+ });
+ CHECK_EQ(invoke<uint64_t>(ubfx64, src), ((src >> lsb) & ((1ULL << width) - 1ULL)));
+ }
+ }
+ }
+}
#endif
#if CPU(X86) || CPU(X86_64) || CPU(ARM64)
@@ -3206,6 +3254,8 @@
RUN(testSub64ArgImm32());
RUN(testSub64Imm64());
RUN(testSub64ArgImm64());
+ RUN(testUbfx32());
+ RUN(testUbfx64());
#endif
#if CPU(X86) || CPU(X86_64) || CPU(ARM64)
Modified: trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp (278993 => 278994)
--- trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp 2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp 2021-06-17 17:16:54 UTC (rev 278994)
@@ -59,6 +59,7 @@
#include "B3WasmAddressValue.h"
#include <wtf/IndexMap.h>
#include <wtf/IndexSet.h>
+#include <wtf/StdLibExtras.h>
#if !ASSERT_ENABLED
IGNORE_RETURN_TYPE_WARNINGS_BEGIN
@@ -2694,23 +2695,46 @@
}
case BitAnd: {
- if (m_value->child(1)->isInt(0xff)) {
- appendUnOp<ZeroExtend8To32, ZeroExtend8To32>(m_value->child(0));
+ Value* left = m_value->child(0);
+ Value* right = m_value->child(1);
+
+ if (right->isInt(0xff)) {
+ appendUnOp<ZeroExtend8To32, ZeroExtend8To32>(left);
return;
}
-
- if (m_value->child(1)->isInt(0xffff)) {
- appendUnOp<ZeroExtend16To32, ZeroExtend16To32>(m_value->child(0));
+
+ if (right->isInt(0xffff)) {
+ appendUnOp<ZeroExtend16To32, ZeroExtend16To32>(left);
return;
}
- if (m_value->child(1)->isInt64(0xffffffff) || m_value->child(1)->isInt32(0xffffffff)) {
- appendUnOp<Move32, Move32>(m_value->child(0));
+ if (right->isInt64(0xffffffff) || right->isInt32(0xffffffff)) {
+ appendUnOp<Move32, Move32>(left);
return;
}
-
- appendBinOp<And32, And64, AndDouble, AndFloat, Commutative>(
- m_value->child(0), m_value->child(1));
+
+ // UBFX Pattern: dest = (src >> lsb) & ((1 << width) - 1)
+ if (canBeInternal(left) && left->opcode() == ZShr) {
+ Value* srcValue = left->child(0);
+ Value* lsbValue = left->child(1);
+ if (!imm(srcValue) && imm(lsbValue) && right->hasInt()) {
+ int64_t lsb = lsbValue->asInt();
+ uint64_t mask = right->asInt();
+ uint8_t width = static_cast<uint8_t>(!(mask & (mask + 1))) * WTF::bitCount(mask);
+ Air::Opcode opcode = opcodeForType(Ubfx32, Ubfx64, srcValue->type());
+ if (opcode
+ && lsb >= 0
+ && width > 0
+ && lsb + width <= (32 << (opcode == Ubfx64))
+ && isValidForm(opcode, Arg::Tmp, Arg::Imm, Arg::Imm, Arg::Tmp)) {
+ append(opcode, tmp(srcValue), imm(lsbValue), imm(width), tmp(m_value));
+ commitInternal(left);
+ return;
+ }
+ }
+ }
+
+ appendBinOp<And32, And64, AndDouble, AndFloat, Commutative>(left, right);
return;
}
Modified: trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes (278993 => 278994)
--- trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes 2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes 2021-06-17 17:16:54 UTC (rev 278994)
@@ -357,6 +357,12 @@
x86: Imm, Addr
x86: Imm, Index
+arm64: Ubfx32 U:G:32, U:G:32, U:G:32, ZD:G:32
+ Tmp, Imm, Imm, Tmp
+
+arm64: Ubfx64 U:G:64, U:G:32, U:G:32, D:G:64
+ Tmp, Imm, Imm, Tmp
+
64: And64 U:G:64, U:G:64, D:G:64
Tmp, Tmp, Tmp
arm64: BitImm64, Tmp, Tmp
Modified: trunk/Source/JavaScriptCore/b3/testb3.h (278993 => 278994)
--- trunk/Source/JavaScriptCore/b3/testb3.h 2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/JavaScriptCore/b3/testb3.h 2021-06-17 17:16:54 UTC (rev 278994)
@@ -416,6 +416,10 @@
void run(const char* filter);
void testBitAndSExt32(int32_t value, int64_t mask);
+void testUbfx32();
+void testUbfx32PatternMatch();
+void testUbfx64();
+void testUbfx64PatternMatch();
void testBasicSelect();
void testSelectTest();
void testSelectCompareDouble();
Modified: trunk/Source/JavaScriptCore/b3/testb3_2.cpp (278993 => 278994)
--- trunk/Source/JavaScriptCore/b3/testb3_2.cpp 2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/JavaScriptCore/b3/testb3_2.cpp 2021-06-17 17:16:54 UTC (rev 278994)
@@ -2540,6 +2540,154 @@
CHECK(isIdentical(compileAndRun<float>(proc, bitwise_cast<int32_t>(a)), -a));
}
+void testUbfx32()
+{
+ // (src >> lsb) & mask
+ uint32_t src = 0xffffffff;
+ Vector<uint32_t> lsbs = { 0, 15, 30 };
+ Vector<uint32_t> widths = { 30, 16, 1 };
+
+ auto test = [&] (uint32_t lsb, uint32_t mask) -> uint32_t {
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+
+ Value* srcValue = root->appendNew<Value>(
+ proc, Trunc, Origin(),
+ root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+ Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+ Value* maskValue = root->appendNew<Const32Value>(proc, Origin(), mask);
+
+ Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+ root->appendNewControlValue(
+ proc, Return, Origin(),
+ root->appendNew<Value>(proc, BitAnd, Origin(), left, maskValue));
+
+ return compileAndRun<uint32_t>(proc, src);
+ };
+
+ auto generateMask = [&] (uint32_t width) -> uint32_t {
+ return (1U << width) - 1U;
+ };
+
+ for (size_t i = 0; i < lsbs.size(); ++i) {
+ uint32_t lsb = lsbs.at(i);
+ uint32_t mask = generateMask(widths.at(i));
+ uint32_t lhs = test(lsb, mask);
+ uint32_t rhs = ((src >> lsb) & mask);
+ CHECK(lhs == rhs);
+ }
+}
+
+void testUbfx32PatternMatch()
+{
+ // (src >> lsb) & ((1 << width) - 1)
+ uint32_t src = 0xffffffff;
+ Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+
+ auto test = [&] (uint32_t lsb, uint32_t width) -> uint32_t {
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+
+ Value* srcValue = root->appendNew<Value>(
+ proc, Trunc, Origin(),
+ root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+ Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+ Value* widthValue = root->appendNew<Const32Value>(proc, Origin(), width);
+ Value* constValueA = root->appendNew<Const32Value>(proc, Origin(), 1);
+ Value* constValueB = root->appendNew<Const32Value>(proc, Origin(), 1);
+
+ Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+ Value* right = root->appendNew<Value>(
+ proc, Sub, Origin(),
+ root->appendNew<Value>(proc, Shl, Origin(), constValueA, widthValue), constValueB);
+ root->appendNewControlValue(
+ proc, Return, Origin(),
+ root->appendNew<Value>(proc, BitAnd, Origin(), left, right));
+
+ return compileAndRun<uint32_t>(proc, src);
+ };
+
+ for (auto lsb : imms) {
+ for (auto width : imms) {
+ uint32_t lhs = test(lsb, width);
+ uint32_t rhs = ((src >> lsb) & ((1U << width) - 1U));
+ CHECK(lhs == rhs);
+ }
+ }
+}
+
+void testUbfx64()
+{
+ // (src >> lsb) & mask
+ uint64_t src = 0xffffffffffffffff;
+ Vector<uint64_t> lsbs = { 0, 31, 62 };
+ Vector<uint64_t> widths = { 63, 32, 1 };
+
+ auto test = [&] (uint64_t lsb, uint64_t mask) -> uint64_t {
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+
+ Value* srcValue = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+ Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+ Value* maskValue = root->appendNew<Const64Value>(proc, Origin(), mask);
+
+ Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+ root->appendNewControlValue(
+ proc, Return, Origin(),
+ root->appendNew<Value>(proc, BitAnd, Origin(), left, maskValue));
+
+ return compileAndRun<uint64_t>(proc, src);
+ };
+
+ auto generateMask = [&] (uint64_t width) -> uint64_t {
+ return (1ULL << width) - 1ULL;
+ };
+
+ for (size_t i = 0; i < lsbs.size(); ++i) {
+ uint64_t lsb = lsbs.at(i);
+ uint64_t mask = generateMask(widths.at(i));
+ uint64_t lhs = test(lsb, mask);
+ uint64_t rhs = ((src >> lsb) & mask);
+ CHECK(lhs == rhs);
+ }
+}
+
+void testUbfx64PatternMatch()
+{
+ // (src >> lsb) & ((1 << width) - 1)
+ uint64_t src = 0xffffffffffffffff;
+ Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+
+ auto test = [&] (uint32_t lsb, uint32_t width) -> uint64_t {
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+
+ Value* srcValue = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+ Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+ Value* widthValue = root->appendNew<Const32Value>(proc, Origin(), width);
+ Value* constValueA = root->appendNew<Const64Value>(proc, Origin(), 1);
+ Value* constValueB = root->appendNew<Const64Value>(proc, Origin(), 1);
+
+ Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+ Value* right = root->appendNew<Value>(
+ proc, Sub, Origin(),
+ root->appendNew<Value>(proc, Shl, Origin(), constValueA, widthValue), constValueB);
+ root->appendNewControlValue(
+ proc, Return, Origin(),
+ root->appendNew<Value>(proc, BitAnd, Origin(), left, right));
+
+ return compileAndRun<uint64_t>(proc, src);
+ };
+
+ for (auto lsb : imms) {
+ for (auto width : imms) {
+ uint64_t lhs = test(lsb, width);
+ uint64_t rhs = ((src >> lsb) & ((1ULL << width) - 1ULL));
+ CHECK(lhs == rhs);
+ }
+ }
+}
+
static void testBitAndArgs(int64_t a, int64_t b)
{
Procedure proc;
@@ -3354,6 +3502,11 @@
void addBitTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>& tasks)
{
+ RUN(testUbfx32());
+ RUN(testUbfx32PatternMatch());
+ RUN(testUbfx64());
+ RUN(testUbfx64PatternMatch());
+
RUN(testBitAndArgs(43, 43));
RUN(testBitAndArgs(43, 0));
RUN(testBitAndArgs(10, 3));