Title: [278994] trunk/Source/_javascript_Core
Revision
278994
Author
commit-qu...@webkit.org
Date
2021-06-17 10:16:54 -0700 (Thu, 17 Jun 2021)

Log Message

Add a new pattern to instruction selector to utilize UBFX supported by ARM64
https://bugs.webkit.org/show_bug.cgi?id=226984

Patch by Yijia Huang <yijia_hu...@apple.com> on 2021-06-17
Reviewed by Filip Pizlo.

UBFX, supported by ARM64, copies adjacent bits from the source register into
the least significant bits of a destination register in zero extension. The
instruction selector can utilize this to lower certain patterns in B3 IR
before further Air optimization.

ubfx dest, src, lsb, width
   tmp, tmp, imm, imm

This is equivalent to "dest = (src >> lsb) & ((1 << width) - 1)". Since wasm
introduces constant folding, then the pattern would be:

dest = (src >> lsb) & mask

where the mask should have a binary format in contiguous ones starting from
the least significant bit. For example:

0b00111111

To make the pattern matching in instruction selection beneficial to JIT, these
constraints should be introduced:

1. lsb >= 0
2. width > 0
3. lsb + width <= bit field limit (32 or 64)

Given:
// B3 IR
Int @0 = ArgumentReg(%0)
Int @1 = lsb
Int @2 = 0b0011
Int @3 = ZShr(@0, @1)
Int @4 = BitAnd(@3, @2)
Void@5 = Return(@4, Terminal)

w/o UBFX Pattern:
// Old optimized AIR
Urshift %x0, lsb, %x0, @3
And  0b0011, %x0, %x0, @4
Ret     %x0,           @5

w/ UBFX Pattern:
// New optimized AIR
Ubfx %x0, lsb, 2, %x0, @4
Ret  %x0,              @5

Note:
Suppose a 32-bit version of (src >> 20) & 0x0FFF, it is equivalent to src >> 20.
In this case, Logical Shift Right should be utilized instead when:

lsb + width == bit field limit (32 or 64)

This case/pattern should be added and updated in a future patch.

* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::ubfx32):
(JSC::MacroAssemblerARM64::ubfx64):
* assembler/testmasm.cpp:
(JSC::testUbfx32):
(JSC::testUbfx64):
* b3/B3LowerToAir.cpp:
* b3/air/AirOpcode.opcodes:
* b3/testb3.h:
* b3/testb3_2.cpp:
(testUbfx64PatternMatch):
(testUbfx32PatternMatch):
(addBitTests):

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (278993 => 278994)


--- trunk/Source/_javascript_Core/ChangeLog	2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/_javascript_Core/ChangeLog	2021-06-17 17:16:54 UTC (rev 278994)
@@ -1,3 +1,77 @@
+2021-06-17  Yijia Huang  <yijia_hu...@apple.com>
+
+        Add a new pattern to instruction selector to utilize UBFX supported by ARM64
+        https://bugs.webkit.org/show_bug.cgi?id=226984
+
+        Reviewed by Filip Pizlo.
+
+        UBFX, supported by ARM64, copies adjacent bits from the source register into 
+        the least significant bits of a destination register in zero extension. The 
+        instruction selector can utilize this to lower certain patterns in B3 IR 
+        before further Air optimization.
+
+        ubfx dest, src, lsb, width
+           tmp, tmp, imm, imm
+
+        This is equivalent to "dest = (src >> lsb) & ((1 << width) - 1)". Since wasm 
+        introduces constant folding, then the pattern would be:
+
+        dest = (src >> lsb) & mask
+
+        where the mask should have a binary format in contiguous ones starting from 
+        the least significant bit. For example:
+
+        0b00111111
+
+        To make the pattern matching in instruction selection beneficial to JIT, these 
+        constraints should be introduced:
+
+        1. lsb >= 0 
+        2. width > 0
+        3. lsb + width <= bit field limit (32 or 64)
+
+        Given:
+        // B3 IR
+        Int @0 = ArgumentReg(%0)
+        Int @1 = lsb
+        Int @2 = 0b0011
+        Int @3 = ZShr(@0, @1)
+        Int @4 = BitAnd(@3, @2)  
+        Void@5 = Return(@4, Terminal)      
+
+        w/o UBFX Pattern:
+        // Old optimized AIR
+        Urshift %x0, lsb, %x0, @3
+        And  0b0011, %x0, %x0, @4
+        Ret     %x0,           @5
+
+        w/ UBFX Pattern:
+        // New optimized AIR
+        Ubfx %x0, lsb, 2, %x0, @4
+        Ret  %x0,              @5
+
+        Note:
+        Suppose a 32-bit version of (src >> 20) & 0x0FFF, it is equivalent to src >> 20. 
+        In this case, Logical Shift Right should be utilized instead when:
+
+        lsb + width == bit field limit (32 or 64)
+
+        This case/pattern should be added and updated in a future patch.
+
+        * assembler/MacroAssemblerARM64.h:
+        (JSC::MacroAssemblerARM64::ubfx32):
+        (JSC::MacroAssemblerARM64::ubfx64):
+        * assembler/testmasm.cpp:
+        (JSC::testUbfx32):
+        (JSC::testUbfx64):
+        * b3/B3LowerToAir.cpp:
+        * b3/air/AirOpcode.opcodes:
+        * b3/testb3.h:
+        * b3/testb3_2.cpp:
+        (testUbfx64PatternMatch):
+        (testUbfx32PatternMatch):
+        (addBitTests):
+
 2021-06-17  Angelos Oikonomopoulos  <ange...@igalia.com>
 
         [JSC] Work around apparent miscompilation on ARM/GCC >=8.4

Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h (278993 => 278994)


--- trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h	2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h	2021-06-17 17:16:54 UTC (rev 278994)
@@ -390,6 +390,16 @@
         and32(dataTempRegister, dest);
     }
 
+    void ubfx32(RegisterID src, TrustedImm32 lsb, TrustedImm32 width, RegisterID dest)
+    {
+        m_assembler.ubfx<32>(dest, src, lsb.m_value, width.m_value);
+    }
+
+    void ubfx64(RegisterID src, TrustedImm32 lsb, TrustedImm32 width, RegisterID dest)
+    {
+        m_assembler.ubfx<64>(dest, src, lsb.m_value, width.m_value);
+    }
+
     void and64(RegisterID src1, RegisterID src2, RegisterID dest)
     {
         m_assembler.and_<64>(dest, src1, src2);

Modified: trunk/Source/_javascript_Core/assembler/testmasm.cpp (278993 => 278994)


--- trunk/Source/_javascript_Core/assembler/testmasm.cpp	2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/_javascript_Core/assembler/testmasm.cpp	2021-06-17 17:16:54 UTC (rev 278994)
@@ -1042,6 +1042,54 @@
             CHECK_EQ(invoke<int64_t>(sub, value), static_cast<int64_t>(value - immediate));
     }
 }
+
+void testUbfx32()
+{
+    uint32_t src = ""
+    Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+    for (auto lsb : imms) {
+        for (auto width : imms) {
+            if (lsb >= 0 && width > 0 && lsb + width < 32) {
+                auto ubfx32 = compile([=] (CCallHelpers& jit) {
+                    emitFunctionPrologue(jit);
+
+                    jit.ubfx32(GPRInfo::returnValueGPR, 
+                        CCallHelpers::TrustedImm32(lsb), 
+                        CCallHelpers::TrustedImm32(width), 
+                        GPRInfo::returnValueGPR);
+
+                    emitFunctionEpilogue(jit);
+                    jit.ret();
+                });
+                CHECK_EQ(invoke<uint32_t>(ubfx32, src), ((src >> lsb) & ((1U << width) - 1U)));
+            }
+        }
+    }
+}
+
+void testUbfx64()
+{
+    uint64_t src = ""
+    Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+    for (auto lsb : imms) {
+        for (auto width : imms) {
+            if (lsb >= 0 && width > 0 && lsb + width < 64) {
+                auto ubfx64 = compile([=] (CCallHelpers& jit) {
+                    emitFunctionPrologue(jit);
+
+                    jit.ubfx64(GPRInfo::returnValueGPR, 
+                        CCallHelpers::TrustedImm32(lsb), 
+                        CCallHelpers::TrustedImm32(width), 
+                        GPRInfo::returnValueGPR);
+
+                    emitFunctionEpilogue(jit);
+                    jit.ret();
+                });
+                CHECK_EQ(invoke<uint64_t>(ubfx64, src), ((src >> lsb) & ((1ULL << width) - 1ULL)));
+            }
+        }
+    }
+}
 #endif
 
 #if CPU(X86) || CPU(X86_64) || CPU(ARM64)
@@ -3206,6 +3254,8 @@
     RUN(testSub64ArgImm32());
     RUN(testSub64Imm64());
     RUN(testSub64ArgImm64());
+    RUN(testUbfx32());
+    RUN(testUbfx64());
 #endif
 
 #if CPU(X86) || CPU(X86_64) || CPU(ARM64)

Modified: trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp (278993 => 278994)


--- trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp	2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp	2021-06-17 17:16:54 UTC (rev 278994)
@@ -59,6 +59,7 @@
 #include "B3WasmAddressValue.h"
 #include <wtf/IndexMap.h>
 #include <wtf/IndexSet.h>
+#include <wtf/StdLibExtras.h>
 
 #if !ASSERT_ENABLED
 IGNORE_RETURN_TYPE_WARNINGS_BEGIN
@@ -2694,23 +2695,46 @@
         }
 
         case BitAnd: {
-            if (m_value->child(1)->isInt(0xff)) {
-                appendUnOp<ZeroExtend8To32, ZeroExtend8To32>(m_value->child(0));
+            Value* left = m_value->child(0);
+            Value* right = m_value->child(1);
+
+            if (right->isInt(0xff)) {
+                appendUnOp<ZeroExtend8To32, ZeroExtend8To32>(left);
                 return;
             }
-            
-            if (m_value->child(1)->isInt(0xffff)) {
-                appendUnOp<ZeroExtend16To32, ZeroExtend16To32>(m_value->child(0));
+
+            if (right->isInt(0xffff)) {
+                appendUnOp<ZeroExtend16To32, ZeroExtend16To32>(left);
                 return;
             }
 
-            if (m_value->child(1)->isInt64(0xffffffff) || m_value->child(1)->isInt32(0xffffffff)) {
-                appendUnOp<Move32, Move32>(m_value->child(0));
+            if (right->isInt64(0xffffffff) || right->isInt32(0xffffffff)) {
+                appendUnOp<Move32, Move32>(left);
                 return;
             }
-            
-            appendBinOp<And32, And64, AndDouble, AndFloat, Commutative>(
-                m_value->child(0), m_value->child(1));
+
+            // UBFX Pattern: dest = (src >> lsb) & ((1 << width) - 1)
+            if (canBeInternal(left) && left->opcode() == ZShr) {
+                Value* srcValue = left->child(0);
+                Value* lsbValue = left->child(1);
+                if (!imm(srcValue) && imm(lsbValue) && right->hasInt()) {
+                    int64_t lsb = lsbValue->asInt();
+                    uint64_t mask = right->asInt();
+                    uint8_t width = static_cast<uint8_t>(!(mask & (mask + 1))) * WTF::bitCount(mask);
+                    Air::Opcode opcode = opcodeForType(Ubfx32, Ubfx64, srcValue->type());
+                    if (opcode
+                        && lsb >= 0
+                        && width > 0
+                        && lsb + width <= (32 << (opcode == Ubfx64))
+                        && isValidForm(opcode, Arg::Tmp, Arg::Imm, Arg::Imm, Arg::Tmp))  {
+                        append(opcode, tmp(srcValue), imm(lsbValue), imm(width), tmp(m_value));
+                        commitInternal(left);
+                        return;
+                    }
+                }
+            }
+
+            appendBinOp<And32, And64, AndDouble, AndFloat, Commutative>(left, right);
             return;
         }
 

Modified: trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes (278993 => 278994)


--- trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes	2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes	2021-06-17 17:16:54 UTC (rev 278994)
@@ -357,6 +357,12 @@
     x86: Imm, Addr
     x86: Imm, Index
 
+arm64: Ubfx32 U:G:32, U:G:32, U:G:32, ZD:G:32
+    Tmp, Imm, Imm, Tmp
+
+arm64: Ubfx64 U:G:64, U:G:32, U:G:32, D:G:64
+    Tmp, Imm, Imm, Tmp
+
 64: And64 U:G:64, U:G:64, D:G:64
     Tmp, Tmp, Tmp
     arm64: BitImm64, Tmp, Tmp

Modified: trunk/Source/_javascript_Core/b3/testb3.h (278993 => 278994)


--- trunk/Source/_javascript_Core/b3/testb3.h	2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/_javascript_Core/b3/testb3.h	2021-06-17 17:16:54 UTC (rev 278994)
@@ -416,6 +416,10 @@
 
 void run(const char* filter);
 void testBitAndSExt32(int32_t value, int64_t mask);
+void testUbfx32();
+void testUbfx32PatternMatch();
+void testUbfx64();
+void testUbfx64PatternMatch();
 void testBasicSelect();
 void testSelectTest();
 void testSelectCompareDouble();

Modified: trunk/Source/_javascript_Core/b3/testb3_2.cpp (278993 => 278994)


--- trunk/Source/_javascript_Core/b3/testb3_2.cpp	2021-06-17 17:05:13 UTC (rev 278993)
+++ trunk/Source/_javascript_Core/b3/testb3_2.cpp	2021-06-17 17:16:54 UTC (rev 278994)
@@ -2540,6 +2540,154 @@
     CHECK(isIdentical(compileAndRun<float>(proc, bitwise_cast<int32_t>(a)), -a));
 }
 
+void testUbfx32()
+{
+    // (src >> lsb) & mask
+    uint32_t src = ""
+    Vector<uint32_t> lsbs = { 0, 15, 30 };
+    Vector<uint32_t> widths = { 30, 16, 1 };
+
+    auto test = [&] (uint32_t lsb, uint32_t mask) -> uint32_t {
+        Procedure proc;
+        BasicBlock* root = proc.addBlock();
+
+        Value* srcValue = root->appendNew<Value>(
+            proc, Trunc, Origin(), 
+            root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+        Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+        Value* maskValue = root->appendNew<Const32Value>(proc, Origin(), mask);
+
+        Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+        root->appendNewControlValue(
+            proc, Return, Origin(),
+            root->appendNew<Value>(proc, BitAnd, Origin(), left, maskValue));
+        
+        return compileAndRun<uint32_t>(proc, src);
+    };
+
+    auto generateMask = [&] (uint32_t width) -> uint32_t {
+        return (1U << width) - 1U;
+    };
+
+    for (size_t i = 0; i < lsbs.size(); ++i) {
+        uint32_t lsb = lsbs.at(i);
+        uint32_t mask = generateMask(widths.at(i));
+        uint32_t lhs = test(lsb, mask);
+        uint32_t rhs = ((src >> lsb) & mask);
+        CHECK(lhs == rhs);
+    }
+}
+
+void testUbfx32PatternMatch()
+{
+    // (src >> lsb) & ((1 << width) - 1)
+    uint32_t src = ""
+    Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+
+    auto test = [&] (uint32_t lsb, uint32_t width) -> uint32_t {
+        Procedure proc;
+        BasicBlock* root = proc.addBlock();
+
+        Value* srcValue = root->appendNew<Value>(
+            proc, Trunc, Origin(), 
+            root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
+        Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+        Value* widthValue = root->appendNew<Const32Value>(proc, Origin(), width);
+        Value* constValueA = root->appendNew<Const32Value>(proc, Origin(), 1);
+        Value* constValueB = root->appendNew<Const32Value>(proc, Origin(), 1);
+
+        Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+        Value* right = root->appendNew<Value>(
+            proc, Sub, Origin(), 
+            root->appendNew<Value>(proc, Shl, Origin(), constValueA, widthValue), constValueB);
+        root->appendNewControlValue(
+            proc, Return, Origin(),
+            root->appendNew<Value>(proc, BitAnd, Origin(), left, right));
+
+        return compileAndRun<uint32_t>(proc, src);
+    };
+
+    for (auto lsb : imms) {
+        for (auto width : imms) {
+            uint32_t lhs = test(lsb, width);
+            uint32_t rhs = ((src >> lsb) & ((1U << width) - 1U));
+            CHECK(lhs == rhs);
+        }
+    }
+}
+
+void testUbfx64()
+{
+    // (src >> lsb) & mask
+    uint64_t src = ""
+    Vector<uint64_t> lsbs = { 0, 31, 62 };
+    Vector<uint64_t> widths = { 63, 32, 1 };
+
+    auto test = [&] (uint64_t lsb, uint64_t mask) -> uint64_t {
+        Procedure proc;
+        BasicBlock* root = proc.addBlock();
+
+        Value* srcValue = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+        Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+        Value* maskValue = root->appendNew<Const64Value>(proc, Origin(), mask);
+
+        Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+        root->appendNewControlValue(
+            proc, Return, Origin(),
+            root->appendNew<Value>(proc, BitAnd, Origin(), left, maskValue));
+
+        return compileAndRun<uint64_t>(proc, src);
+    };
+
+    auto generateMask = [&] (uint64_t width) -> uint64_t {
+        return (1ULL << width) - 1ULL;
+    };
+
+    for (size_t i = 0; i < lsbs.size(); ++i) {
+        uint64_t lsb = lsbs.at(i);
+        uint64_t mask = generateMask(widths.at(i));
+        uint64_t lhs = test(lsb, mask);
+        uint64_t rhs = ((src >> lsb) & mask);
+        CHECK(lhs == rhs);
+    }
+}
+
+void testUbfx64PatternMatch()
+{
+    // (src >> lsb) & ((1 << width) - 1)
+    uint64_t src = ""
+    Vector<uint32_t> imms = { 0, 1, 30, 31, 32, 62, 63, 64 };
+
+    auto test = [&] (uint32_t lsb, uint32_t width) -> uint64_t {
+        Procedure proc;
+        BasicBlock* root = proc.addBlock();
+
+        Value* srcValue = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+        Value* lsbValue = root->appendNew<Const32Value>(proc, Origin(), lsb);
+        Value* widthValue = root->appendNew<Const32Value>(proc, Origin(), width);
+        Value* constValueA = root->appendNew<Const64Value>(proc, Origin(), 1);
+        Value* constValueB = root->appendNew<Const64Value>(proc, Origin(), 1);
+
+        Value* left = root->appendNew<Value>(proc, ZShr, Origin(), srcValue, lsbValue);
+        Value* right = root->appendNew<Value>(
+            proc, Sub, Origin(), 
+            root->appendNew<Value>(proc, Shl, Origin(), constValueA, widthValue), constValueB);
+        root->appendNewControlValue(
+            proc, Return, Origin(),
+            root->appendNew<Value>(proc, BitAnd, Origin(), left, right));
+
+        return compileAndRun<uint64_t>(proc, src);
+    };
+
+    for (auto lsb : imms) {
+        for (auto width : imms) {
+            uint64_t lhs = test(lsb, width);
+            uint64_t rhs = ((src >> lsb) & ((1ULL << width) - 1ULL));
+            CHECK(lhs == rhs);
+        }
+    }
+}
+
 static void testBitAndArgs(int64_t a, int64_t b)
 {
     Procedure proc;
@@ -3354,6 +3502,11 @@
 
 void addBitTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>& tasks)
 {
+    RUN(testUbfx32());
+    RUN(testUbfx32PatternMatch());
+    RUN(testUbfx64());
+    RUN(testUbfx64PatternMatch());
+
     RUN(testBitAndArgs(43, 43));
     RUN(testBitAndArgs(43, 0));
     RUN(testBitAndArgs(10, 3));
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to