llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-globalisel

Author: None (llvmbot)

<details>
<summary>Changes</summary>

Backport 8302e8ae6694978806f94aca81cd31258db66 0321f3eeee5cceddc2541046ee155863f5f59585

Requested by: @davemgreen

---

Full diff: https://github.com/llvm/llvm-project/pull/179188.diff

4 Files Affected:

- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+5-1)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir (+4-3)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir (+7-7)
- (modified) llvm/test/CodeGen/AArch64/aarch64-tbz.ll (+180-25)

``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index f9db39e5f8622..ac659f100a863 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1410,7 +1410,8 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
       break;
 
-    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+    // (tbz (any_ext x), b) -> (tbz x, b) and
+    // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
     //
     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
     // on the truncated x is the same as the bit number on x.
@@ -1423,6 +1424,9 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
     // Did we find something worth folding?
     if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
       break;
+    TypeSize InSize = MRI.getType(NextReg).getSizeInBits();
+    if (Bit >= InSize)
+      break;
 
     // NextReg is worth folding. Keep looking.
     Reg = NextReg;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
index ed24193b58267..64d09ea682e4a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-xor-tbz-tbnz.mir
@@ -134,9 +134,10 @@ body: |
 ; CHECK-NEXT: successors: %bb.0(0x40000000), %bb.1(0x40000000)
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
- ; CHECK-NEXT: TBNZX [[COPY1]], 63, %bb.1
+ ; CHECK-NEXT: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr $wzr, [[COPY]]
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ORNWrr]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: TBNZX [[SUBREG_TO_REG]], 63, %bb.1
 ; CHECK-NEXT: B %bb.0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
index 2f8409f9fd3e3..92a37ebf87d9a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
@@ -53,10 +53,10 @@ body: |
 ; CHECK-NEXT: successors: %bb.0(0x40000000), %bb.1(0x40000000)
 ; CHECK-NEXT: liveins: $w0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %reg:gpr32all = COPY $w0
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
- ; CHECK-NEXT: TBZX [[COPY]], 33, %bb.1
+ ; CHECK-NEXT: %reg:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg, 0
+ ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: TBZX %zext, 33, %bb.1
 ; CHECK-NEXT: B %bb.0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
@@ -90,9 +90,9 @@ body: |
 ; CHECK-NEXT: successors: %bb.0(0x40000000), %bb.1(0x40000000)
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: %reg:gpr32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
- ; CHECK-NEXT: TBZX [[COPY]], 33, %bb.1
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+ ; CHECK-NEXT: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15
+ ; CHECK-NEXT: TBZX %zext, 33, %bb.1
 ; CHECK-NEXT: B %bb.0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-tbz.ll b/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
index 4cf36337f9a2d..3a13bc7a7f677 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-tbz.ll
@@ -1,14 +1,45 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=aarch64 -cgp-verify-bfi-updates=true < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 -cgp-verify-bfi-updates=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 -global-isel -O0 < %s | FileCheck %s --check-prefixes=CHECK-GIO0
 
-; CHECK-LABEL: test1
-; CHECK: tbz {{w[0-9]}}, #3, {{.LBB0_3}}
-; CHECK: tbz w[[REG1:[0-9]+]], #2, {{.LBB0_3}}
-; CHECK-NOT: and x{{[0-9]+}}, x[[REG1]], #0x4
-; CHECK-NOT: cbz x{{[0-9]+}}, .LBB0_3
-
-; CHECK: b
 define void @test1(i64 %A, i64 %B) {
+; CHECK-LABEL: test1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: tbz w1, #3, .LBB0_3
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: tbz w0, #2, .LBB0_3
+; CHECK-NEXT: // %bb.2: // %if.then2
+; CHECK-NEXT: b foo
+; CHECK-NEXT: .LBB0_3: // %if.end3
+; CHECK-NEXT: ret
+;
+; CHECK-GIO0-LABEL: test1:
+; CHECK-GIO0: // %bb.0: // %entry
+; CHECK-GIO0-NEXT: sub sp, sp, #32
+; CHECK-GIO0-NEXT: str x30, [sp, #16] // 8-byte Spill
+; CHECK-GIO0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GIO0-NEXT: .cfi_offset w30, -16
+; CHECK-GIO0-NEXT: str x0, [sp] // 8-byte Spill
+; CHECK-GIO0-NEXT: str x1, [sp, #8] // 8-byte Spill
+; CHECK-GIO0-NEXT: mov w8, w1
+; CHECK-GIO0-NEXT: tbz w8, #3, .LBB0_3
+; CHECK-GIO0-NEXT: b .LBB0_1
+; CHECK-GIO0-NEXT: .LBB0_1: // %entry
+; CHECK-GIO0-NEXT: ldr x0, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: mov w8, w0
+; CHECK-GIO0-NEXT: tbz w8, #2, .LBB0_3
+; CHECK-GIO0-NEXT: b .LBB0_2
+; CHECK-GIO0-NEXT: .LBB0_2: // %if.then2
+; CHECK-GIO0-NEXT: ldr x1, [sp, #8] // 8-byte Reload
+; CHECK-GIO0-NEXT: ldr x0, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: bl foo
+; CHECK-GIO0-NEXT: b .LBB0_3
+; CHECK-GIO0-NEXT: .LBB0_3: // %if.end3
+; CHECK-GIO0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
+; CHECK-GIO0-NEXT: add sp, sp, #32
+; CHECK-GIO0-NEXT: ret
 entry:
   %and = and i64 %A, 4
   %notlhs = icmp eq i64 %and, 0
@@ -25,13 +56,43 @@ if.end3: ; preds = %if.then2, %entry
   ret void
 }
 
-; CHECK-LABEL: test2
-; CHECK: cbz {{x[0-9]}}, {{.LBB1_3}}
-; CHECK: tbz w[[REG1:[0-9]+]], #3, {{.LBB1_3}}
-; CHECK-NOT: and x{{[0-9]+}}, x[[REG1]], #0x08
-; CHECK-NOT: cbz x{{[0-9]+}}, .LBB1_3
-
 define void @test2(i64 %A, ptr readonly %B) #0 {
+; CHECK-LABEL: test2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cbz x1, .LBB1_3
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: tbz w0, #3, .LBB1_3
+; CHECK-NEXT: // %bb.2: // %if.then2
+; CHECK-NEXT: ldr x1, [x1]
+; CHECK-NEXT: b foo
+; CHECK-NEXT: .LBB1_3: // %if.end3
+; CHECK-NEXT: ret
+;
+; CHECK-GIO0-LABEL: test2:
+; CHECK-GIO0: // %bb.0: // %entry
+; CHECK-GIO0-NEXT: sub sp, sp, #32
+; CHECK-GIO0-NEXT: str x30, [sp, #16] // 8-byte Spill
+; CHECK-GIO0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GIO0-NEXT: .cfi_offset w30, -16
+; CHECK-GIO0-NEXT: str x0, [sp] // 8-byte Spill
+; CHECK-GIO0-NEXT: str x1, [sp, #8] // 8-byte Spill
+; CHECK-GIO0-NEXT: cbz x1, .LBB1_3
+; CHECK-GIO0-NEXT: b .LBB1_1
+; CHECK-GIO0-NEXT: .LBB1_1: // %entry
+; CHECK-GIO0-NEXT: ldr x0, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: mov w8, w0
+; CHECK-GIO0-NEXT: tbz w8, #3, .LBB1_3
+; CHECK-GIO0-NEXT: b .LBB1_2
+; CHECK-GIO0-NEXT: .LBB1_2: // %if.then2
+; CHECK-GIO0-NEXT: ldr x0, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: ldr x8, [sp, #8] // 8-byte Reload
+; CHECK-GIO0-NEXT: ldr x1, [x8]
+; CHECK-GIO0-NEXT: bl foo
+; CHECK-GIO0-NEXT: b .LBB1_3
+; CHECK-GIO0-NEXT: .LBB1_3: // %if.end3
+; CHECK-GIO0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
+; CHECK-GIO0-NEXT: add sp, sp, #32
+; CHECK-GIO0-NEXT: ret
 entry:
   %tobool = icmp eq ptr %B, null
   %and = and i64 %A, 8
@@ -49,12 +110,42 @@ if.end3: ; preds = %entry, %if.then2
 }
 
 ; Make sure we use the W variant when log2(mask) is < 32.
-; CHECK-LABEL: test3
-; CHECK: tbz {{w[0-9]}}, #3, {{.LBB2_3}}
-; CHECK: tbz w[[REG1:[0-9]+]], #28, {{.LBB2_3}}
-; CHECK-NOT: and x{{[0-9]+}}, x[[REG1]]
-; CHECK-NOT: cbz x{{[0-9]+}}, .LBB2_3
 define void @test3(i64 %A, i64 %B) {
+; CHECK-LABEL: test3:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: tbz w1, #3, .LBB2_3
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: tbz w0, #28, .LBB2_3
+; CHECK-NEXT: // %bb.2: // %if.end3
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB2_3: // %if.then2
+; CHECK-NEXT: b foo
+;
+; CHECK-GIO0-LABEL: test3:
+; CHECK-GIO0: // %bb.0: // %entry
+; CHECK-GIO0-NEXT: sub sp, sp, #32
+; CHECK-GIO0-NEXT: str x30, [sp, #16] // 8-byte Spill
+; CHECK-GIO0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GIO0-NEXT: .cfi_offset w30, -16
+; CHECK-GIO0-NEXT: str x0, [sp] // 8-byte Spill
+; CHECK-GIO0-NEXT: str x1, [sp, #8] // 8-byte Spill
+; CHECK-GIO0-NEXT: mov w8, w1
+; CHECK-GIO0-NEXT: tbz w8, #3, .LBB2_2
+; CHECK-GIO0-NEXT: b .LBB2_1
+; CHECK-GIO0-NEXT: .LBB2_1: // %entry
+; CHECK-GIO0-NEXT: ldr x0, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: mov w8, w0
+; CHECK-GIO0-NEXT: tbnz w8, #28, .LBB2_3
+; CHECK-GIO0-NEXT: b .LBB2_2
+; CHECK-GIO0-NEXT: .LBB2_2: // %if.then2
+; CHECK-GIO0-NEXT: ldr x1, [sp, #8] // 8-byte Reload
+; CHECK-GIO0-NEXT: ldr x0, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: bl foo
+; CHECK-GIO0-NEXT: b .LBB2_3
+; CHECK-GIO0-NEXT: .LBB2_3: // %if.end3
+; CHECK-GIO0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
+; CHECK-GIO0-NEXT: add sp, sp, #32
+; CHECK-GIO0-NEXT: ret
 entry:
   %shift = shl i64 1, 28
   %and = and i64 %A, %shift
@@ -72,12 +163,41 @@ if.end3: ; preds = %if.then2, %entry
   ret void
 }
 
-; CHECK-LABEL: test4
-; CHECK: tbz {{w[0-9]}}, #3, {{.LBB3_3}}
-; CHECK: tbz [[REG1:x[0-9]+]], #35, {{.LBB3_3}}
-; CHECK-NOT: and x{{[0-9]+}}, x[[REG1]]
-; CHECK-NOT: cbz x{{[0-9]+}}, .LBB2_3
 define void @test4(i64 %A, i64 %B) {
+; CHECK-LABEL: test4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: tbz w1, #3, .LBB3_3
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: tbz x0, #35, .LBB3_3
+; CHECK-NEXT: // %bb.2: // %if.end3
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_3: // %if.then2
+; CHECK-NEXT: b foo
+;
+; CHECK-GIO0-LABEL: test4:
+; CHECK-GIO0: // %bb.0: // %entry
+; CHECK-GIO0-NEXT: sub sp, sp, #32
+; CHECK-GIO0-NEXT: str x30, [sp, #16] // 8-byte Spill
+; CHECK-GIO0-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GIO0-NEXT: .cfi_offset w30, -16
+; CHECK-GIO0-NEXT: str x0, [sp] // 8-byte Spill
+; CHECK-GIO0-NEXT: str x1, [sp, #8] // 8-byte Spill
+; CHECK-GIO0-NEXT: mov w8, w1
+; CHECK-GIO0-NEXT: tbz w8, #3, .LBB3_2
+; CHECK-GIO0-NEXT: b .LBB3_1
+; CHECK-GIO0-NEXT: .LBB3_1: // %entry
+; CHECK-GIO0-NEXT: ldr x8, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: tbnz x8, #35, .LBB3_3
+; CHECK-GIO0-NEXT: b .LBB3_2
+; CHECK-GIO0-NEXT: .LBB3_2: // %if.then2
+; CHECK-GIO0-NEXT: ldr x1, [sp, #8] // 8-byte Reload
+; CHECK-GIO0-NEXT: ldr x0, [sp] // 8-byte Reload
+; CHECK-GIO0-NEXT: bl foo
+; CHECK-GIO0-NEXT: b .LBB3_3
+; CHECK-GIO0-NEXT: .LBB3_3: // %if.end3
+; CHECK-GIO0-NEXT: ldr x30, [sp, #16] // 8-byte Reload
+; CHECK-GIO0-NEXT: add sp, sp, #32
+; CHECK-GIO0-NEXT: ret
 entry:
   %shift = shl i64 1, 35
   %and = and i64 %A, %shift
@@ -95,5 +215,40 @@ if.end3: ; preds = %if.then2, %entry
   ret void
 }
 
+define i32 @tbzfromextract(<8 x i8> %b) {
+; CHECK-LABEL: tbzfromextract:
+; CHECK: // %bb.0: // %common.ret
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: ret
+;
+; CHECK-GIO0-LABEL: tbzfromextract:
+; CHECK-GIO0: // %bb.0:
+; CHECK-GIO0-NEXT: fmov d1, d0
+; CHECK-GIO0-NEXT: // implicit-def: $q0
+; CHECK-GIO0-NEXT: fmov d0, d1
+; CHECK-GIO0-NEXT: umov w8, v0.b[0]
+; CHECK-GIO0-NEXT: tbnz w8, #31, .LBB4_2
+; CHECK-GIO0-NEXT: b .LBB4_1
+; CHECK-GIO0-NEXT: .LBB4_1: // %land.rhs
+; CHECK-GIO0-NEXT: mov w0, #1 // =0x1
+; CHECK-GIO0-NEXT: ret
+; CHECK-GIO0-NEXT: .LBB4_2: // %land.end
+; CHECK-GIO0-NEXT: mov w0, wzr
+; CHECK-GIO0-NEXT: ret
+  %e = extractelement <8 x i8> %b, i32 0
+  %z = zext i8 %e to i32
+  %cmp = icmp sge i32 %z, 0
+  br i1 %cmp, label %land.rhs, label %land.end
+
+land.rhs:
+  ret i32 1
+
+land.end:
+  ret i32 0
+}
 declare void @foo(i64, i64)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
``````````

</details>

https://github.com/llvm/llvm-project/pull/179188
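For reviewers skimming the diff: the functional change is the new `Bit >= InSize` check in `getTestBitReg`, which blocks the `(tbz (zext/any_ext x), b) -> (tbz x, b)` fold when the tested bit does not exist in the narrower source value. Below is a minimal standalone sketch of that predicate; `canFoldTestBitThroughExtend` is a hypothetical helper written only for illustration, not code from this patch or from LLVM.

```cpp
// Standalone sketch (illustrative only, not LLVM code): models the intent of
// the new "Bit >= InSize" guard in getTestBitReg. "Bit" is the bit number a
// TB(N)Z tests; "InSize" is the bit width of the value feeding the extend.
#include <cstdint>
#include <iostream>

// Hypothetical helper: the (tbz (zext/any_ext x), b) -> (tbz x, b) fold is only
// sound when bit b exists in x. For bits at or above InSize, a zext supplies
// known-zero bits (and an any_ext supplies undefined bits), so the narrower
// source cannot be tested in place of the extended value.
static bool canFoldTestBitThroughExtend(uint64_t Bit, uint64_t InSize) {
  return Bit < InSize;
}

int main() {
  // Bit 2 of a value zero-extended from i8: the bit exists in the source,
  // so folding onto the narrower register is fine.
  std::cout << canFoldTestBitThroughExtend(2, 8) << '\n';  // prints 1
  // Bit 31 of a value zero-extended from i8 (the tbzfromextract case): the
  // bit lies in the extended region, so the fold must be rejected.
  std::cout << canFoldTestBitThroughExtend(31, 8) << '\n'; // prints 0
  return 0;
}
```

The new `tbzfromextract` test covers the rejected case: `icmp sge i32 (zext i8 %e), 0` becomes a test of bit 31, which a zero extend from i8 always clears, so folding the test onto the 8-bit source would have branched on a bit that value does not have.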
