================
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
+
+define i32 @bcnt032_not_for_vregs(i32 %val0) {
+; CHECK-LABEL: bcnt032_not_for_vregs:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_bcnt_u32_b32 v0, v0, 0
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 32, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = call i32 @llvm.ctpop.i32(i32 %val0)
+ %result2 = sub i32 32, %result
+ %cmp = icmp ne i32 %result2, 0
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+}
+
+define i32 @bcnt064_not_for_vregs(i64 %val0) {
+; CHECK-LABEL: bcnt064_not_for_vregs:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_bcnt_u32_b32 v0, v0, 0
+; CHECK-NEXT: v_bcnt_u32_b32 v0, v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 64, v[0:1]
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %result = call i64 @llvm.ctpop.i64(i64 %val0)
+ %result2 = sub i64 64, %result
+ %cmp = icmp ne i64 %result2, 0
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+}
+
+define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
+; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_bcnt1_i32_b32 s1, s0
+; CHECK-NEXT: s_bcnt0_i32_b32 s0, s0
----------------
arsenm wrote:
I'm not sure we should do this in the multiple use case. It's not worse, but
it's trading for an equivalently good instruction
https://github.com/llvm/llvm-project/pull/164847
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits