llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Sander de Smalen (sdesmalen-arm) <details> <summary>Changes</summary> Most uses of subreg liveness tracking will be in streaming SME2 functions, where the compiler can use the strided and contiguous forms of the multi-vector LD1; see #<!-- -->123081 for details. --- Patch is 963.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174189.diff 100 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+11-1) - (modified) llvm/test/CodeGen/AArch64/active_lane_mask.ll (+2-15) - (modified) llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll (+3-9) - (modified) llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll (+3-9) - (modified) llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll (+11-53) - (modified) llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll (-3) - (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll (-1) - (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll (-1) - (modified) llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll (+10-40) - (modified) llvm/test/CodeGen/AArch64/sme-streaming-body.ll (-2) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll (-24) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll (-18) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll (-18) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll (-4) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll (+78-78) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll (-18) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll (+2-98) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll (-54) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll (+2-2) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll (+2-6) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll 
(+182-218) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll (+182-218) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll (+92-152) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll (+1-192) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll (-8) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x1.ll (-64) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x2.ll (-82) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll (+104-104) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll (-24) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll (-2) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll (+52-52) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll (-60) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll (-26) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll (-24) - (modified) llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll (-54) - (modified) llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll (+2-4) - (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll (-38) - (modified) llvm/test/CodeGen/AArch64/sve-fmsub.ll (+2-50) - (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll (+2-5) - (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll (+2-26) - (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll (+2-26) - (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll (+2-121) - (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll (+2-65) - (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll (+2-83) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll (+4-16) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll (+3-13) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll (+3-13) - (modified) 
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll (+3-37) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll (+9-69) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll (+3-5) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll (+3-23) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll (+3-39) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+3-20) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll (+3-29) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll (+3-18) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+10-10) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll (+3-135) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll (+3-22) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+7-14) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll (+3-63) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll (+3-87) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll (+3-23) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll (+8-32) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll (+4-144) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll (+3-27) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+81-141) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+15-39) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll (+3-75) - (modified) 
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll (+3-99) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll (+3-6) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll (+6-126) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll (+3-38) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+78-132) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll (+3-36) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll (+7-97) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll (+12-52) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll (+7-7) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll (+3-5) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll (+3-5) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll (+3-30) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll (+3-4) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll (+3-4) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll (+26-26) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll (+12-16) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll (+2-3) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll (+3-37) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll (+3-23) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll (+8-12) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll (+4-4) - (modified) 
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll (+355-365) - (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll (+6-33) - (modified) llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll (+30-61) - (modified) llvm/test/CodeGen/AArch64/sve-vector-interleave.ll (+5-64) - (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll (+6-6) - (modified) llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll (+2-10) - (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll (+4-157) - (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll (+56-56) - (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll (+51-51) - (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll (+10-10) - (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll (+4-36) ``````````diff diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 1737a0c1529b4..194d1d94e0b25 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -417,7 +417,17 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP")) ReserveXRegisterForRA.set(29); - EnableSubregLiveness = EnableSubregLivenessTracking.getValue(); + // To benefit from SME2's strided-register multi-vector load/store + // instructions we'll need to enable subreg liveness. 
Our longer + // term aim is to make this the default, regardless of streaming + // mode, but there are still some outstanding issues, see: + // https://github.com/llvm/llvm-project/pull/174188 + // and: + // https://github.com/llvm/llvm-project/pull/168353 + if (IsStreaming) + EnableSubregLiveness = true; + else + EnableSubregLiveness = EnableSubregLivenessTracking.getValue(); } const CallLowering *AArch64Subtarget::getCallLowering() const { diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll index b77e90f6fdc45..05d083a654cf6 100644 --- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING +; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE +; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING ; == Scalable == @@ -209,7 +209,6 @@ define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.b, w0, w1 ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC) ret <16 x i1> %active.lane.mask @@ -220,7 +219,6 @@ define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.b, w0, w1 ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <8 
x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC) ret <8 x i1> %active.lane.mask @@ -231,7 +229,6 @@ define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.h, w0, w1 ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC) ret <4 x i1> %active.lane.mask @@ -242,7 +239,6 @@ define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.s, w0, w1 ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC) ret <2 x i1> %active.lane.mask @@ -253,7 +249,6 @@ define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.b, x0, x1 ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC) ret <16 x i1> %active.lane.mask @@ -264,7 +259,6 @@ define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.b, x0, x1 ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC) ret <8 x i1> %active.lane.mask @@ -275,7 +269,6 @@ define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.h, x0, x1 ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC) ret <4 x 
i1> %active.lane.mask @@ -286,7 +279,6 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) { ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo p0.s, x0, x1 ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC) ret <2 x i1> %active.lane.mask @@ -313,7 +305,6 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) { ; CHECK-STREAMING-NEXT: mov z1.b, w1 ; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b ; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-STREAMING-NEXT: ret %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC) ret <16 x i1> %active.lane.mask @@ -340,7 +331,6 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) { ; CHECK-STREAMING-NEXT: mov z1.b, w1 ; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b ; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-STREAMING-NEXT: ret %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC) ret <8 x i1> %active.lane.mask @@ -372,7 +362,6 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) { ; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff ; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h ; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-STREAMING-NEXT: ret %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC) ret <4 x i1> %active.lane.mask @@ -402,7 +391,6 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) { ; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255 ; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s ; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; 
CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-STREAMING-NEXT: ret %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC) ret <2 x i1> %active.lane.mask @@ -465,7 +453,6 @@ define <8 x i1> @lane_mask_v8i1_imm3() { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b, vl3 ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret entry: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 3) diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll index e42f2b1cfba48..b735a52e49d63 100644 --- a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll +++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll @@ -1,15 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s -; RUN: llc -mattr=+sve,+sme2,+fp8 < %s | FileCheck %s -; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s +; RUN: llc -mattr=+sve2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s +; RUN: llc -mattr=+sve,+sme2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s +; RUN: llc -mattr=+sme2,+fp8 --force-streaming -enable-subreg-liveness=true < %s | FileCheck %s target triple = "aarch64-linux" define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) { ; CHECK-LABEL: cvtn_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfcvtn z0.b, { z0.h, z1.h } ; CHECK-NEXT: ret %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) @@ -19,8 +17,6 @@ define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bf define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x 
half> %s2) { ; CHECK-LABEL: cvtn_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fcvtn z0.b, { z0.h, z1.h } ; CHECK-NEXT: ret %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) @@ -30,8 +26,6 @@ define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) { ; CHECK-LABEL: cvtnb_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fcvtnb z0.b, { z0.s, z1.s } ; CHECK-NEXT: ret %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll index e2c861b40e706..aa0b934151fef 100644 --- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll +++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE -; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1 -; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2 +; RUN: llc -enable-subreg-liveness -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE +; RUN: llc -enable-subreg-liveness -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1 +; RUN: llc -enable-subreg-liveness -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 
-check-prefix CHECK-SME2 target triple = "aarch64-linux" ; Test combining of getActiveLaneMask with a pair of extract_vector operations. @@ -181,8 +181,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 { ; CHECK-SVE-NEXT: fmov s0, w8 ; CHECK-SVE-NEXT: mov v0.s[1], v1.s[1] ; CHECK-SVE-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-SVE-NEXT: b use ; ; CHECK-SVE2p1-LABEL: test_fixed_extract: @@ -193,8 +191,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 { ; CHECK-SVE2p1-NEXT: fmov s0, w8 ; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1] ; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-SVE2p1-NEXT: b use ; ; CHECK-SME2-LABEL: test_fixed_extract: @@ -205,9 +201,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 { ; CHECK-SME2-NEXT: fmov s2, w8 ; CHECK-SME2-NEXT: mov z0.s, z1.s[1] ; CHECK-SME2-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s -; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-SME2-NEXT: b use %r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n) %v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0) diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll index ca16df3c09ade..49a0086a7be54 100644 --- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll +++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming 
< %s | FileCheck %s --check-prefixes=CHECK,STREAMING +; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING +; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING ; WITH VSCALE RANGE @@ -145,7 +145,6 @@ define i32 @ctz_nxv2i1(<vscale x 2 x i1> %a) { ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.d -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 0) ret i32 %res @@ -157,7 +156,6 @@ define i32 @ctz_nxv2i1_poison(<vscale x 2 x i1> %a) { ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.d -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 1) ret i32 %res @@ -179,10 +177,8 @@ define i32 @add_i32_ctz_nxv2i1_poison(<vscale x 2 x i1> %a, i32 %b) { ; CHECK-LABEL: add_i32_ctz_nxv2i1_poison: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: incp x0, p0.d -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> %a, i1 1) %trunc = trunc i64 %res to i32 @@ -196,7 +192,6 @@ define i32 @ctz_nxv4i1(<vscale x 4 x i1> %a) { ; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.s -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 0) ret i32 %res @@ -208,7 +203,6 @@ define i32 @ctz_nxv4i1_poison(<vscale x 4 x i1> %a) { ; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.s -; 
CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 1) ret i32 %res @@ -230,10 +224,8 @@ define i32 @add_i32_ctz_nxv4i1_poison(<vscale x 4 x i1> %a, i32 %b) { ; CHECK-LABEL: add_i32_ctz_nxv4i1_poison: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: incp x0, p0.s -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> %a, i1 1) %trunc = trunc i64 %res to i32 @@ -247,7 +239,6 @@ define i32 @ctz_nxv8i1(<vscale x 8 x i1> %a) { ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.h -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 0) ret i32 %res @@ -259,7 +250,6 @@ define i32 @ctz_nxv8i1_poison(<vscale x 8 x i1> %a) { ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.h -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 1) ret i32 %res @@ -281,10 +271,8 @@ define i32 @add_i32_ctz_nxv8i1_poison(<vscale x 8 x i1> %a, i32 %b) { ; CHECK-LABEL: add_i32_ctz_nxv8i1_poison: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: incp x0, p0.h -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> %a, i1 1) %trunc = trunc i64 %res to i32 @@ -298,7 +286,6 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %a) { ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.b -; CHECK-NEXT: // kill: def $w0 killed $w0 
killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0) ret i32 %res @@ -310,7 +297,6 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) { ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.b -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1) ret i32 %res @@ -323,7 +309,6 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.b -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %cmp = icmp ne <vscale x 16 x i8> %a, %b %select = select <vscale x 16 x i1> %pg, <vscale x 16 x i1> %cmp, <vscale x 16 x i1> zeroinitializer @@ -348,10 +333,8 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) { ; CHECK-LABEL: add_i32_ctz_nxv16i1_poison: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: incp x0, p0.b -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> %a, i1 1) %trunc = trunc i64 %res to i32 @@ -370,20 +353,17 @@ define i32 @ctz_v16i1(<16 x i1> %a) { ; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.b -; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0 ; NONSTREAMING-NEXT: ret ; ; STREAMING-LABEL: ctz_v16i1: ; STREAMING: // %bb.0: -; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0 -; STREAMING-NEXT: ptrue p0.b, vl16 ; STREAMING-NEXT: lsl z0.b, z0.b, #7 +; STREAMING-NEXT: ptrue p0.b, vl16 ; STREAMING-NEXT: ptrue p1.b ; STREAMING-NEXT: asr z0.b, z0.b, #7 ; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; STREAMING-NEXT: brkb p0.b, 
p1/z, p0.b ; STREAMING-NEXT: cntp x0, p0, p0.b -; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0 ; STREAMING-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0) ret i32 %res @@ -398,20 +378,17 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) { ; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.b -; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0 ; NONSTREAMING-NEXT: ret ; ; STREAMING-LABEL: ctz_v16i1_poison: ; STREAMING: // %bb.0: -; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0 -; STREAMING-NEXT: ptrue p0.b, vl16 ; STREAMING-NEXT: lsl z0.b, z0.b, #7 +; STREAMING-NEXT: ptrue p0.b, vl16 ; STREAMING-NEXT: ptrue p1.b ; STREAMING-NEXT: asr z0.b, z0.b, #7 ; STREAMING-NEXT: cmpne p0.b, p... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/174189 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
