================
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \
+; RUN: -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s \
+; RUN: --check-prefix=COMBINE \
+; RUN: --implicit-check-not=V_ADD_U32 --implicit-check-not=REG_SEQUENCE
+; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \
+; RUN: -verify-machineinstrs -combiner-disabled -stop-after=amdgpu-isel < %s \
+; RUN: | FileCheck %s --check-prefix=NOCOMBINE
+
+declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32>, i64 immarg)
+
+define <4 x i32> @freeze_lshr_extract_concat_poisonable(<4 x i32> %a, <4 x
i32> %b) nounwind {
+ ; COMBINE-LABEL: name: freeze_lshr_extract_concat_poisonable
+ ; COMBINE: bb.0 (%ir-block.0):
+ ; COMBINE-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; COMBINE-NEXT: {{ $}}
+ ; COMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; COMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; COMBINE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; COMBINE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; COMBINE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64
[[S_MOV_B32_]], [[COPY3]], implicit $exec
+ ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec
+ ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; COMBINE-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
+ ; COMBINE-NEXT: $vgpr1 = COPY [[V_LSHRREV_B32_e64_1]]
+ ; COMBINE-NEXT: $vgpr2 = COPY [[V_LSHRREV_B32_e64_2]]
+ ; COMBINE-NEXT: $vgpr3 = COPY [[V_LSHRREV_B32_e64_3]]
+ ; COMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit
$vgpr2, implicit $vgpr3
+ ;
+ ; NOCOMBINE-LABEL: name: freeze_lshr_extract_concat_poisonable
+ ; NOCOMBINE: bb.0 (%ir-block.0):
+ ; NOCOMBINE-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5,
$vgpr6, $vgpr7
+ ; NOCOMBINE-NEXT: {{ $}}
+ ; NOCOMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+ ; NOCOMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+ ; NOCOMBINE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+ ; NOCOMBINE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; NOCOMBINE-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; NOCOMBINE-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; NOCOMBINE-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; NOCOMBINE-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; NOCOMBINE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64
[[COPY3]], [[S_MOV_B32_]], 0, implicit $exec
+ ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64
[[COPY2]], [[S_MOV_B32_]], 0, implicit $exec
+ ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64
[[COPY1]], [[S_MOV_B32_]], 0, implicit $exec
+ ; NOCOMBINE-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64
[[COPY]], [[S_MOV_B32_]], 0, implicit $exec
+ ; NOCOMBINE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY4]], implicit $exec
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY5]], implicit $exec
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY6]], implicit $exec
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], [[COPY7]], implicit $exec
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_4:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_3]], implicit $exec
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_5:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_2]], implicit $exec
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_6:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_1]], implicit $exec
+ ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_7:%[0-9]+]]:vgpr_32 =
V_LSHRREV_B32_e64 [[S_MOV_B32_1]], killed [[V_ADD_U32_e64_]], implicit $exec
+ ; NOCOMBINE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_256_align2 = REG_SEQUENCE
killed [[V_LSHRREV_B32_e64_3]], %subreg.sub0, killed [[V_LSHRREV_B32_e64_2]],
%subreg.sub1, killed [[V_LSHRREV_B32_e64_1]], %subreg.sub2, killed
[[V_LSHRREV_B32_e64_]], %subreg.sub3, killed [[V_LSHRREV_B32_e64_7]],
%subreg.sub4, killed [[V_LSHRREV_B32_e64_6]], %subreg.sub5, killed
[[V_LSHRREV_B32_e64_5]], %subreg.sub6, killed [[V_LSHRREV_B32_e64_4]],
%subreg.sub7
+ ; NOCOMBINE-NEXT: [[COPY8:%[0-9]+]]:av_256_align2 = COPY killed
[[REG_SEQUENCE]]
+ ; NOCOMBINE-NEXT: [[COPY9:%[0-9]+]]:av_32 = COPY [[COPY8]].sub3
+ ; NOCOMBINE-NEXT: [[COPY10:%[0-9]+]]:av_32 = COPY [[COPY8]].sub2
+ ; NOCOMBINE-NEXT: [[COPY11:%[0-9]+]]:av_32 = COPY [[COPY8]].sub1
+ ; NOCOMBINE-NEXT: [[COPY12:%[0-9]+]]:av_32 = COPY [[COPY8]].sub0
+ ; NOCOMBINE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:av_128_align2 = REG_SEQUENCE
killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed
[[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; NOCOMBINE-NEXT: [[COPY13:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; NOCOMBINE-NEXT: [[COPY14:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; NOCOMBINE-NEXT: [[COPY15:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; NOCOMBINE-NEXT: [[COPY16:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; NOCOMBINE-NEXT: $vgpr0 = COPY [[COPY13]]
+ ; NOCOMBINE-NEXT: $vgpr1 = COPY [[COPY14]]
+ ; NOCOMBINE-NEXT: $vgpr2 = COPY [[COPY15]]
+ ; NOCOMBINE-NEXT: $vgpr3 = COPY [[COPY16]]
+ ; NOCOMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit
$vgpr2, implicit $vgpr3
+ %poisonable = add nsw <4 x i32> %b, <i32 2147483647, i32 2147483647, i32
2147483647, i32 2147483647>
+ %wide = shufflevector <4 x i32> %a, <4 x i32> %poisonable, <8 x i32> <i32 0,
i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %shifted = lshr <8 x i32> %wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
i32 1, i32 1>
+ %fr = freeze <8 x i32> %shifted
+ %ext = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %fr, i64 0)
+ ret <4 x i32> %ext
----------------
RKSimon wrote:
I really don't like these combine vs. nocombine tests. Also, make sure you
commit the baseline test as the first commit in the chain, so that the codegen
change commit shows the change in the test.
https://github.com/llvm/llvm-project/pull/200932
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits