llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: None (llvmbot) <details> <summary>Changes</summary> Backport ff72c83b017ba636be13750c7f96cd87fc22c465 Requested by: @<!-- -->phoebewang --- Full diff: https://github.com/llvm/llvm-project/pull/83758.diff 3 Files Affected: - (modified) llvm/lib/Target/X86/X86InstrVecCompiler.td (+3) - (modified) llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll (+22) - (modified) llvm/test/CodeGen/X86/bfloat.ll (+3-4) ``````````diff diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td index bbd19cf8d5b25e..461b2badc13134 100644 --- a/llvm/lib/Target/X86/X86InstrVecCompiler.td +++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td @@ -83,6 +83,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>; +defm : subvector_subreg_lowering<VR128, v8bf16, VR256, v16bf16, sub_xmm>; // A 128-bit subvector extract from the first 512-bit vector position is a // subregister copy that needs no instruction. Likewise, a 128-bit subvector @@ -95,6 +96,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>; +defm : subvector_subreg_lowering<VR128, v8bf16, VR512, v32bf16, sub_xmm>; // A 128-bit subvector extract from the first 512-bit vector position is a // subregister copy that needs no instruction. Likewise, a 128-bit subvector @@ -107,6 +109,7 @@ defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>; defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>; defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>; defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>; +defm : subvector_subreg_lowering<VR256, v16bf16, VR512, v32bf16, sub_ymm>; // If we're inserting into an all zeros vector, just use a plain move which diff --git a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll index 0826faa1071b01..482713e12d15c7 100644 --- a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll @@ -381,3 +381,25 @@ entry: %1 = shufflevector <8 x bfloat> %0, <8 x bfloat> undef, <16 x i32> zeroinitializer ret <16 x bfloat> %1 } + +define <16 x i32> @pr83358() { +; X86-LABEL: pr83358: +; X86: # %bb.0: +; X86-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; X86-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00] +; X86-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: pr83358: +; X64: # %bb.0: +; X64-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00] +; X64-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>) + %2 = bitcast <8 x bfloat> %1 to <4 x i32> + %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + ret <16 x i32> %3 +} diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll index f2d3c4fb34199e..0042d477f3b364 100644 --- a/llvm/test/CodeGen/X86/bfloat.ll +++ b/llvm/test/CodeGen/X86/bfloat.ll @@ -511,7 +511,7 @@ define void @fold_ext_trunc(ptr %pa, ptr %pc) nounwind { define bfloat @fold_ext_trunc2(bfloat %a) nounwind { ; X86-LABEL: fold_ext_trunc2: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: retl ; ; CHECK-LABEL: fold_ext_trunc2: @@ -934,8 +934,8 @@ define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind { define <2 x bfloat> @pr62997(bfloat %a, bfloat %b) { ; X86-LABEL: pr62997: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm1 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; X86-NEXT: retl ; @@ -2423,7 +2423,6 @@ define <16 x bfloat> @fptrunc_v16f32(<16 x float> %a) nounwind { ; AVXNC-LABEL: fptrunc_v16f32: ; AVXNC: # %bb.0: ; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 -; AVXNC-NEXT: vinsertf128 $0, %xmm0, %ymm0, %ymm0 ; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm1, %xmm1 ; AVXNC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVXNC-NEXT: retq `````````` </details> https://github.com/llvm/llvm-project/pull/83758 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits