CarolineConcatto created this revision.
Herald added subscribers: hiraditya, kristof.beyls, tschuett.
Herald added a project: All.
CarolineConcatto requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

As described in: https://github.com/ARM-software/acle/pull/257

Patch by: Kerry McLaughlin <kerry.mclaugh...@arm.com>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D151461

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
  clang/utils/TableGen/SveEmitter.cpp
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll

Index: llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 4 x float> @bfmlslb_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslb_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslt_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslb_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslb_lane_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslb z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslt_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslt_lane_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslt z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
+  ret <vscale x 4 x float> %out
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3743,10 +3743,10 @@
 defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
 defm FDOT_ZZZ_S  : sve_float_dot<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
 defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
-def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">;
-def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">;
-def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">;
-def BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt">;
+defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>;
+defm BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt>;
+defm BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb_lane>;
+defm BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt_lane>;
 
 defm SDOT_ZZZ_HtoS  : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
 defm UDOT_ZZZ_HtoS  : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3018,6 +3018,16 @@
                             [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [IntrNoMem]>;
 
+  class SME2_BFMLS_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
+                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
+                            [IntrNoMem]>;
+
+  class SME2_BFMLS_Lane_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
+                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+
   class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty],
@@ -3221,6 +3231,12 @@
   def int_aarch64_sme_usmla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
   def int_aarch64_sme_usmla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
 
+  def int_aarch64_sve_bfmlslb : SME2_BFMLS_Intrinsic;
+  def int_aarch64_sve_bfmlslb_lane : SME2_BFMLS_Lane_Intrinsic;
+
+  def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
+  def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;
+
   // Multi-vector signed saturating doubling multiply high
 
   def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;
Index: clang/utils/TableGen/SveEmitter.cpp
===================================================================
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -835,6 +835,13 @@
     NumVectors = 0;
     Signed = false;
     break;
+  case '$':
+    Predicate = false;
+    Svcount = false;
+    Float = false;
+    BFloat = true;
+    ElementBitwidth = 16;
+    break;
   case '}':
     Predicate = false;
     Signed = true;
Index: clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -0,0 +1,85 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// BFMLSLB
+
+
+// CHECK-LABEL: @test_bfmlslb(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z12test_bfmlslbu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslb(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslb,_f32,,)(zda, zn, zm);
+}
+
+
+// CHECK-LABEL: @test_bfmlslb_idx(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_bfmlslb_idxu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslb_idx(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslb_lane,_f32,,)(zda, zn, zm, 7);
+}
+
+// BFMLSLT
+
+
+// CHECK-LABEL: @test_bfmlslt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z12test_bfmlsltu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslt(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslt,_f32,,)(zda, zn, zm);
+}
+
+
+// CHECK-LABEL: @test_bfmlslt_idx(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_bfmlslt_idxu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslt_idx(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslt_lane,_f32,,)(zda, zn, zm, 7);
+}
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -98,6 +98,7 @@
 // O: svfloat16_t
 // M: svfloat32_t
 // N: svfloat64_t
+// $: svbfloat16_t
 
 // J: Prefetch type (sv_prfop)
 // A: pointer to int8_t
@@ -2213,6 +2214,12 @@
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+
+def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D151461: [Clang][SV... Caroline via Phabricator via cfe-commits

Reply via email to