[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian closed 
https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Stanislav Mekhanoshin via cfe-commits

https://github.com/rampitec approved this pull request.

Thanks. There are definitely at least 2 outstanding problems, but it seems 
there are no regressions comparing to what we have now. LGTM.

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/80908

>From 5b66bb22a91690078a955cea6c02b6b746b6502b Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Fri, 16 Feb 2024 15:08:35 -0500
Subject: [PATCH] [RFC][WIP][AMDGPU] Use `bf16` instead of `i16` for bfloat

Currently it looks like we generally use `i16` to represent `bf16` in those 
tablegen
files. I'm not sure of the reason behind it. My wild guess is the type `bf16` 
was
not available when we enabled the support. This patch is trying to use `bf16`
directly in those tablegen files, aiming at fixing #79369. Of course for #79369
a workaround can be to treat all `INT16` variants as `BFloat` in 
`getOpFltSemantics`,
but it doesn't look good IMHO.

Since I'm fairly new to AMDGPU backend, I'd appreciate it if you can point out
where I don't understand correctly.
---
 .../builtins-amdgcn-dl-insts-gfx11.cl |  5 +-
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |  8 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp  | 92 +++
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 57 
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h   |  2 +
 .../MCTargetDesc/AMDGPUMCCodeEmitter.cpp  | 39 
 llvm/lib/Target/AMDGPU/SIDefines.h|  7 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 15 +++
 llvm/lib/Target/AMDGPU/SIInstrInfo.td | 16 ++--
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td  | 21 -
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp| 54 +++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 16 
 llvm/lib/Target/AMDGPU/VOP3Instructions.td|  2 +-
 .../AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll | 39 
 llvm/test/MC/AMDGPU/bf16_imm.s| 14 +++
 llvm/test/MC/Disassembler/AMDGPU/bf16_imm.txt | 16 
 16 files changed, 369 insertions(+), 34 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/bf16_imm.s
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/bf16_imm.txt

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
index dc7069decaaa61..7688dfa55a78e3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
@@ -11,7 +11,10 @@ typedef unsigned short __attribute__((ext_vector_type(2))) 
ushort2;
 // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, 
float %fC, i1 false)
 // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, 
float %fC, i1 true)
 // CHECK: call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %v2hA, <2 x half> 
%v2hB, half %hC)
-// CHECK: call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %v2ssA, <2 x i16> 
%v2ssB, i16 %sC)
+// CHECK: [[s1:%[0-9]+]] = bitcast <2 x i16> %v2ssA to <2 x bfloat>
+// CHECK-NEXT: [[s2:%[0-9]+]] = bitcast <2 x i16> %v2ssB to <2 x bfloat>
+// CHECK-NEXT: [[s3:%[0-9]+]] = bitcast i16 %sC to bfloat
+// CHECK-NEXT: [[d:%[0-9]+]] = tail call bfloat 
@llvm.amdgcn.fdot2.bf16.bf16(<2 x bfloat> [[s1]], <2 x bfloat> [[s2]], bfloat 
[[s3]])
 // CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> 
%v2ssB, float %fC, i1 false)
 // CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> 
%v2ssB, float %fC, i1 true)
 // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 202fa4e8f4ea81..6795fb7aa0edb8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2819,11 +2819,11 @@ def int_amdgcn_fdot2_f16_f16 :
 def int_amdgcn_fdot2_bf16_bf16 :
   ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
   DefaultAttrsIntrinsic<
-[llvm_i16_ty],   // %r
+[llvm_bfloat_ty],   // %r
 [
-  llvm_v2i16_ty, // %a
-  llvm_v2i16_ty, // %b
-  llvm_i16_ty// %c
+  llvm_v2bf16_ty, // %a
+  llvm_v2bf16_ty, // %b
+  llvm_bfloat_ty// %c
 ],
 [IntrNoMem, IntrSpeculatable]
   >;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 
b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 79ad6ddf7861fc..883b30562e911b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -475,6 +475,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
 
+  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
+
   bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
 
   bool isSSrcV2F16() const {
@@ -541,22 +543,40 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
+  bool isVCSrcTBF16() const {
+return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
+  }
+
   bool isVCSrcTF16() const {
 return 

[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Stanislav Mekhanoshin via cfe-commits


@@ -0,0 +1,8 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: 
[0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, v1, v2, 1.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: 
[0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]

rampitec wrote:

Wow! Yeah, it's another ticket. Looks like a can of worms.
Add at least 'v_dot2_bf16_bf16 v2, v0, 1.0, v2', this one works.

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits


@@ -0,0 +1,8 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: 
[0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, v1, v2, 1.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: 
[0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]

shiltian wrote:

It looks like our assembler doesn't like the case such as `v_dot2_bf16_bf16 v5, 
v1, 0x42c842c8, 0x42c8`, even w/o this patch. Even the asm (`v_dot2_bf16_bf16 
v2, s0, 0x3f803f80, v2`) generated in another file above 
(`llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll`) can't be recognized 
by our assembler. 

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Stanislav Mekhanoshin via cfe-commits


@@ -0,0 +1,8 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 0x42c8 ; encoding: 
[0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0xc8,0x42,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, v1, v2, 1.0
+// CHECK: v_dot2_bf16_bf16 v5, v1, v2, 1.0 ; encoding: 
[0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]

rampitec wrote:

Can you add couple more tests here? The same instruction, but with the 
immediate in place of v2bf16 operand, so we test that code path too. Both 
immediate and inline literal. Here and in disasm.

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Stanislav Mekhanoshin via cfe-commits


@@ -2652,6 +2652,23 @@ bool isInlinableLiteral32(int32_t Literal, bool 
HasInv2Pi) {
  (Val == 0x3e22f983 && HasInv2Pi);
 }
 
+bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
+  if (!HasInv2Pi)
+return false;

rampitec wrote:

It does not change the behavior, but generally it shall only matter when you 
compare value to 0x3E22.

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Jay Foad via cfe-commits


@@ -157,6 +157,27 @@ static uint32_t getLit16Encoding(uint16_t Val, const 
MCSubtargetInfo ) {
   return 255;
 }
 
+static uint32_t getLitBF16Encoding(uint16_t Val) {
+  uint16_t IntImm = getIntInlineImmEncoding(static_cast(Val));
+  if (IntImm != 0)
+return IntImm;
+
+  // clang-format off
+  switch (Val) {

jayfoad wrote:

Yeah, I really don't like having 4 different copies of this list of hex values 
(0x3f00, 0xbf00...).

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits


@@ -157,6 +157,27 @@ static uint32_t getLit16Encoding(uint16_t Val, const 
MCSubtargetInfo ) {
   return 255;
 }
 
+static uint32_t getLitBF16Encoding(uint16_t Val) {
+  uint16_t IntImm = getIntInlineImmEncoding(static_cast(Val));
+  if (IntImm != 0)
+return IntImm;
+
+  // clang-format off
+  switch (Val) {

shiltian wrote:

In theory, yes, but for now we can't because `getInlineEncodingV2BF16` can't 
handle some cases (that I didn't dig yet). It looks like in the conversion 
between `uint16_t` and `uint32_t` that makes some test cases fail. IMO we need 
to unify them (not only for 16-bit) in one place instead of having almost the 
same logic at least in three places.

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Jay Foad via cfe-commits


@@ -157,6 +157,27 @@ static uint32_t getLit16Encoding(uint16_t Val, const 
MCSubtargetInfo ) {
   return 255;
 }
 
+static uint32_t getLitBF16Encoding(uint16_t Val) {
+  uint16_t IntImm = getIntInlineImmEncoding(static_cast(Val));
+  if (IntImm != 0)
+return IntImm;
+
+  // clang-format off
+  switch (Val) {

jayfoad wrote:

Can this call `getInlineEncodingV2BF16`?

https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/80908

>From d95e99ebcefa76ba2e8068f663be86340c14ab5b Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Fri, 16 Feb 2024 11:29:47 -0500
Subject: [PATCH] [RFC][WIP][AMDGPU] Use `bf16` instead of `i16` for bfloat

Currently it looks like we generally use `i16` to represent `bf16` in those 
tablegen
files. I'm not sure of the reason behind it. My wild guess is the type `bf16` 
was
not available when we enabled the support. This patch is trying to use `bf16`
directly in those tablegen files, aiming at fixing #79369. Of course for #79369
a workaround can be to treat all `INT16` variants as `BFloat` in 
`getOpFltSemantics`,
but it doesn't look good IMHO.

Since I'm fairly new to AMDGPU backend, I'd appreciate it if you can point out
where I don't understand correctly.
---
 .../builtins-amdgcn-dl-insts-gfx11.cl |  5 +-
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |  8 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp  | 92 +++
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 57 
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h   |  2 +
 .../MCTargetDesc/AMDGPUMCCodeEmitter.cpp  | 39 
 llvm/lib/Target/AMDGPU/SIDefines.h|  7 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 15 +++
 llvm/lib/Target/AMDGPU/SIInstrInfo.td | 16 ++--
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td  | 21 -
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp| 54 +++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 16 
 llvm/lib/Target/AMDGPU/VOP3Instructions.td|  2 +-
 .../AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll | 39 
 llvm/test/MC/AMDGPU/bf16_imm.s|  8 ++
 llvm/test/MC/Disassembler/AMDGPU/bf16_imm.txt |  9 ++
 16 files changed, 356 insertions(+), 34 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/bf16_imm.s
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/bf16_imm.txt

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
index dc7069decaaa61..7688dfa55a78e3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl
@@ -11,7 +11,10 @@ typedef unsigned short __attribute__((ext_vector_type(2))) 
ushort2;
 // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, 
float %fC, i1 false)
 // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, 
float %fC, i1 true)
 // CHECK: call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %v2hA, <2 x half> 
%v2hB, half %hC)
-// CHECK: call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %v2ssA, <2 x i16> 
%v2ssB, i16 %sC)
+// CHECK: [[s1:%[0-9]+]] = bitcast <2 x i16> %v2ssA to <2 x bfloat>
+// CHECK-NEXT: [[s2:%[0-9]+]] = bitcast <2 x i16> %v2ssB to <2 x bfloat>
+// CHECK-NEXT: [[s3:%[0-9]+]] = bitcast i16 %sC to bfloat
+// CHECK-NEXT: [[d:%[0-9]+]] = tail call bfloat 
@llvm.amdgcn.fdot2.bf16.bf16(<2 x bfloat> [[s1]], <2 x bfloat> [[s2]], bfloat 
[[s3]])
 // CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> 
%v2ssB, float %fC, i1 false)
 // CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> 
%v2ssB, float %fC, i1 true)
 // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 202fa4e8f4ea81..6795fb7aa0edb8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2819,11 +2819,11 @@ def int_amdgcn_fdot2_f16_f16 :
 def int_amdgcn_fdot2_bf16_bf16 :
   ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
   DefaultAttrsIntrinsic<
-[llvm_i16_ty],   // %r
+[llvm_bfloat_ty],   // %r
 [
-  llvm_v2i16_ty, // %a
-  llvm_v2i16_ty, // %b
-  llvm_i16_ty// %c
+  llvm_v2bf16_ty, // %a
+  llvm_v2bf16_ty, // %b
+  llvm_bfloat_ty// %c
 ],
 [IntrNoMem, IntrSpeculatable]
   >;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 
b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 79ad6ddf7861fc..883b30562e911b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -475,6 +475,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
 
+  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
+
   bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
 
   bool isSSrcV2F16() const {
@@ -541,22 +543,40 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
+  bool isVCSrcTBF16() const {
+return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
+  }
+
   bool isVCSrcTF16() const {
 return 

[clang] [llvm] [AMDGPU] Use `bf16` instead of `i16` for bfloat (PR #80908)

2024-02-16 Thread Shilei Tian via cfe-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/80908
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits