https://github.com/ayokunle321 updated 
https://github.com/llvm/llvm-project/pull/197852

>From a3242d5fca8d389fb2a20b343ad5cb6771c8d60e Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Thu, 14 May 2026 20:42:44 -0400
Subject: [PATCH 1/3] add amdgcn tanh builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp |  7 +-
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    | 26 ++++++++
 .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip     | 65 +++++++++++++++++++
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 14 +++-
 4 files changed, 105 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 04ab1c29b0d63..b754e0453383a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -350,10 +350,9 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   case AMDGPU::BI__builtin_amdgcn_tanhf:
   case AMDGPU::BI__builtin_amdgcn_tanhh:
   case AMDGPU::BI__builtin_amdgcn_tanh_bf16: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    mlir::Value src = emitScalarExpr(expr->getArg(0));
+    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "amdgcn.tanh",
+                                       src.getType(), mlir::ValueRange{src});
   }
   case AMDGPU::BI__builtin_amdgcn_uicmp:
   case AMDGPU::BI__builtin_amdgcn_uicmpl:
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
new file mode 100644
index 0000000000000..aca9cd45ee234
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
@@ -0,0 +1,26 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1250 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z14test_tanh_bf16PDF16bDF16b
+// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z14test_tanh_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.tanh.bf16(bfloat %{{.*}})
+__device__ void test_tanh_bf16(__bf16* out, __bf16 a) {
+  *out = __builtin_amdgcn_tanh_bf16(a);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
new file mode 100644
index 0000000000000..8cbd3c48a58d5
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
@@ -0,0 +1,65 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu tonga -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx900 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1010 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1012 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z13test_tanh_f16PDF16_DF16_
+// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z13test_tanh_f16PDF16_DF16_
+// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}tanh.f16(half %{{.*}})
+__device__ void test_tanh_f16(_Float16* out, _Float16 a) {
+  *out = __builtin_amdgcn_tanhh(a);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index 4a61fde7aa90c..c64f32be2444a 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -2,15 +2,15 @@
 
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN:            -target-cpu tahiti -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o 
%t-cir.ll
+// RUN:            -target-cpu tahiti --target-feature +tanh-insts 
fcuda-is-device -emit-llvm %s -o %t-cir.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN:            -target-cpu tahiti -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
 
//===----------------------------------------------------------------------===//
@@ -71,3 +71,11 @@ __device__ void test_div_fmas_f64(double* out, double a, 
double b, double c, int
 __device__ void test_ds_swizzle(int* out, int a) {
   *out = __builtin_amdgcn_ds_swizzle(a, 32);
 }
+
+// CIR-LABEL: @_Z14test_tanhf_f32Pff
+// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.float) -> 
!cir.float
+// LLVM: define{{.*}} void @_Z14test_tanhf_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.tanh.f32(float %{{.*}})
+__device__ void test_tanhf_f32(float* out, float a) {
+  *out = __builtin_amdgcn_tanhf(a);
+}

>From 95ed7ed7dc2879310ecef7414ceaa5cf810ecf45 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Thu, 14 May 2026 20:50:20 -0400
Subject: [PATCH 2/3] fix style

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index b754e0453383a..8d17fcabcea6a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -351,8 +351,9 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   case AMDGPU::BI__builtin_amdgcn_tanhh:
   case AMDGPU::BI__builtin_amdgcn_tanh_bf16: {
     mlir::Value src = emitScalarExpr(expr->getArg(0));
-    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "amdgcn.tanh",
-                                       src.getType(), mlir::ValueRange{src});
+    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()),
+                                       "amdgcn.tanh", src.getType(),
+                                       mlir::ValueRange{src});
   }
   case AMDGPU::BI__builtin_amdgcn_uicmp:
   case AMDGPU::BI__builtin_amdgcn_uicmpl:

>From 85a36c19e68585b3a958d1ef9bb4bf80bb2fcbd6 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Fri, 15 May 2026 14:47:16 -0400
Subject: [PATCH 3/3] isolated tanh test

---
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    | 26 --------
 .../CIR/CodeGenHIP/builtins-amdgcn-tanh.hip   | 38 +++++++++++
 .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip     | 65 -------------------
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 14 +---
 4 files changed, 41 insertions(+), 102 deletions(-)
 delete mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-tanh.hip
 delete mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip

diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
deleted file mode 100644
index aca9cd45ee234..0000000000000
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1250 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1250 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu gfx1250 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-//===----------------------------------------------------------------------===//
-// Test AMDGPU builtins
-//===----------------------------------------------------------------------===//
-
-// CIR-LABEL: @_Z14test_tanh_bf16PDF16bDF16b
-// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.bf16) -> !cir.bf16
-// LLVM: define{{.*}} void @_Z14test_tanh_bf16PDF16bDF16b
-// LLVM: call{{.*}} bfloat @llvm.amdgcn.tanh.bf16(bfloat %{{.*}})
-__device__ void test_tanh_bf16(__bf16* out, __bf16 a) {
-  *out = __builtin_amdgcn_tanh_bf16(a);
-}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-tanh.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-tanh.hip
new file mode 100644
index 0000000000000..5024c0d7a9d81
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-tanh.hip
@@ -0,0 +1,38 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir -target-cpu gfx1250 \
+// RUN:            -target-feature +tanh-insts -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir -target-cpu gfx1250 \
+// RUN:            -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -target-cpu gfx1250 \
+// RUN:             -target-feature +tanh-insts -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// CIR-LABEL: @_Z14test_tanhf_f32Pff
+// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z14test_tanhf_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.tanh.f32(float %{{.*}})
+__device__ void test_tanhf_f32(float* out, float a) {
+  *out = __builtin_amdgcn_tanhf(a);
+}
+
+// CIR-LABEL: @_Z13test_tanh_f16PDF16_DF16_
+// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z13test_tanh_f16PDF16_DF16_
+// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}tanh.f16(half %{{.*}})
+__device__ void test_tanh_f16(_Float16* out, _Float16 a) {
+  *out = __builtin_amdgcn_tanhh(a);
+}
+
+// CIR-LABEL: @_Z14test_tanh_bf16PDF16bDF16b
+// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z14test_tanh_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.tanh.bf16(bfloat %{{.*}})
+__device__ void test_tanh_bf16(__bf16* out, __bf16 a) {
+  *out = __builtin_amdgcn_tanh_bf16(a);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
deleted file mode 100644
index 8cbd3c48a58d5..0000000000000
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
+++ /dev/null
@@ -1,65 +0,0 @@
-#include "../CodeGenCUDA/Inputs/cuda.h"
-
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu tonga -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx900 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1010 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1012 -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu tonga -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx900 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1010 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu gfx1012 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu tonga -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu gfx900 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu gfx1010 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu gfx1012 -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-
-//===----------------------------------------------------------------------===//
-// Test AMDGPU builtins
-//===----------------------------------------------------------------------===//
-
-// CIR-LABEL: @_Z13test_tanh_f16PDF16_DF16_
-// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.f16) -> !cir.f16
-// LLVM: define{{.*}} void @_Z13test_tanh_f16PDF16_DF16_
-// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}tanh.f16(half %{{.*}})
-__device__ void test_tanh_f16(_Float16* out, _Float16 a) {
-  *out = __builtin_amdgcn_tanhh(a);
-}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index c64f32be2444a..4a61fde7aa90c 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -2,15 +2,15 @@
 
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu tahiti -target-feature +tanh-insts 
-fcuda-is-device -emit-cir %s -o %t.cir
+// RUN:            -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
-// RUN:            -target-cpu tahiti --target-feature +tanh-insts 
fcuda-is-device -emit-llvm %s -o %t-cir.ll
+// RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o 
%t-cir.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
-// RUN:            -target-cpu tahiti -target-feature +tanh-insts 
-fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
 
//===----------------------------------------------------------------------===//
@@ -71,11 +71,3 @@ __device__ void test_div_fmas_f64(double* out, double a, 
double b, double c, int
 __device__ void test_ds_swizzle(int* out, int a) {
   *out = __builtin_amdgcn_ds_swizzle(a, 32);
 }
-
-// CIR-LABEL: @_Z14test_tanhf_f32Pff
-// CIR: cir.call_llvm_intrinsic "amdgcn.tanh" {{.*}} : (!cir.float) -> 
!cir.float
-// LLVM: define{{.*}} void @_Z14test_tanhf_f32Pff
-// LLVM: call{{.*}} float @llvm.amdgcn.tanh.f32(float %{{.*}})
-__device__ void test_tanhf_f32(float* out, float a) {
-  *out = __builtin_amdgcn_tanhf(a);
-}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to