[PATCH] D143495: [AMDGPU ASAN] Remove reference to asan bitcode library

2023-02-13 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien added a comment.

@yaxunl Could you please commit this change on my behalf? I don't have a write 
access to the trunk


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143495/new/

https://reviews.llvm.org/D143495

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D143495: [AMDGPU ASAN] Remove reference to asan bitcode library

2023-02-07 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien created this revision.
pvellien added reviewers: yaxunl, b-sumner.
Herald added subscribers: kosarev, kerbowa, tpr, dstuttard, jvesely, kzhuravl.
Herald added a project: All.
pvellien requested review of this revision.
Herald added subscribers: cfe-commits, wdng.
Herald added a project: clang.

The asan functions are now attributed as "used" in the device library, no need 
to keep the declaration of asan device preserve function.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D143495

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/amdgpu-asan.cu


Index: clang/test/CodeGenCUDA/amdgpu-asan.cu
===
--- clang/test/CodeGenCUDA/amdgpu-asan.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-// Create a sample address sanitizer bitcode library.
-
-// RUN: %clang_cc1 -x ir -fcuda-is-device -triple amdgcn-amd-amdhsa 
-emit-llvm-bc \
-// RUN:   -disable-llvm-passes -o %t.asanrtl.bc %S/Inputs/amdgpu-asanrtl.ll
-
-// Check sanitizer runtime library functions survive
-// optimizations without being removed or parameters altered.
-
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
-// RUN:   -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN:   | FileCheck -check-prefixes=ASAN %s
-
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
-// RUN:   -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN:   | FileCheck -check-prefixes=ASAN %s
-
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -x hip \
-// RUN:   | FileCheck %s
-
-// REQUIRES: amdgpu-registered-target
-
-// ASAN-DAG: define weak void 
@__amdgpu_device_library_preserve_asan_functions()
-// ASAN-DAG: @__amdgpu_device_library_preserve_asan_functions_ptr = weak 
addrspace(1) constant ptr @__amdgpu_device_library_preserve_asan_functions
-// ASAN-DAG: @llvm.compiler.used = 
{{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr
-// ASAN-DAG: define weak void @__asan_report_load1(i64 %{{.*}})
-
-// CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
-// CHECK-NOT: @__asan_report_load1
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -580,20 +580,6 @@
 EmitMainVoidAlias();
 
   if (getTriple().isAMDGPU()) {
-// Emit reference of __amdgpu_device_library_preserve_asan_functions to
-// preserve ASAN functions in bitcode libraries.
-if (LangOpts.Sanitize.has(SanitizerKind::Address)) {
-  auto *FT = llvm::FunctionType::get(VoidTy, {});
-  auto *F = llvm::Function::Create(
-  FT, llvm::GlobalValue::ExternalLinkage,
-  "__amdgpu_device_library_preserve_asan_functions", &getModule());
-  auto *Var = new llvm::GlobalVariable(
-  getModule(), FT->getPointerTo(),
-  /*isConstant=*/true, llvm::GlobalValue::WeakAnyLinkage, F,
-  "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
-  llvm::GlobalVariable::NotThreadLocal);
-  addCompilerUsedGlobal(Var);
-}
 // Emit amdgpu_code_object_version module flag, which is code object 
version
 // times 100.
 if (getTarget().getTargetOpts().CodeObjectVersion !=


Index: clang/test/CodeGenCUDA/amdgpu-asan.cu
===
--- clang/test/CodeGenCUDA/amdgpu-asan.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-// Create a sample address sanitizer bitcode library.
-
-// RUN: %clang_cc1 -x ir -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
-// RUN:   -disable-llvm-passes -o %t.asanrtl.bc %S/Inputs/amdgpu-asanrtl.ll
-
-// Check sanitizer runtime library functions survive
-// optimizations without being removed or parameters altered.
-
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
-// RUN:   -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN:   | FileCheck -check-prefixes=ASAN %s
-
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
-// RUN:   -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN:   | FileCheck -check-prefixes=ASAN %s
-
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -x hip \
-// RUN:   | FileCheck %s
-
-// REQUIRES: amdgpu-registered-target
-
-// ASAN-DAG: define weak void @__amdgpu_device_library_preserve_asan_functions()
-// ASAN-DAG: @__amdgpu_device_library_preserve_asan_functions_ptr = weak addrspace(1) constant ptr @__amdgpu_device_library_preserve_asan_functions
-// ASAN-DAG: @llvm.compiler.used = {{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr
-

[PATCH] D116216: Prevent adding module flag - amdgpu_hostcall multiple times.

2022-01-18 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien added a comment.

In D116216#3251066 , @yaxunl wrote:

> LGTM. Thanks.

Could you please commit on my behalf? I don't have a commit access to llvm trunk


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116216/new/

https://reviews.llvm.org/D116216

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116216: Prevent adding module flag - amdgpu_hostcall multiple times.

2022-01-18 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien updated this revision to Diff 400750.
pvellien added a comment.

Removed amdgpu-asan-noprintf.cu and added amdgpu-asan-printf.cu testcase.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116216/new/

https://reviews.llvm.org/D116216

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/amdgpu-asan-printf.cu


Index: clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
+// RUN:   -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
+
+// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
+// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
+
+// Test to check hostcall module flag metadata is generated correctly
+// when a program has printf call and compiled with -fsanitize=address.
+#include "Inputs/cuda.h"
+__device__ void non_kernel() {
+  printf("sanitized device function");
+}
+
+__global__ void kernel() {
+  non_kernel();
+}
+
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,7 +565,9 @@
 "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
 llvm::GlobalVariable::NotThreadLocal);
 addCompilerUsedGlobal(Var);
-getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+if (!getModule().getModuleFlag("amdgpu_hostcall")) {
+  getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+}
   }
 
   emitLLVMUsed();


Index: clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
+// RUN:   -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
+
+// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
+// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
+
+// Test to check hostcall module flag metadata is generated correctly
+// when a program has printf call and compiled with -fsanitize=address.
+#include "Inputs/cuda.h"
+__device__ void non_kernel() {
+  printf("sanitized device function");
+}
+
+__global__ void kernel() {
+  non_kernel();
+}
+
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,7 +565,9 @@
 "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
 llvm::GlobalVariable::NotThreadLocal);
 addCompilerUsedGlobal(Var);
-getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+if (!getModule().getModuleFlag("amdgpu_hostcall")) {
+  getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+}
   }
 
   emitLLVMUsed();
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116216: Prevent adding module flag - amdgpu_hostcall multiple times.

2022-01-09 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien updated this revision to Diff 398519.
pvellien added a comment.
Herald added subscribers: kerbowa, nhaehnle, jvesely.

@lebedev.ri updated with test-cases.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116216/new/

https://reviews.llvm.org/D116216

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/amdgpu-asan-noprintf.cu
  clang/test/CodeGenCUDA/amdgpu-asan.cu


Index: clang/test/CodeGenCUDA/amdgpu-asan.cu
===
--- clang/test/CodeGenCUDA/amdgpu-asan.cu
+++ clang/test/CodeGenCUDA/amdgpu-asan.cu
@@ -30,5 +30,16 @@
 // MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
 // MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
 
+// CHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
+// CHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
 // CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
 // CHECK-NOT: @__asan_report_load1
+
+#include "Inputs/cuda.h"
+__device__ void non_kernel() {
+  printf("sanitized device function");
+}
+
+__global__ void kernel() {
+  non_kernel();
+}
Index: clang/test/CodeGenCUDA/amdgpu-asan-noprintf.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-asan-noprintf.cu
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
+// RUN:   -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
+
+// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
+// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
+
+// Test to check hostcall module flag metadata is generated
+// without a call to printf when compiled with -fsanitize=address.
+#include "Inputs/cuda.h"
+__global__ void sanitize_kernel() {
+}
+
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,7 +565,9 @@
 "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
 llvm::GlobalVariable::NotThreadLocal);
 addCompilerUsedGlobal(Var);
-getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+if (!getModule().getModuleFlag("amdgpu_hostcall")) {
+  getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+}
   }
 
   emitLLVMUsed();


Index: clang/test/CodeGenCUDA/amdgpu-asan.cu
===
--- clang/test/CodeGenCUDA/amdgpu-asan.cu
+++ clang/test/CodeGenCUDA/amdgpu-asan.cu
@@ -30,5 +30,16 @@
 // MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
 // MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
 
+// CHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
+// CHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
 // CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
 // CHECK-NOT: @__asan_report_load1
+
+#include "Inputs/cuda.h"
+__device__ void non_kernel() {
+  printf("sanitized device function");
+}
+
+__global__ void kernel() {
+  non_kernel();
+}
Index: clang/test/CodeGenCUDA/amdgpu-asan-noprintf.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-asan-noprintf.cu
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
+// RUN:   -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
+
+// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
+// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
+
+// Test to check hostcall module flag metadata is generated
+// without a call to printf when compiled with -fsanitize=address.
+#include "Inputs/cuda.h"
+__global__ void sanitize_kernel() {
+}
+
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,7 +565,9 @@
 "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
 llvm::GlobalVariable::NotThreadLocal);
 addCompilerUsedGlobal(Var);
-getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+if (!getModule().getModuleFlag("amdgpu_hostcall")) {
+  getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+}
   }
 
   emitLLVMUsed();
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116216: Prevent adding module flag - amdgpu_hostcall multiple times.

2021-12-24 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien added a comment.

@yaxunl It would be very much helpful to know how to write test coverage for 
this particular patch? thanks


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116216/new/

https://reviews.llvm.org/D116216

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116216: Prevent adding module flag - amdgpu_hostcall multiple times.

2021-12-23 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien added a comment.

The testcases related to this patch are already added via this patch 
https://reviews.llvm.org/D112820.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116216/new/

https://reviews.llvm.org/D116216

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116216: Prevent adding module flag - amdgpu_hostcall multiple times.

2021-12-23 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, kzhuravl.
pvellien requested review of this revision.
Herald added subscribers: cfe-commits, wdng.
Herald added a project: clang.

HIP program with printf call fails to compile with -fsanitize=address option, 
because of appending module flag - amdgpu_hostcall twice, one for printf and 
one for sanitize option. This patch fixes that issue.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116216

Files:
  clang/lib/CodeGen/CodeGenModule.cpp


Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,7 +565,9 @@
 "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
 llvm::GlobalVariable::NotThreadLocal);
 addCompilerUsedGlobal(Var);
-getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+if (!getModule().getModuleFlag("amdgpu_hostcall")) {
+  getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+}
   }
 
   emitLLVMUsed();


Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,7 +565,9 @@
 "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
 llvm::GlobalVariable::NotThreadLocal);
 addCompilerUsedGlobal(Var);
-getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+if (!getModule().getModuleFlag("amdgpu_hostcall")) {
+  getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+}
   }
 
   emitLLVMUsed();
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92115: AMDGPU - Add diagnostic for compiling modules with AMD HSA OS type and GFX 6 arch

2020-12-24 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien abandoned this revision.
pvellien added a comment.

This change is wrong, the different patch is landed in llvm to handle global 
address space access in gfx60x for HSA Os. So closing it.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92115/new/

https://reviews.llvm.org/D92115

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92115: AMDGPU - Add diagnostic for compiling modules with AMD HSA OS type and GFX 6 arch

2020-11-27 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien added inline comments.



Comment at: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp:134
+  if (isAmdHsaOS() && getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+report_fatal_error("GFX6 (SI) ASICs does not support AMD HSA OS type \n",
+   false);

t-tye wrote:
> pvellien wrote:
> > t-tye wrote:
> > > rampitec wrote:
> > > > "do not support". I would also drop "(SI)" from the message. Maybe even 
> > > > better just "GFX6 does not support AMD HSA".
> > > Make the message include the full target triple text so the user 
> > > understands how to resolve the issue. For example:
> > > 
> > >   The target triple %s is not supported: the processor %s does not 
> > > support the amdhsa OS
> > > 
> > > Do the r600 targets also produce a similar error message?
> > > 
> > > Is this really the right test? My understanding is that the issue is not 
> > > that gfx60x does not support the amdhsa OS, but that it does not use the 
> > > FLAT address space.
> > > 
> > > My understanding is that the current problem is that FLAT instructions 
> > > are being used for the GLOBAL address space accesses. The use of FLAT 
> > > instructions for the global address space was introduced after gfx60x was 
> > > initially being supported on amdhsa. Originally BUFFER instructions that 
> > > use an SRD that has a 0 base and are marked as addr64 where used for 
> > > GLOBAL address space accesses. This was changed to use FLAT instructions 
> > > due to later targets dropping the SRD addr64 support. I suspect it is 
> > > that change that broke gfx60x as there were no tests to catch it.
> > > 
> > > So the real fix seems to find that change and make the code still use use 
> > > BUFFER instructions for gfxx60x and FLAT instructions for gfx70x+. The 
> > > tests can then be updated to test gfx60x for amdhsa but to omit the FLAT 
> > > address space tests. The error would then indicate that the gfx60x does 
> > > not support the FLAT address space (and that is not conditional on the 
> > > OS). The documentation in AMDGPUUsage can state that gfx60x does not 
> > > support the FLAT address space in the Address Space section. The 
> > > Processor table can add a column for processor characteristics and 
> > > mention that the gfx60x targets do not support the FLAT address space.
> > Previously in the internal review process it mentioned that gfx60x does not 
> > support HSA and agreed to add a diagnostic to report that GFX6 do not 
> > support HSA OS type, @rampitec mentioned that SI ASICs cannot support HSA 
> > because we can't able to map memory on SI as HSA requires so the user will 
> > just have weird runtime failures. But based on your comment it seems like 
> > we have to use MUBUF instructions for -mtriple=amdgcn-amd-amdhsa 
> > -mcpu=gfx60x combination and use FLAT instructions for 
> > -mtriple=amdgcn-amd-amdhsa -mcpu=gfx70x+. Is my understanding correct? If 
> > the compiler emits the MUBUF instructions for global address space 
> > accesses, it is still required to produce the error msg? 
> In the early days of implementing HSA I believe we were bringing up on gfx6. 
> It could not support all HSA features, but it did function with the parts it 
> could support. So I was suggesting we restore the code to support what it did 
> originally. That would mean using MUBUF for the GLOBAL address space like it 
> used to do (is that code still present?).
> 
> The compiler can then report errors for the features it cannot support, which 
> in this case is it cannot support instruction selection of the GENERIC 
> address space on gfx6.
> 
> If you could find the commit that switched to using FLAT instructions to 
> access the GLOBAL address space that will likely provide the necessary 
> information to decide the best thing to do for this issue.
I code to select MUBUF instructions for Global address space is still present, 
In fact my first patch for this issue is to generate MUBUF instructions instead 
of reporting error. But it got rejected due to SI ASICs do not support HSA. 
This is the patch [[ https://reviews.llvm.org/D15543 |  
https://reviews.llvm.org/D15543 ]] which switched to using FLAT instructions 
for global. 
So whether the new approach is to off FlatForGlobal flag for 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx60x combination and generate MUBUF 
instructions instead. It would be very much to know what is the expectation :)  
whether we wait for @rampitec for a comment
Btw, thanks a lot for your feedback. 


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92115/new/

https://reviews.llvm.org/D92115

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92115: AMDGPU - Add diagnostic for compiling modules with AMD HSA OS type and GFX 6 arch

2020-11-27 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien added inline comments.



Comment at: llvm/docs/AMDGPUUsage.rst:2109-2112
+Caution:
+  AMD HSA Os is not supported in Southern Islands (GFX6) ASICs.
+
 For example:

t-tye wrote:
> This is not the right place for mentioning this. The Processor table would 
> likely be a better place. It should be in terms of supporting the amdhsa ABI.
Thanks for your feedback, I will update this



Comment at: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp:134
+  if (isAmdHsaOS() && getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+report_fatal_error("GFX6 (SI) ASICs does not support AMD HSA OS type \n",
+   false);

t-tye wrote:
> rampitec wrote:
> > "do not support". I would also drop "(SI)" from the message. Maybe even 
> > better just "GFX6 does not support AMD HSA".
> Make the message include the full target triple text so the user understands 
> how to resolve the issue. For example:
> 
>   The target triple %s is not supported: the processor %s does not support 
> the amdhsa OS
> 
> Do the r600 targets also produce a similar error message?
> 
> Is this really the right test? My understanding is that the issue is not that 
> gfx60x does not support the amdhsa OS, but that it does not use the FLAT 
> address space.
> 
> My understanding is that the current problem is that FLAT instructions are 
> being used for the GLOBAL address space accesses. The use of FLAT 
> instructions for the global address space was introduced after gfx60x was 
> initially being supported on amdhsa. Originally BUFFER instructions that use 
> an SRD that has a 0 base and are marked as addr64 where used for GLOBAL 
> address space accesses. This was changed to use FLAT instructions due to 
> later targets dropping the SRD addr64 support. I suspect it is that change 
> that broke gfx60x as there were no tests to catch it.
> 
> So the real fix seems to find that change and make the code still use use 
> BUFFER instructions for gfxx60x and FLAT instructions for gfx70x+. The tests 
> can then be updated to test gfx60x for amdhsa but to omit the FLAT address 
> space tests. The error would then indicate that the gfx60x does not support 
> the FLAT address space (and that is not conditional on the OS). The 
> documentation in AMDGPUUsage can state that gfx60x does not support the FLAT 
> address space in the Address Space section. The Processor table can add a 
> column for processor characteristics and mention that the gfx60x targets do 
> not support the FLAT address space.
Previously in the internal review process it mentioned that gfx60x does not 
support HSA and agreed to add a diagnostic to report that GFX6 do not support 
HSA OS type, @rampitec mentioned that SI ASICs cannot support HSA because we 
can't able to map memory on SI as HSA requires so the user will just have weird 
runtime failures. But based on your comment it seems like we have to use MUBUF 
instructions for -mtriple=amdgcn-amd-amdhsa -mcpu=gfx60x combination and use 
FLAT instructions for -mtriple=amdgcn-amd-amdhsa -mcpu=gfx70x+. Is my 
understanding correct? If the compiler emits the MUBUF instructions for global 
address space accesses, it is still required to produce the error msg? 


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92115/new/

https://reviews.llvm.org/D92115

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92115: AMDGPU - Add diagnostic for compiling modules with AMD HSA OS type and GFX 6 arch

2020-11-27 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien updated this revision to Diff 308015.
pvellien added a comment.

Updated with stanislav  comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92115/new/

https://reviews.llvm.org/D92115

Files:
  clang/test/CodeGenOpenCL/amdgpu-attrs.cl
  llvm/docs/AMDGPUUsage.rst
  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
  llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
  llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
  llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
  llvm/test/CodeGen/AMDGPU/gfx6-amdhsa-noflat.ll
  llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll
  llvm/test/CodeGen/AMDGPU/lower-kernargs.ll

Index: llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
===
--- llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -530,24 +530,6 @@
   ret void
 }
 
-define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
-; HSA-LABEL: @kern_lds_ptr_si(
-; HSA-NEXT:[[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; HSA-NEXT:[[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT:[[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
-; HSA-NEXT:[[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
-; HSA-NEXT:ret void
-;
-; MESA-LABEL: @kern_lds_ptr_si(
-; MESA-NEXT:[[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; MESA-NEXT:store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
-; MESA-NEXT:ret void
-;
-  store i32 0, i32 addrspace(3)* %lds, align 4
-  ret void
-}
-
 define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i8_i8(
 ; HSA-NEXT:[[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
@@ -1914,7 +1896,6 @@
 
 attributes #0 = { nounwind "target-cpu"="kaveri" }
 attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
-attributes #2 = { nounwind "target-cpu"="tahiti" }
 
 ; GCN: 0 = !{}
 ; GCN: !1 = !{i64 42}
Index: llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll
@@ -0,0 +1,15 @@
+; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s
+
+target datalayout = "A5"
+
+define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #0 {
+; MESA-LABEL: @kern_lds_ptr_si(
+; MESA-NEXT:[[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
+; MESA-NEXT:ret void
+;
+  store i32 0, i32 addrspace(3)* %lds, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="tahiti" }
Index: llvm/test/CodeGen/AMDGPU/gfx6-amdhsa-noflat.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/gfx6-amdhsa-noflat.ll
@@ -0,0 +1,9 @@
+; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+; Report error for gfx6 and amdhsa 
+; ERR: LLVM ERROR: GFX6 do not support AMD HSA
+
+define void @f(i32 addrspace(1)* %out) {
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
+
Index: llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
===
--- llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
+++ llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
@@ -1,4 +1,3 @@
-; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s
 ; RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s
 
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -o - %s | FileCheck -check-prefix=HSA-DEFAULT %s
Index: llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
===
--- llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -1,11 +1,11 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=

[PATCH] D92115: AMDGPU - Add diagnostic for compiling modules with AMD HSA OS type and GFX 6 arch

2020-11-25 Thread praveen velliengiri via Phabricator via cfe-commits
pvellien created this revision.
pvellien added reviewers: rampitec, arsenm, sameerds.
Herald added subscribers: llvm-commits, cfe-commits, kerbowa, jfb, hiraditya, 
t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added projects: clang, LLVM.
pvellien requested review of this revision.
Herald added a subscriber: wdng.

Bail out from compiling modules for GFX6 + AMD HSA OS type as HSA is not 
supported for SI ASICs. Currently gfx6+hsa setup crashing during ISel for 
global load/stores due to lack of FLAT instructions. This patch add a check to 
report error when modules are compiled with -mtriple=amdgcn-amd-amdhsa 
-mcpu=gfx600 and exit from compilation pipeline.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D92115

Files:
  clang/test/CodeGenOpenCL/amdgpu-attrs.cl
  llvm/docs/AMDGPUUsage.rst
  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
  llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
  llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
  llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
  llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
  llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll
  llvm/test/CodeGen/AMDGPU/lower-kernargs.ll

Index: llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
===
--- llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -530,24 +530,6 @@
   ret void
 }
 
-define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
-; HSA-LABEL: @kern_lds_ptr_si(
-; HSA-NEXT:[[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; HSA-NEXT:[[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT:[[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
-; HSA-NEXT:[[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
-; HSA-NEXT:ret void
-;
-; MESA-LABEL: @kern_lds_ptr_si(
-; MESA-NEXT:[[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; MESA-NEXT:store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
-; MESA-NEXT:ret void
-;
-  store i32 0, i32 addrspace(3)* %lds, align 4
-  ret void
-}
-
 define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i8_i8(
 ; HSA-NEXT:[[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
@@ -1914,7 +1896,6 @@
 
 attributes #0 = { nounwind "target-cpu"="kaveri" }
 attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
-attributes #2 = { nounwind "target-cpu"="tahiti" }
 
 ; GCN: 0 = !{}
 ; GCN: !1 = !{i64 42}
Index: llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/lower-kernargs-si-mesa.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; FIXME: Manually added checks for metadata nodes at bottom
+; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s
+
+target datalayout = "A5"
+
+define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #0 {
+; MESA-LABEL: @kern_lds_ptr_si(
+; MESA-NEXT:[[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
+; MESA-NEXT:ret void
+;
+  store i32 0, i32 addrspace(3)* %lds, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="tahiti" }
Index: llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
===
--- llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
+++ llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll
@@ -1,4 +1,3 @@
-; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s
 ; RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s
 
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -o - %s | FileCheck -check-prefix=HSA-DEFAULT %s
Index: llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
===
--- llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ l