This is a performance hint that can be applied to kernels 
to attempt to limit the number of used registers. This adds two attributes,
amdgpu_num_vgpr and amdgpu_num_sgpr. Alternatively, there could only be one
with two arguments. That version would also be usable for Evergreen, which 
could 
ignore the second argument as a request for the number of SGPRs.

http://reviews.llvm.org/D6288

Files:
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/DiagnosticSemaKinds.td
  include/clang/Sema/AttributeList.h
  lib/CodeGen/TargetInfo.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
  test/SemaOpenCL/amdgpu-num-register-attrs.cl
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -115,6 +115,10 @@
 def FunctionLike : SubsetSubject<DeclBase,
                                   [{S->getFunctionType(false) != NULL}]>;
 
+def KernelFunction : SubsetSubject<Function, [{
+  S->hasAttr<OpenCLKernelAttr>()
+}]>;
+
 // HasFunctionProto is a more strict version of FunctionLike, so it should
 // never be specified in a Subjects list along with FunctionLike (due to the
 // inclusive nature of subject testing).
@@ -241,6 +245,7 @@
   let OSes = ["Win32"];
 }
 def TargetMips : TargetArch<["mips", "mipsel"]>;
+def TargetAMDGPU : TargetArch<["r600"]>;
 
 class Attr {
   // The various ways in which an attribute can be spelled in source
@@ -859,6 +864,22 @@
   let Documentation = [Undocumented];
 }
 
+def AMDGPUNumVGPR : InheritableAttr, TargetSpecificAttr<TargetAMDGPU> {
+  let Spellings = [GCC<"amdgpu_num_vgpr">];
+  let Args = [UnsignedArgument<"NumVGPR">];
+  let Documentation = [AMDGPURegisterDocs];
+  let Subjects = SubjectList<[KernelFunction], ErrorDiag,
+                             "ExpectedKernelFunction">;
+}
+
+def AMDGPUNumSGPR : InheritableAttr, TargetSpecificAttr<TargetAMDGPU> {
+  let Spellings = [GCC<"amdgpu_num_sgpr">];
+  let Args = [UnsignedArgument<"NumSGPR">];
+  let Documentation = [AMDGPURegisterDocs];
+  let Subjects = SubjectList<[KernelFunction], ErrorDiag,
+                              "ExpectedKernelFunction">;
+}
+
 def NoSplitStack : InheritableAttr {
   let Spellings = [GCC<"no_split_stack">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -673,6 +673,31 @@
   }];
 }
 
+def AMDGPURegisterDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the
+``__attribute__((amdgpu_num_vgpr(<num_registers>)))`` and
+``__attribute__((amdgpu_num_sgpr(<num_registers>)))`` attributes on
+AMD GPU targets. This attribute may be attached to a kernel function
+definition and is an optimization hint to the backend for the maximum
+number of registers to use. This is useful in cases where register
+limited occupancy is known to be an important factor for the performance
+for the kernel.
+
+The semantics are as follows:
+
+- The backend will attempt to limit the number of used registers to
+  the specified values, but the exact number used is not
+  guaranteed. The number used may be rounded up to satisfy the
+  allocation requirements or ABI constraints of the subtarget. The
+  backend may also use fewer registers than requested whenever
+  possible.
+
+- 0 implies the default no limit on register usage.
+  }];
+}
+
 def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
   let Content = [{
 Clang supports several different calling conventions, depending on the target
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -2237,7 +2237,7 @@
   "Objective-C instance methods|init methods of interface or class extension declarations|"
   "variables, functions and classes|Objective-C protocols|"
   "functions and global variables|structs or typedefs|"
-  "interface or protocol declarations}1">,
+  "interface or protocol declarations|kernel functions}1">,
   InGroup<IgnoredAttributes>;
 def err_attribute_wrong_decl_type : Error<warn_attribute_wrong_decl_type.Text>;
 def warn_type_attribute_wrong_type : Warning<
Index: include/clang/Sema/AttributeList.h
===================================================================
--- include/clang/Sema/AttributeList.h
+++ include/clang/Sema/AttributeList.h
@@ -843,7 +843,8 @@
   ExpectedObjectiveCProtocol,
   ExpectedFunctionGlobalVarMethodOrProperty,
   ExpectedStructOrTypedef,
-  ExpectedObjectiveCInterfaceOrProtocol
+  ExpectedObjectiveCInterfaceOrProtocol,
+  ExpectedKernelFunction
 };
 
 }  // end namespace clang
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/RecordLayout.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Type.h"
@@ -6172,6 +6173,40 @@
   return AddrTyped;
 }
 
+//===----------------------------------------------------------------------===//
+// AMDGPU ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
+    : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override;
+};
+
+}
+
+void AMDGPUTargetCodeGenInfo::SetTargetAttributes(const Decl *D,
+                                                  llvm::GlobalValue *GV,
+                                                  CodeGen::CodeGenModule &M) const {
+  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD)
+    return;
+
+  if (const AMDGPUNumVGPRAttr *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(Attr->getNumVGPR()));
+  }
+
+  if (const AMDGPUNumSGPRAttr *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(Attr->getNumSGPR()));
+  }
+}
+
 
 //===----------------------------------------------------------------------===//
 // SPARC v9 ABI Implementation.
@@ -7233,6 +7268,8 @@
   }
   case llvm::Triple::hexagon:
     return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types));
+  case llvm::Triple::r600:
+    return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
   case llvm::Triple::sparcv9:
     return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types));
   case llvm::Triple::xcore:
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -3940,6 +3940,38 @@
     handleARMInterruptAttr(S, D, Attr);
 }
 
+static void handleAMDGPUNumRegAttr(Sema &S, Decl *D,
+                                   const AttributeList &Attr,
+                                   bool IsVGPR) {
+  if (!checkAttributeNumArgs(S, Attr, 1))
+    return;
+
+  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+  llvm::APSInt NumRegs(32);
+
+  if (!NumRegsExpr->isIntegerConstantExpr(NumRegs, S.Context)) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
+      << Attr.getName() << AANT_ArgumentIntegerConstant
+      << NumRegsExpr->getSourceRange();
+    return;
+  }
+
+  if (IsVGPR) {
+    D->addAttr(::new (S.Context)
+               AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context,
+                                 NumRegs.getZExtValue(),
+                                 Attr.getAttributeSpellingListIndex()));
+  } else {
+    D->addAttr(::new (S.Context)
+               AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context,
+                                 NumRegs.getZExtValue(),
+                                 Attr.getAttributeSpellingListIndex()));
+  }
+
+
+  D->addAttr(UsedAttr::CreateImplicit(S.Context));
+}
+
 static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
                                               const AttributeList& Attr) {
   // If we try to apply it to a function pointer, don't warn, but don't
@@ -4247,6 +4279,12 @@
   case AttributeList::AT_NoMips16:
     handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
     break;
+  case AttributeList::AT_AMDGPUNumVGPR:
+    handleAMDGPUNumRegAttr(S, D, Attr, true);
+    break;
+  case AttributeList::AT_AMDGPUNumSGPR:
+    handleAMDGPUNumRegAttr(S, D, Attr, false);
+    break;
   case AttributeList::AT_IBAction:
     handleSimpleAttribute<IBActionAttr>(S, D, Attr);
     break;
Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple r600-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+
+__attribute__((amdgpu_num_vgpr(64)))
+kernel void test_num_vgpr64() {
+// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_num_sgpr(32)))
+kernel void test_num_sgpr32() {
+// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]]
+}
+
+__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32)))
+kernel void test_num_vgpr64_sgpr32() {
+// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]]
+
+}
+
+__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40)))
+kernel void test_num_sgpr20_vgpr40() {
+// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]]
+}
+
+
+// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64"
+// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32"
+// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64"
+// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40"
Index: test/SemaOpenCL/amdgpu-num-register-attrs.cl
===================================================================
--- /dev/null
+++ test/SemaOpenCL/amdgpu-num-register-attrs.cl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s
+
+typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} FooStruct;
+
+
+__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
+__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
+
+
+__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+
+__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {}
+__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {}
+__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {}
+
+
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to