[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-28 Thread Yaxun Liu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL314452: [AMDGPU] Allow flexible register names in inline asm 
constraints (authored by yaxunl).

Changed prior to commit:
  https://reviews.llvm.org/D37568?vs=116383=117037#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D37568

Files:
  cfe/trunk/lib/Basic/Targets/AMDGPU.h
  cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl

Index: cfe/trunk/lib/Basic/Targets/AMDGPU.h
===
--- cfe/trunk/lib/Basic/Targets/AMDGPU.h
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.h
@@ -17,6 +17,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/Compiler.h"
 
@@ -115,17 +116,83 @@
 return None;
   }
 
+  /// Accepted register names: (n, m is unsigned integer, n < m)
+  /// v
+  /// s
+  /// {vn}, {v[n]}
+  /// {sn}, {s[n]}
+  /// {S} , where S is a special register name
+  {v[n:m]}
+  /// {s[n:m]}
   bool validateAsmConstraint(const char *,
  TargetInfo::ConstraintInfo ) const override {
-switch (*Name) {
-default:
-  break;
-case 'v': // vgpr
-case 's': // sgpr
+static const ::llvm::StringSet<> SpecialRegs({
+"exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma",
+"flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo",
+"exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi",
+});
+
+StringRef S(Name);
+bool HasLeftParen = false;
+if (S.front() == '{') {
+  HasLeftParen = true;
+  S = S.drop_front();
+}
+if (S.empty())
+  return false;
+if (S.front() != 'v' && S.front() != 's') {
+  if (!HasLeftParen)
+return false;
+  auto E = S.find('}');
+  if (!SpecialRegs.count(S.substr(0, E)))
+return false;
+  S = S.drop_front(E + 1);
+  if (!S.empty())
+return false;
+  // Found {S} where S is a special register.
+  Info.setAllowsRegister();
+  Name = S.data() - 1;
+  return true;
+}
+S = S.drop_front();
+if (!HasLeftParen) {
+  if (!S.empty())
+return false;
+  // Found s or v.
   Info.setAllowsRegister();
+  Name = S.data() - 1;
   return true;
 }
-return false;
+bool HasLeftBracket = false;
+if (!S.empty() && S.front() == '[') {
+  HasLeftBracket = true;
+  S = S.drop_front();
+}
+unsigned long long N;
+if (S.empty() || consumeUnsignedInteger(S, 10, N))
+  return false;
+if (!S.empty() && S.front() == ':') {
+  if (!HasLeftBracket)
+return false;
+  S = S.drop_front();
+  unsigned long long M;
+  if (consumeUnsignedInteger(S, 10, M) || N >= M)
+return false;
+}
+if (HasLeftBracket) {
+  if (S.empty() || S.front() != ']')
+return false;
+  S = S.drop_front();
+}
+if (S.empty() || S.front() != '}')
+  return false;
+S = S.drop_front();
+if (!S.empty())
+  return false;
+// Found {vn}, {sn}, {v[n]}, {s[n]}, {v[n:m]}, or {s[n:m]}.
+Info.setAllowsRegister();
+Name = S.data() - 1;
+return true;
   }
 
   bool
Index: cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl
===
--- cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl
+++ cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl
@@ -1,14 +1,76 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only  %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 kernel void test () {
 
   int sgpr = 0, vgpr = 0, imm = 0;
 
   // sgpr constraints
   __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : );
 
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : );
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}}
+
   // vgpr constraints
   __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : );
 }
+
+__kernel void
+test_float(const __global float *a, const __global float *b, __global float *c, unsigned i)
+{
+float ai = a[i];
+float bi = b[i];
+float ci;
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output 

[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-28 Thread Brian Sumner via Phabricator via cfe-commits
b-sumner accepted this revision.
b-sumner added a comment.
This revision is now accepted and ready to land.

LGTM.  I think we can leave immediates to another patch.


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked an inline comment as done.
yaxunl added a comment.

Ping.

Brian, Stas,

Can you review this since Matt is on vacation? Thanks.


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-22 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked an inline comment as done.
yaxunl added inline comments.



Comment at: test/Sema/inline-asm-validate-amdgpu.cl:74
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : 
"v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint 
'=v[1:2]' in asm}}
+
+c[i] = ci;

yaxunl wrote:
> arsenm wrote:
> > While you're here can we add some tests for the immediate constraints? 
> > There was a bug report recently when using s_trap with the i constraints 
> > for the constant operand.
> what's the syntax of the immediate constraints? And some examples? Thanks.
Ping. Can we leave this for another patch? Since there are people waiting for 
this feature.


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-22 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 116383.
yaxunl marked 4 inline comments as done.
yaxunl edited the summary of this revision.
yaxunl added a comment.

Fix typo.


https://reviews.llvm.org/D37568

Files:
  lib/Basic/Targets/AMDGPU.h
  test/CodeGenOpenCL/amdgcn-inline-asm.cl
  test/Sema/inline-asm-validate-amdgpu.cl

Index: test/Sema/inline-asm-validate-amdgpu.cl
===
--- test/Sema/inline-asm-validate-amdgpu.cl
+++ test/Sema/inline-asm-validate-amdgpu.cl
@@ -1,14 +1,76 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only  %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 kernel void test () {
 
   int sgpr = 0, vgpr = 0, imm = 0;
 
   // sgpr constraints
   __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : );
 
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : );
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}}
+
   // vgpr constraints
   __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : );
 }
+
+__kernel void
+test_float(const __global float *a, const __global float *b, __global float *c, unsigned i)
+{
+float ai = a[i];
+float bi = b[i];
+float ci;
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={va}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={va}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=v1}' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[1}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1]' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[a]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[a]}' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+c[i] = ci;
+}
+
+__kernel void
+test_double(const __global double *a, const __global double *b, __global double *c, unsigned i)
+{
+double ai = a[i];
+double bi = b[i];
+double ci;
+
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : );
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}}
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}}
+   

[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked 2 inline comments as done.
yaxunl added inline comments.



Comment at: lib/Basic/Targets/AMDGPU.h:194
+Info.setAllowsRegister();
+Name = S.data() - 1;
+return true;

arsenm wrote:
> I'm not sure I understand these data() - 1s. 
The caller of this function expects Name is on the last parsed char and will 
increase it before check the next char.



Comment at: test/Sema/inline-asm-validate-amdgpu.cl:74
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : 
"v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint 
'=v[1:2]' in asm}}
+
+c[i] = ci;

arsenm wrote:
> While you're here can we add some tests for the immediate constraints? There 
> was a bug report recently when using s_trap with the i constraints for the 
> constant operand.
what's the syntax of the immediate constraints? And some examples? Thanks.


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-15 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: lib/Basic/Targets/AMDGPU.h:124
+  /// {sn}, {s[n]}
+  /// {S} , wheere S is a special register name
+  {v[n:m]}

Typo wheere



Comment at: lib/Basic/Targets/AMDGPU.h:194
+Info.setAllowsRegister();
+Name = S.data() - 1;
+return true;

I'm not sure I understand these data() - 1s. 



Comment at: test/Sema/inline-asm-validate-amdgpu.cl:74
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : 
"v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint 
'=v[1:2]' in asm}}
+
+c[i] = ci;

While you're here can we add some tests for the immediate constraints? There 
was a bug report recently when using s_trap with the i constraints for the 
constant operand.


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-08 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 114367.
yaxunl edited the summary of this revision.
yaxunl added a comment.

Allow {v[n]} and {s[n]}. Add more tests.


https://reviews.llvm.org/D37568

Files:
  lib/Basic/Targets/AMDGPU.h
  test/CodeGenOpenCL/amdgcn-inline-asm.cl
  test/Sema/inline-asm-validate-amdgpu.cl

Index: test/Sema/inline-asm-validate-amdgpu.cl
===
--- test/Sema/inline-asm-validate-amdgpu.cl
+++ test/Sema/inline-asm-validate-amdgpu.cl
@@ -1,14 +1,76 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only  %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 kernel void test () {
 
   int sgpr = 0, vgpr = 0, imm = 0;
 
   // sgpr constraints
   __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : );
 
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : );
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}}
+
   // vgpr constraints
   __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : );
 }
+
+__kernel void
+test_float(const __global float *a, const __global float *b, __global float *c, unsigned i)
+{
+float ai = a[i];
+float bi = b[i];
+float ci;
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={va}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={va}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=v1}' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[1}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1]' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v[a]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[a]}' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+c[i] = ci;
+}
+
+__kernel void
+test_double(const __global double *a, const __global double *b, __global double *c, unsigned i)
+{
+double ai = a[i];
+double bi = b[i];
+double ci;
+
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : );
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}}
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}}
+

[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-07 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D37568#863735, @b-sumner wrote:

> The assembler accepts v[N] in addition to vN.  I'm not sure if that is needed 
> here.


Then we'd better also allow that in constraints to avoid confusion of users.


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-07 Thread Brian Sumner via Phabricator via cfe-commits
b-sumner added a comment.

The assembler accepts v[N] in addition to vN.  I'm not sure if that is needed 
here.


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-07 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 114223.
yaxunl marked 3 inline comments as done.
yaxunl edited the summary of this revision.
yaxunl added a comment.

Revised by Matt's comments.


https://reviews.llvm.org/D37568

Files:
  lib/Basic/Targets/AMDGPU.h
  test/CodeGenOpenCL/amdgcn-inline-asm.cl
  test/Sema/inline-asm-validate-amdgpu.cl

Index: test/Sema/inline-asm-validate-amdgpu.cl
===
--- test/Sema/inline-asm-validate-amdgpu.cl
+++ test/Sema/inline-asm-validate-amdgpu.cl
@@ -1,6 +1,7 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only  %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 kernel void test () {
 
@@ -12,3 +13,45 @@
   // vgpr constraints
   __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : );
 }
+
+__kernel void
+test_float(const __global float *a, const __global float *b, __global float *c, unsigned i)
+{
+float ai = a[i];
+float bi = b[i];
+float ci;
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, exec" : "={v1}"(ci) : "{v2}"(ai), "{exec}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, exec" : "={v1}"(ci) : "{v2}"(ai), "{exec}a"(bi) : ); // expected-error {{invalid input constraint '{exec}a' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+c[i] = ci;
+}
+
+__kernel void
+test_double(const __global double *a, const __global double *b, __global double *c, unsigned i)
+{
+double ai = a[i];
+double bi = b[i];
+double ci;
+
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : );
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}}
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}}
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}a"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]}a' in asm}}
+
+__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : "v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint '=v[1:2]' in asm}}
+
+c[i] = ci;
+}
Index: test/CodeGenOpenCL/amdgcn-inline-asm.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/amdgcn-inline-asm.cl
@@ -0,0 +1,16 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn -O0 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: @ker
+__kernel void
+ker(const __global float *a, const __global float *b, __global float *c, unsigned i)
+{
+float ai = a[i];
+float bi = b[i];
+float ci;
+// CHECK: call float asm "v_add_f32_e32 v1, v2, v3", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}})
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+// CHECK: call float asm "v_add_f32_e32 $0, $1, $2", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}})
+__asm("v_add_f32_e32 %0, %1, %2" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+c[i] = ci;
+}
Index: lib/Basic/Targets/AMDGPU.h
===
--- lib/Basic/Targets/AMDGPU.h
+++ lib/Basic/Targets/AMDGPU.h
@@ -17,6 +17,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/Compiler.h"
 
@@ -115,17 +116,79 @@
 return None;
   }
 
+  /// Accepted register names: (n, m 

[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-07 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: test/Sema/inline-asm-validate-amdgpu.cl:38
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// 
expected-error {{invalid output constraint '=v1' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : 
);  // expected-error {{invalid output constraint '=v1:2' in asm}}
+

arsenm wrote:
> arsenm wrote:
> > I don't understand. The backend parsed syntax is v[3:4]. Why should v3:4 be 
> > valid? Also in this example you are using a 64-bit input to a 32-bit operand
> i.e. this won't parse in the backend and is invalid. The brackets are required
Will fix the syntax about [].

The test did not consider validity of the inline assembly for the backend since 
FE only check format. I agree it is better to make it valid for backend and 
will try to fix that.



https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-07 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: test/Sema/inline-asm-validate-amdgpu.cl:38
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// 
expected-error {{invalid output constraint '=v1' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : 
);  // expected-error {{invalid output constraint '=v1:2' in asm}}
+

arsenm wrote:
> I don't understand. The backend parsed syntax is v[3:4]. Why should v3:4 be 
> valid? Also in this example you are using a 64-bit input to a 32-bit operand
i.e. this won't parse in the backend and is invalid. The brackets are required


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-07 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: test/Sema/inline-asm-validate-amdgpu.cl:38
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// 
expected-error {{invalid output constraint '=v1' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : 
);  // expected-error {{invalid output constraint '=v1:2' in asm}}
+

I don't understand. The backend parsed syntax is v[3:4]. Why should v3:4 be 
valid? Also in this example you are using a 64-bit input to a 32-bit operand


https://reviews.llvm.org/D37568



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-07 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
Herald added subscribers: eraman, t-tye, tpr, dstuttard, nhaehnle, wdng, 
kzhuravl.

Currently AMDGPU inline asm only allow "v" and "s" as register names in 
constraints.

This patch allows the following register names in constraints: (n, m is 
unsigned integer, n < m)

v

s

{vn}

{sn}

{S} , wheere S is a special register name

{vn:m}

{sn:m}


https://reviews.llvm.org/D37568

Files:
  lib/Basic/Targets/AMDGPU.h
  test/CodeGenOpenCL/amdgcn-inline-asm.cl
  test/Sema/inline-asm-validate-amdgpu.cl

Index: test/Sema/inline-asm-validate-amdgpu.cl
===
--- test/Sema/inline-asm-validate-amdgpu.cl
+++ test/Sema/inline-asm-validate-amdgpu.cl
@@ -1,6 +1,5 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only  %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s
 
 kernel void test () {
 
@@ -12,3 +11,33 @@
   // vgpr constraints
   __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : );
 }
+
+__kernel void
+ker(const __global float *a, const __global float *b, __global float *c, unsigned i)
+{
+float ai = a[i];
+float bi = b[i];
+float ci;
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={exec}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "={exec}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={exec}a' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1:2}"(ci) : "{v3:4}"(ai), "{v4:5}"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "=v{1:2}"(ci) : "{v3:4}"(ai), "{v4:5}"(bi) : ); //expected-error {{invalid output constraint '=v{1:2}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1:2}a"(ci) : "{v3:4}"(ai), "{v4:5}"(bi) : ); //expected-error {{invalid output constraint '={v1:2}a' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : );
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : );  // expected-error {{invalid output constraint '=v1:2' in asm}}
+
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
+c[i] = ci;
+}
Index: test/CodeGenOpenCL/amdgcn-inline-asm.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/amdgcn-inline-asm.cl
@@ -0,0 +1,16 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn -O0 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: @ker
+__kernel void
+ker(const __global float *a, const __global float *b, __global float *c, unsigned i)
+{
+float ai = a[i];
+float bi = b[i];
+float ci;
+// CHECK: call float asm "v_add_f32_e32 v1, v2, v3", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}})
+__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+// CHECK: call float asm "v_add_f32_e32 $0, $1, $2", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}})
+__asm("v_add_f32_e32 %0, %1, %2" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
+c[i] = ci;
+}
Index: lib/Basic/Targets/AMDGPU.h
===
--- lib/Basic/Targets/AMDGPU.h
+++ lib/Basic/Targets/AMDGPU.h
@@ -17,6 +17,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/Compiler.h"
 
@@ -115,17 +116,70 @@
 return None;
   }
 
+  /// Accepted register names: (n, m is unsigned integer, n < m)
+  /// v
+  /// s
+  /// {vn}
+  /// {sn}
+  /// {S} , wheere S is a special register name
+  {vn:m}
+  /// {sn:m}
   bool validateAsmConstraint(const char *,
  TargetInfo::ConstraintInfo ) const override {
-switch (*Name) {
-default:
-