[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
This revision was automatically updated to reflect the committed changes. Closed by commit rL314452: [AMDGPU] Allow flexible register names in inline asm constraints (authored by yaxunl). Changed prior to commit: https://reviews.llvm.org/D37568?vs=116383=117037#toc Repository: rL LLVM https://reviews.llvm.org/D37568 Files: cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl Index: cfe/trunk/lib/Basic/Targets/AMDGPU.h === --- cfe/trunk/lib/Basic/Targets/AMDGPU.h +++ cfe/trunk/lib/Basic/Targets/AMDGPU.h @@ -17,6 +17,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" @@ -115,17 +116,83 @@ return None; } + /// Accepted register names: (n, m is unsigned integer, n < m) + /// v + /// s + /// {vn}, {v[n]} + /// {sn}, {s[n]} + /// {S} , where S is a special register name + {v[n:m]} + /// {s[n:m]} bool validateAsmConstraint(const char *, TargetInfo::ConstraintInfo ) const override { -switch (*Name) { -default: - break; -case 'v': // vgpr -case 's': // sgpr +static const ::llvm::StringSet<> SpecialRegs({ +"exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma", +"flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo", +"exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi", +}); + +StringRef S(Name); +bool HasLeftParen = false; +if (S.front() == '{') { + HasLeftParen = true; + S = S.drop_front(); +} +if (S.empty()) + return false; +if (S.front() != 'v' && S.front() != 's') { + if (!HasLeftParen) +return false; + auto E = S.find('}'); + if (!SpecialRegs.count(S.substr(0, E))) +return false; + S = S.drop_front(E + 1); + if (!S.empty()) +return false; + // Found {S} where S is a special register. + Info.setAllowsRegister(); + Name = S.data() - 1; + return true; +} +S = S.drop_front(); +if (!HasLeftParen) { + if (!S.empty()) +return false; + // Found s or v. 
Info.setAllowsRegister(); + Name = S.data() - 1; return true; } -return false; +bool HasLeftBracket = false; +if (!S.empty() && S.front() == '[') { + HasLeftBracket = true; + S = S.drop_front(); +} +unsigned long long N; +if (S.empty() || consumeUnsignedInteger(S, 10, N)) + return false; +if (!S.empty() && S.front() == ':') { + if (!HasLeftBracket) +return false; + S = S.drop_front(); + unsigned long long M; + if (consumeUnsignedInteger(S, 10, M) || N >= M) +return false; +} +if (HasLeftBracket) { + if (S.empty() || S.front() != ']') +return false; + S = S.drop_front(); +} +if (S.empty() || S.front() != '}') + return false; +S = S.drop_front(); +if (!S.empty()) + return false; +// Found {vn}, {sn}, {v[n]}, {s[n]}, {v[n:m]}, or {s[n:m]}. +Info.setAllowsRegister(); +Name = S.data() - 1; +return true; } bool Index: cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl === --- cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl +++ cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl @@ -1,14 +1,76 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only %s -// expected-no-diagnostics +// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void test () { int sgpr = 0, vgpr = 0, imm = 0; // sgpr constraints __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}} + // vgpr constraints __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); } + +__kernel void +test_float(const __global float *a, const __global float *b, 
__global float *c, unsigned i) +{ +float ai = a[i]; +float bi = b[i]; +float ci; + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
b-sumner accepted this revision. b-sumner added a comment. This revision is now accepted and ready to land. LGTM. I think we can leave immediates to another patch. https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl marked an inline comment as done. yaxunl added a comment. Ping. Brian, Stas, can you review this, since Matt is on vacation? Thanks. https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl marked an inline comment as done. yaxunl added inline comments. Comment at: test/Sema/inline-asm-validate-amdgpu.cl:74 +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : "v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint '=v[1:2]' in asm}} + +c[i] = ci; yaxunl wrote: > arsenm wrote: > > While you're here can we add some tests for the immediate constraints? > > There was a bug report recently when using s_trap with the i constraints > > for the constant operand. > what's the syntax of the immediate constraints? And some examples? Thanks. Ping. Can we leave this for another patch, since there are people waiting for this feature? https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl updated this revision to Diff 116383. yaxunl marked 4 inline comments as done. yaxunl edited the summary of this revision. yaxunl added a comment. Fix typo. https://reviews.llvm.org/D37568 Files: lib/Basic/Targets/AMDGPU.h test/CodeGenOpenCL/amdgcn-inline-asm.cl test/Sema/inline-asm-validate-amdgpu.cl Index: test/Sema/inline-asm-validate-amdgpu.cl === --- test/Sema/inline-asm-validate-amdgpu.cl +++ test/Sema/inline-asm-validate-amdgpu.cl @@ -1,14 +1,76 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only %s -// expected-no-diagnostics +// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void test () { int sgpr = 0, vgpr = 0, imm = 0; // sgpr constraints __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}} + // vgpr constraints __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); } + +__kernel void +test_float(const __global float *a, const __global float *b, __global float *c, unsigned i) +{ +float ai = a[i]; +float bi = b[i]; +float ci; + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output 
constraint '={a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={va}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={va}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "=v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=v1}' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : "={v[1}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1]' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v[a]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[a]}' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint 
'{a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} +c[i] = ci; +} + +__kernel void +test_double(const __global double *a, const __global double *b, __global double *c, unsigned i) +{ +double ai = a[i]; +double bi = b[i]; +double ci; + +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}} +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}} +
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl marked 2 inline comments as done. yaxunl added inline comments. Comment at: lib/Basic/Targets/AMDGPU.h:194 +Info.setAllowsRegister(); +Name = S.data() - 1; +return true; arsenm wrote: > I'm not sure I understand these data() - 1s. The caller of this function expects Name to point at the last parsed character and will increment it before checking the next character. Comment at: test/Sema/inline-asm-validate-amdgpu.cl:74 +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : "v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint '=v[1:2]' in asm}} + +c[i] = ci; arsenm wrote: > While you're here can we add some tests for the immediate constraints? There > was a bug report recently when using s_trap with the i constraints for the > constant operand. What's the syntax of the immediate constraints? Could you give some examples? Thanks. https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
arsenm added inline comments. Comment at: lib/Basic/Targets/AMDGPU.h:124 + /// {sn}, {s[n]} + /// {S} , wheere S is a special register name + {v[n:m]} Typo wheere Comment at: lib/Basic/Targets/AMDGPU.h:194 +Info.setAllowsRegister(); +Name = S.data() - 1; +return true; I'm not sure I understand these data() - 1s. Comment at: test/Sema/inline-asm-validate-amdgpu.cl:74 +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : "v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint '=v[1:2]' in asm}} + +c[i] = ci; While you're here can we add some tests for the immediate constraints? There was a bug report recently when using s_trap with the i constraints for the constant operand. https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl updated this revision to Diff 114367. yaxunl edited the summary of this revision. yaxunl added a comment. Allow {v[n]} and {s[n]}. Add more tests. https://reviews.llvm.org/D37568 Files: lib/Basic/Targets/AMDGPU.h test/CodeGenOpenCL/amdgcn-inline-asm.cl test/Sema/inline-asm-validate-amdgpu.cl Index: test/Sema/inline-asm-validate-amdgpu.cl === --- test/Sema/inline-asm-validate-amdgpu.cl +++ test/Sema/inline-asm-validate-amdgpu.cl @@ -1,14 +1,76 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only %s -// expected-no-diagnostics +// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void test () { int sgpr = 0, vgpr = 0, imm = 0; // sgpr constraints __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : ); + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}} + __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}} + // vgpr constraints __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); } + +__kernel void +test_float(const __global float *a, const __global float *b, __global float *c, unsigned i) +{ +float ai = a[i]; +float bi = b[i]; +float ci; + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint 
'={a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={va}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={va}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "=v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=v1}' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : "={v[1}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1]' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v[a]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[a]}' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in 
asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} +c[i] = ci; +} + +__kernel void +test_double(const __global double *a, const __global double *b, __global double *c, unsigned i) +{ +double ai = a[i]; +double bi = b[i]; +double ci; + +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}} +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}} +
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl added a comment. In https://reviews.llvm.org/D37568#863735, @b-sumner wrote: > The assembler accepts v[N] in addition to vN. I'm not sure if that is needed > here. Then we'd better also allow that in constraints to avoid confusing users. https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
b-sumner added a comment. The assembler accepts v[N] in addition to vN. I'm not sure if that is needed here. https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl updated this revision to Diff 114223. yaxunl marked 3 inline comments as done. yaxunl edited the summary of this revision. yaxunl added a comment. Revised by Matt's comments. https://reviews.llvm.org/D37568 Files: lib/Basic/Targets/AMDGPU.h test/CodeGenOpenCL/amdgcn-inline-asm.cl test/Sema/inline-asm-validate-amdgpu.cl Index: test/Sema/inline-asm-validate-amdgpu.cl === --- test/Sema/inline-asm-validate-amdgpu.cl +++ test/Sema/inline-asm-validate-amdgpu.cl @@ -1,6 +1,7 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only %s -// expected-no-diagnostics +// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void test () { @@ -12,3 +13,45 @@ // vgpr constraints __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); } + +__kernel void +test_float(const __global float *a, const __global float *b, __global float *c, unsigned i) +{ +float ai = a[i]; +float bi = b[i]; +float ci; + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}} + +__asm("v_add_f32_e32 v1, v2, exec" : "={v1}"(ci) : "{v2}"(ai), "{exec}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, exec" : "={v1}"(ci) : "{v2}"(ai), "{exec}a"(bi) : ); // expected-error {{invalid input constraint '{exec}a' in asm}} + 
+__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} +c[i] = ci; +} + +__kernel void +test_double(const __global double *a, const __global double *b, __global double *c, unsigned i) +{ +double ai = a[i]; +double bi = b[i]; +double ci; + +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}} +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}} +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}a"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]}a' in asm}} + +__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : "v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint '=v[1:2]' in asm}} + +c[i] = ci; +} Index: test/CodeGenOpenCL/amdgcn-inline-asm.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgcn-inline-asm.cl @@ -0,0 +1,16 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -O0 -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: @ker +__kernel void +ker(const __global float *a, const __global float *b, __global float *c, unsigned i) +{ +float ai = a[i]; +float bi = b[i]; +float ci; +// CHECK: call float asm "v_add_f32_e32 v1, v2, v3", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}}) +__asm("v_add_f32_e32 v1, v2, 
v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +// CHECK: call float asm "v_add_f32_e32 $0, $1, $2", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}}) +__asm("v_add_f32_e32 %0, %1, %2" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +c[i] = ci; +} Index: lib/Basic/Targets/AMDGPU.h === --- lib/Basic/Targets/AMDGPU.h +++ lib/Basic/Targets/AMDGPU.h @@ -17,6 +17,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" @@ -115,17 +116,79 @@ return None; } + /// Accepted register names: (n, m
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl added inline comments. Comment at: test/Sema/inline-asm-validate-amdgpu.cl:38 +__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : ); // expected-error {{invalid output constraint '=v1:2' in asm}} + arsenm wrote: > arsenm wrote: > > I don't understand. The backend parsed syntax is v[3:4]. Why should v3:4 be > > valid? Also in this example you are using a 64-bit input to a 32-bit operand > i.e. this won't parse in the backend and is invalid. The brackets are required I will fix the [] syntax. The test did not consider the validity of the inline assembly for the backend, since the front end only checks the format. I agree it is better to make it valid for the backend and will try to fix that. https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
arsenm added inline comments. Comment at: test/Sema/inline-asm-validate-amdgpu.cl:38 +__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : ); // expected-error {{invalid output constraint '=v1:2' in asm}} + arsenm wrote: > I don't understand. The backend parsed syntax is v[3:4]. Why should v3:4 be > valid? Also in this example you are using a 64-bit input to a 32-bit operand i.e. this won't parse in the backend and is invalid. The brackets are required https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
arsenm added inline comments. Comment at: test/Sema/inline-asm-validate-amdgpu.cl:38 +__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : ); // expected-error {{invalid output constraint '=v1:2' in asm}} + I don't understand. The backend parsed syntax is v[3:4]. Why should v3:4 be valid? Also in this example you are using a 64-bit input to a 32-bit operand https://reviews.llvm.org/D37568 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37568: [AMDGPU] Allow flexible register names in inline asm constraints
yaxunl created this revision. Herald added subscribers: eraman, t-tye, tpr, dstuttard, nhaehnle, wdng, kzhuravl. Currently AMDGPU inline asm only allow "v" and "s" as register names in constraints. This patch allows the following register names in constraints: (n, m is unsigned integer, n < m) v s {vn} {sn} {S} , wheere S is a special register name {vn:m} {sn:m} https://reviews.llvm.org/D37568 Files: lib/Basic/Targets/AMDGPU.h test/CodeGenOpenCL/amdgcn-inline-asm.cl test/Sema/inline-asm-validate-amdgpu.cl Index: test/Sema/inline-asm-validate-amdgpu.cl === --- test/Sema/inline-asm-validate-amdgpu.cl +++ test/Sema/inline-asm-validate-amdgpu.cl @@ -1,6 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only %s -// expected-no-diagnostics +// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s kernel void test () { @@ -12,3 +11,33 @@ // vgpr constraints __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); } + +__kernel void +ker(const __global float *a, const __global float *b, __global float *c, unsigned i) +{ +float ai = a[i]; +float bi = b[i]; +float ci; + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={exec}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +__asm("v_add_f32_e32 
v1, v2, v3" : "={exec}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={exec}a' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1:2}"(ci) : "{v3:4}"(ai), "{v4:5}"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : "=v{1:2}"(ci) : "{v3:4}"(ai), "{v4:5}"(bi) : ); //expected-error {{invalid output constraint '=v{1:2}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1:2}a"(ci) : "{v3:4}"(ai), "{v4:5}"(bi) : ); //expected-error {{invalid output constraint '={v1:2}a' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : ); +__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "=v1:2"(ci) : "v3:4"(ai), "v4:5"(bi) : ); // expected-error {{invalid output constraint '=v1:2' in asm}} + +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} +c[i] = ci; +} Index: test/CodeGenOpenCL/amdgcn-inline-asm.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgcn-inline-asm.cl @@ -0,0 +1,16 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -O0 -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: @ker +__kernel void +ker(const __global float *a, const __global float *b, __global float *c, unsigned i) +{ +float ai = a[i]; +float bi = b[i]; +float ci; +// CHECK: call float asm "v_add_f32_e32 v1, v2, v3", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}}) +__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +// CHECK: call float asm "v_add_f32_e32 $0, $1, $2", "={v1},{v2},{v3}"(float %{{.*}}, float %{{.*}}) +__asm("v_add_f32_e32 %0, %1, %2" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); +c[i] = ci; +} Index: lib/Basic/Targets/AMDGPU.h === --- 
lib/Basic/Targets/AMDGPU.h +++ lib/Basic/Targets/AMDGPU.h @@ -17,6 +17,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" @@ -115,17 +116,70 @@ return None; } + /// Accepted register names: (n, m is unsigned integer, n < m) + /// v + /// s + /// {vn} + /// {sn} + /// {S} , wheere S is a special register name + {vn:m} + /// {sn:m} bool validateAsmConstraint(const char *, TargetInfo::ConstraintInfo ) const override { -switch (*Name) { -default: -