[PATCH] D33945: [OpenCL] Add support for missing sub_group functions.
joey closed this revision. joey added a comment. I committed all the parts separately: r309567 (with r309571 to fix a test), r309678 and r310477. https://reviews.llvm.org/D33945 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33945: [OpenCL] Add support for missing sub_group functions.
Anastasia accepted this revision. Anastasia added a comment. LGTM! Thanks! https://reviews.llvm.org/D33945 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33945: [OpenCL] Add support for missing sub_group functions.
bader accepted this revision. bader added a comment. This revision is now accepted and ready to land. Thanks! Overall the patch looks good, but I would suggest splitting it into three commits (as they seems to be independent): 1. [OpenCL] Check that cl_khr_subgroups pragma is enabled if respective extension is used. 2. [OpenCL] Add support for missing sub_group functions. 3. [OpenCL] Fix return type for reserve pipe built-ins. Please, add a regression test for the part #3. You might also review this patch with @Anastasia (OpenCL code owner). Comment at: Sema/SemaChecking.cpp:685-689 + // Since return type of reserve_read/write_pipe built-in function is + // reserve_id_t, which is not defined in the builtin def file , we used int + // as return type and need to override the return type of these functions. + Call->setType(S.Context.OCLReserveIDTy); + This change is not covered with regression tests. https://reviews.llvm.org/D33945 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33945: [OpenCL] Add support for missing sub_group functions.
joey updated this revision to Diff 108452. joey added a comment. Updated all the comments you made and rebased. Sorry for the long delay. https://reviews.llvm.org/D33945 Files: CodeGen/CGBuiltin.cpp CodeGenOpenCL/cl20-device-side-enqueue.cl CodeGenOpenCL/pipe_builtin.cl Sema/SemaChecking.cpp SemaOpenCL/cl20-device-side-enqueue.cl SemaOpenCL/invalid-pipe-builtin-cl2.0.cl clang/Basic/Builtins.def Index: SemaOpenCL/invalid-pipe-builtin-cl2.0.cl === --- SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +++ SemaOpenCL/invalid-pipe-builtin-cl2.0.cl @@ -1,5 +1,7 @@ // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + void test1(read_only pipe int p, global int* ptr){ int tmp; reserve_id_t rid; Index: SemaOpenCL/cl20-device-side-enqueue.cl === --- SemaOpenCL/cl20-device-side-enqueue.cl +++ SemaOpenCL/cl20-device-side-enqueue.cl @@ -209,3 +209,35 @@ size = get_kernel_preferred_work_group_size_multiple(1); // expected-error{{expected block argument}} size = get_kernel_preferred_work_group_size_multiple(block_A, 1); // expected-error{{too many arguments to function call, expected 1, have 2}} } + +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + +kernel void foo(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); + buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}} + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected block argument type}} +} + +kernel void bar(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); + buf[0] = get_kernel_sub_group_count_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected 'ndrange_t' argument type}} + buf[0] = get_kernel_sub_group_count_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected block argument type}} +} + +#pragma OPENCL EXTENSION cl_khr_subgroups : disable + +kernel void foo1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} + +kernel void bar1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} Index: CodeGenOpenCL/pipe_builtin.cl === --- CodeGenOpenCL/pipe_builtin.cl +++ CodeGenOpenCL/pipe_builtin.cl @@ -3,6 +3,8 @@ // CHECK: %opencl.pipe_t = type opaque // CHECK: %opencl.reserve_id_t = type opaque +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + void test1(read_only pipe int p, global int *ptr) { // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4) read_pipe(p, ptr); Index: CodeGenOpenCL/cl20-device-side-enqueue.cl === --- CodeGenOpenCL/cl20-device-side-enqueue.cl +++ CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -1,6 +1,8 @@ // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B32 // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64 +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + typedef void (^bl_t)(local void *); typedef struct {int a;} ndrange_t; @@ -138,4 +140,9 @@ size = get_kernel_preferred_work_group_size_multiple(block_A); // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_G); + + // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*)) + size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){}); + // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrs
[PATCH] D33945: [OpenCL] Add support for missing sub_group functions.
bader requested changes to this revision. bader added a comment. This revision now requires changes to proceed. Please, split this patch into two parts: 1. Improve diagnostics on extension enabling. 2. Add missing `sub_group_*` built-in functions. Comment at: Sema/SemaChecking.cpp:1091-1092 case Builtin::BIwork_group_reserve_write_pipe: +if (SemaBuiltinReserveRWPipe(*this, TheCall, false)) + return ExprError(); +// Since return type of reserve_read/write_pipe built-in function is I suggest leaving `SemaBuiltinReserveRWPipe` as is (i.e. two parameter) and modify the check for `sub_group_*` functions only: ``` if (checkOpenCLSubgroupExt(S, TheCall) || SemaBuiltinReserveRWPipe(*this, TheCall)) return ExprError(); ``` It think it's more readable than looking at SemaBuiltinReserveRWPipe declaration or leaving the comment like this: ``` if (SemaBuiltinReserveRWPipe(*this, TheCall, false /*isSubgroup*/)) ``` Please, apply the same approach to `SemaBuiltinCommitRWPipe`. In addition to that, it makes sense to set the `TheCall` type inside the `SemaBuiltinReserveRWPipe` to avoid code duplication. Comment at: SemaOpenCL/cl20-device-side-enqueue.cl:219 + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); + buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}} +} Please, add a test case(s) on invalid block parameters to cover the checks you added for new `sub_group_` built-ins.. Comment at: SemaOpenCL/sub-group-bifs.cl:1 +// RUN: %clang_cc1 %s -verify -fsyntax-only -cl-std=CL2.0 + I don't think it makes sense to add this test. This test look like a duplicate of test cases added to SemaOpenCL/cl20-device-side-enqueue.cl. Comment at: clang/Basic/Builtins.def:1402-1403 LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "i.", "tn", OCLC20_LANG) +LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "i.", "tn", OCLC20_LANG) +LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "i.", "tn", OCLC20_LANG) This built-in functions should return unsigned integers: "i." -> "Ui.". Please, fix `get_kernel_work_group_size` and `get_kernel_preferred_work_group_size_multiple` too. Comment at: clang/Basic/DiagnosticSemaKinds.td:8423-8424 "illegal call to enqueue_kernel, incorrect argument types">; def err_opencl_enqueue_kernel_expected_type : Error< - "illegal call to enqueue_kernel, expected %0 argument type">; + "illegal call to %0, expected %1 argument type">; def err_opencl_enqueue_kernel_local_size_args : Error< Since, this message is not specific to enqueue_kernel anymore, I suggest either rename it or better re-use existing diagnostics if possible. I think there are already message to report argument mismatch + probably additional note diagnostics that hints correct argument type. https://reviews.llvm.org/D33945 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D33945: [OpenCL] Add support for missing sub_group functions.
joey created this revision. Herald added subscribers: Anastasia, yaxunl. This adds get_kernel_max_sub_group_size_for_ndrange and get_kernel_sub_group_count_for_ndrange. Note this also changes err_opencl_requires_extension to print the name of the function that the diagnostic is warning about. https://reviews.llvm.org/D33945 Files: CodeGen/CGBuiltin.cpp CodeGenOpenCL/cl20-device-side-enqueue.cl CodeGenOpenCL/pipe_builtin.cl Sema/Sema.cpp Sema/SemaChecking.cpp SemaOpenCL/cl20-device-side-enqueue.cl SemaOpenCL/extension-begin.cl SemaOpenCL/invalid-pipe-builtin-cl2.0.cl SemaOpenCL/sub-group-bifs.cl clang/Basic/Builtins.def clang/Basic/DiagnosticSemaKinds.td clang/Sema/Sema.h Index: SemaOpenCL/sub-group-bifs.cl === --- /dev/null +++ SemaOpenCL/sub-group-bifs.cl @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 %s -verify -fsyntax-only -cl-std=CL2.0 + +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + +typedef struct {} ndrange_t; + +kernel void foo(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); + buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}} +} + +kernel void bar(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); + buf[0] = get_kernel_sub_group_count_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected 'ndrange_t' argument type}} +} + +#pragma OPENCL EXTENSION cl_khr_subgroups : disable + +kernel void foo1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} + +kernel void bar1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} Index: SemaOpenCL/invalid-pipe-builtin-cl2.0.cl === --- SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +++ SemaOpenCL/invalid-pipe-builtin-cl2.0.cl @@ -1,5 +1,7 @@ // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + void test1(read_only pipe int p, global int* ptr){ int tmp; reserve_id_t rid; Index: SemaOpenCL/extension-begin.cl === --- SemaOpenCL/extension-begin.cl +++ SemaOpenCL/extension-begin.cl @@ -46,7 +46,7 @@ const struct A test_A_local; // expected-error {{use of type 'struct A' requires my_ext extension to be enabled}} TypedefOfA test_typedef_A; // expected-error {{use of type 'TypedefOfA' (aka 'struct A') requires my_ext extension to be enabled}} PointerOfA test_A_pointer; // expected-error {{use of type 'PointerOfA' (aka 'const struct A *') requires my_ext extension to be enabled}} - f(); // expected-error {{use of declaration requires my_ext extension to be enabled}} + f(); // expected-error {{use of declaration 'f' requires my_ext extension to be enabled}} g(0); // expected-error {{no matching function for call to 'g'}} // expected-note@-26 {{candidate disabled due to OpenCL extension}} // expected-note@-22 {{candidate function not viable: requires 0 arguments, but 1 was provided}} Index: SemaOpenCL/cl20-device-side-enqueue.cl === --- SemaOpenCL/cl20-device-side-enqueue.cl +++ SemaOpenCL/cl20-device-side-enqueue.cl @@ -19,19 +19,19 @@ return 0; }); - enqueue_kernel(vptr, flags, ndrange, ^(void) { // expected-error{{illegal call to enqueue_kernel, expected 'queue_t' argument type}} + enqueue_kernel(vptr, flags, ndrange, ^(void) { // expected-error{{illegal call to 'enqueue_kernel', expected 'queue_t' argument type}} return 0; }); - enqueue_kernel(default_queue, vptr, ndrange, ^(void) { // expected-error{{illegal call to enqueue_kernel, expected 'kernel_enqueue_flags_t' (i.e. uint) argument type}} + enqueue_kernel(default_queue, vptr, ndrange, ^(void) { // expected-error{{illegal call to 'enqueue_kernel', expected 'kernel_enqueue_flags_t' (i.e. uint) argument type}} return 0; }); - enqueue_kernel(default_queue, flags, vptr, ^(void) { // expected-error{{illegal call to enqueue_kernel, expected 'ndrange_t' argument type}} + enqueue_kernel(default_queue, flags, vptr, ^(void) { // expected-error{{illegal call to 'enqueue_kernel', expected 'ndrange_t' argument type}} return 0; }); - enqueue_kernel(default_queue, flags, ndrange, vptr); // expected-error{{illegal call to enqueue_kernel, expected block argument}}