A smoke test suite. The patch has been tested more thoroughly with the proprietary HSA PRM conformance suite.
Requires the HSAILasm tool to first compile the .hsail to .brig. -- Pekka Jääskeläinen Parmance
A smoke test suite. The patch has been tested more thoroughly with the proprietary HSA PRM conformance suite. Requires the HSAILasm tool to first compile the .hsail to .brig. diff --git a/gcc/testsuite/brig.dg/README b/gcc/testsuite/brig.dg/README new file mode 100644 index 0000000..cc313c4 --- /dev/null +++ b/gcc/testsuite/brig.dg/README @@ -0,0 +1,10 @@ +BRIG (HSAIL) frontend test cases +-------------------------------- + +The suite consists of "smoke tests" that test several features of +the compilation and regression tests, but is not an exhaustive test +suite for all HSAIL instructions. The HSA PRM conformance suite +is supposed to be used for that. + +HSAILasm is required for converting the text HSAIL files to BRIGs +which the compiler consumes. diff --git a/gcc/testsuite/brig.dg/dg.exp b/gcc/testsuite/brig.dg/dg.exp new file mode 100644 index 0000000..fd75cae --- /dev/null +++ b/gcc/testsuite/brig.dg/dg.exp @@ -0,0 +1,27 @@ +# Copyright (C) 2009-2014 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +load_lib brig-dg.exp + +# Initialize `dg'. +dg-init + +dg-runtest [find $srcdir/$subdir *.hsail] "" "" + +# All done. 
+dg-finish diff --git a/gcc/testsuite/brig.dg/test/gimple/alloca.hsail b/gcc/testsuite/brig.dg/test/gimple/alloca.hsail new file mode 100644 index 0000000..73c2f93 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/alloca.hsail @@ -0,0 +1,37 @@ +module &module:1:0:$full:$large:$default; + +/* Tests for alloca. */ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple" } */ + +prog function &subfunction(arg_u32 %return_value)() { + alloca_align(1)_u32 $s2, 256; + st_arg_u32 $s2, [%return_value]; + ret; +}; + +prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u32 $s0, [$d0]; + + alloca_align(256)_u32 $s1, 16; + { + arg_u32 %return_value; + call &subfunction(%return_value)(); + ld_arg_u32 $s1, [%return_value]; + } + ld_kernarg_u64 $d1, [%output_ptr]; + st_global_u32 $s1, [$d0]; +}; + +/* { dg-final { scan-tree-dump "s2 = __phsa_builtin_alloca \\\(256, 1, __context\\\);" "gimple" } } */ + +/* { dg-final { scan-tree-dump "s1 = __phsa_builtin_alloca \\\(16, 256, __context\\\);" "gimple" } } */ + + +/* Both functions should have an alloca frame push and pop. */ +/* { dg-final { scan-tree-dump-times "__phsa_builtin_alloca_push_frame \\\(__context\\\);" 2 "gimple" } } */ + +/* { dg-final { scan-tree-dump-times "__phsa_builtin_alloca_pop_frame \\\(__context\\\);" 2 "gimple" } } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/atomics.hsail b/gcc/testsuite/brig.dg/test/gimple/atomics.hsail new file mode 100644 index 0000000..a0b2f85 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/atomics.hsail @@ -0,0 +1,33 @@ +module &module:1:0:$full:$large:$default; + +/* Test for atomic instructions. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-original" } */ + +prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + + atomic_ld_global_rlx_system_b32 $s0, [$d0]; + atomic_add_global_rlx_system_u32 $s1, [$d0 + 4], $s0; + + ld_kernarg_u64 $d0, [%output_ptr]; + atomicnoret_st_global_rlx_system_b32 [$d0], $s2; + + atomicnoret_min_global_rlx_system_u32 [$d0 + 4], $s1; + + ret; +}; + +/* The atomic loads are implemented by casting to an atomic pointer. */ +/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<unsigned int>\\\(\\\*\\\(atomic unsigned int \\\*\\\)" "original"} } */ + +/* The atomic add should call a gcc builtin. */ +/* { dg-final { scan-tree-dump "= __sync_fetch_and_add_4 \\\(" "original"} } */ + +/* The atomic stores are implemented by casting to an atomic pointer. */ +/* { dg-final { scan-tree-dump "\\\*\\\(atomic unsigned int \\\*\\\) VIEW_CONVERT_EXPR<unsigned int \\\*>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d0\\\)\\\) = s2;" "original"} } */ + +/* The atomic min is implemented by a custom builtin. */ +/* { dg-final { scan-tree-dump "builtin_out.\[0-9\]+ = __phsa_builtin_atomic_min_u32 \\\(" "original"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/branches.hsail b/gcc/testsuite/brig.dg/test/gimple/branches.hsail new file mode 100644 index 0000000..081fde3 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/branches.hsail @@ -0,0 +1,58 @@ +module &module:1:0:$full:$large:$default; + +/* Test different style of branches. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple" } */ + +prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u64 $d1, [$d0]; + ld_global_u64 $d2, [$d0 + 8]; + + ld_global_u32 $s0, [$d0 + 16]; + ld_global_u32 $s1, [$d0 + 20]; + + sbr_width(all)_u32 $s1 [@case0, @case1, @case2]; +@case0: + st_global_u64 0, [$d0]; + br @out; +@case1: + st_global_u64 1, [$d0]; + br @out; +@case2: + st_global_u64 2, [$d0]; +@out: + cmp_eq_u32_u32 $s2, $s1, $s0; + cvt_b1_u32 $c0, $s2; + + cbr_width(all)_b1 $c0, @true_branch; +@false_branch: + st_global_u64 $d1, [$d0]; + +@true_branch: + ld_kernarg_u64 $d0, [%output_ptr]; + + st_global_u32 $s2, [$d0 + 8]; + br @skip; + st_global_u32 $s3, [$d0 + 12]; + +@skip: + ret; +}; + +/* sbr is converted to a switch */ +/* { dg-final { scan-tree-dump "switch \\\(s1\\\) <default: <D.\[0-9\]+>, case 0: <D.\[0-9\]+>, case 1: <D.\[0-9\]+>, case 2: <D.\[0-9\]+>>" "gimple"} } */ + +/* br @out converted to gotos */ +/* { dg-final { scan-tree-dump-times "goto @out" 2 "gimple"} } */ + +/* the comparison instruction */ +/* { dg-final { scan-tree-dump "c0 = s2 != 0;" "gimple" } } */ + +/* cbr to an if clause */ +/* { dg-final { scan-tree-dump "if \\\(c0 != 0\\\) goto @true_branch; else goto <D.\[0-9\]+>;" "gimple" } } */ + +/* br @skip converted to a goto */ +/* { dg-final { scan-tree-dump "goto @skip" "gimple"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail new file mode 100644 index 0000000..de1a6dc --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail @@ -0,0 +1,74 @@ +module &module:1:0:$full:$large:$default; + +/* Tests for fbarrier. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple" } */ + +fbarrier &fb_module_scope; + +prog function &subfunction(arg_u32 %return_value)() { + + workitemflatabsid_u32 $s3; + cvt_b1_u32 $c1, $s3; + cbr_width(all)_b1 $c1, @skip_fbar; + waitfbar &fb_module_scope; +@skip_fbar: + + st_arg_u32 $s3, [%return_value]; + ret; +}; + +prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + fbarrier %fb_func_scope; + + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u32 $s0, [$d0]; + + workitemflatabsid_u32 $s1; + cvt_b1_u32 $c1, $s1; + cbr_width(all)_b1 $c1, @skip_init; + + initfbar &fb_module_scope; + initfbar %fb_func_scope; + + joinfbar &fb_module_scope; + +@skip_init: + barrier_width(all); + + joinfbar %fb_func_scope; + + { + arg_u32 %return_value; + call &subfunction(%return_value)(); + ld_arg_u32 $s1, [%return_value]; + } + arrivefbar %fb_func_scope; + + ld_kernarg_u64 $d1, [%output_ptr]; + st_global_u32 $s1, [$d0]; + + workitemflatabsid_u32 $s1; + cvt_b1_u32 $c0, $s1; + cbr_width(all)_b1 $c0, @skip_fini; + + releasefbar &fb_module_scope; + releasefbar %fb_func_scope; + +@skip_fini: + +}; +/* fbarriers are allocated from the group memory in the order of + appearance. The current implementation allocates 32B per fbarrier. 
*/ + +/* { dg-final { scan-tree-dump "__phsa_builtin_waitfbar \\\(0, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__phsa_builtin_initfbar \\\(0, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__phsa_builtin_initfbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__phsa_builtin_joinfbar \\\(0, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__phsa_builtin_barrier \\\(__context\\\);\[\n ]+__phsa_builtin_joinfbar \\\(32, __context\\\);" "gimple"} } */ + +/* { dg-final { scan-tree-dump "__phsa_builtin_arrivefbar \\\(32, __context\\\);" "gimple"} } */ + +/* { dg-final { scan-tree-dump "__phsa_builtin_releasefbar \\\(0, __context\\\);\[\n ]+__phsa_builtin_releasefbar \\\(32, __context\\\);" "gimple"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail new file mode 100644 index 0000000..d3b690c --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail @@ -0,0 +1,59 @@ +module &module:1:0:$full:$large:$default; + +/* Function calls and argument passing. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple" } */ + +prog function &subfunction(arg_u32 %return_value)(arg_f32 %float_arg, arg_f64 %double_arg, arg_f16 %half_arg) { + ld_arg_f32 $s0, [%float_arg]; + cvt_u32_f32 $s0, $s0; + + ld_arg_f64 $d0, [%double_arg]; + cvt_u32_f64 $s1, $d0; + + ld_arg_f16 $s2, [%half_arg]; + cvt_u32_f16 $s2, $s2; + + add_u32 $s3, $s0, $s1; + add_u32 $s3, $s3, $s2; + + st_arg_u32 $s3, [%return_value]; + ret; +}; + +prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u32 $s0, [$d0]; + { + arg_f32 %float_arg; + arg_f64 %double_arg; + arg_f16 %half_arg; + arg_u32 %return_value; + + st_arg_f32 12.0f, [%float_arg]; + st_arg_f64 640.0d, [%double_arg]; + st_arg_f16 12.0h, [%half_arg]; + + call &subfunction(%return_value)(%float_arg, %double_arg, %half_arg); + + ld_arg_u32 $s1, [%return_value]; + } + ld_kernarg_u64 $d1, [%output_ptr]; + st_global_u32 $s1, [$d0]; +}; + +/* The generated function call should have the incoming arguments and three hidden arguments. */ + +/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, __private_base_addr\\\);" "gimple"} } */ + +/* The callee should refer directly to the scalar arguments when it reads them. */ +/* { dg-final { scan-tree-dump "= float_arg;" "gimple"} } */ +/* { dg-final { scan-tree-dump "= double_arg;" "gimple"} } */ +/* { dg-final { scan-tree-dump "= half_arg;" "gimple"} } */ + +/* The return value is stored to a temporary before returned. 
*/ +/* { dg-final { scan-tree-dump "_retvalue_temp = s3;" "gimple"} } */ +/* { dg-final { scan-tree-dump "D.\[0-9\]+ = _retvalue_temp;" "gimple"} } */ +/* { dg-final { scan-tree-dump "return D.\[0-9\]+;" "gimple"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/mem.hsail b/gcc/testsuite/brig.dg/test/gimple/mem.hsail new file mode 100644 index 0000000..777faa1 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/mem.hsail @@ -0,0 +1,39 @@ +module &module:1:0:$full:$large:$default; + +/* Tests for load/store addressing modes. */ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-original" } */ + +prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %input_ptr2, kernarg_u64 %output_ptr) +{ + global_u32 %global_array[4]; + + ld_kernarg_u64 $d0, [%input_ptr]; + ld_kernarg_u64 $d2, [%input_ptr2]; + ld_global_u32 $s0, [$d0]; + ld_global_u64 $d1, [$d2 + 4]; + + ld_global_u32 $s2, [%global_array][$d1 + 4]; + + ld_kernarg_u64 $d0, [%output_ptr]; + st_global_u32 $s0, [$d0]; + st_global_u32 $s1, [$d0 + 4]; + st_global_u32 $s2, [$d0 + 8]; + + ret; +}; + +/* %input_ptr, %input_ptr2 and %output_ptr accesses should generate offsets to the __args array */ +/* { dg-final { scan-tree-dump "\\\(__args\\\);\[\n \]+d0 =" "original"} } */ +/* { dg-final { scan-tree-dump "\\\(__args\\\) \\\+ 8\\\);\[\n \]+d2 =" "original"} } */ +/* { dg-final { scan-tree-dump "\\\(__args\\\) \\\+ 16\\\);\[\n \]+d0 =" "original"} } */ + +/* ld_global_u32 $s0, [$d0] */ +/* { dg-final { scan-tree-dump "<unsigned char \\\*>\\\(d0\\\)\\\);\[\n \]+s0 =" "original"} } */ + +/* ld_global_u64 $d1, [$d2 + 4] pointer arithmetics*/ +/* { dg-final { scan-tree-dump "\\\(d2\\\)\\\) \\\+ 4\\\);\[\n \]+d1 = " "original"} } */ + +/* ld_global_u32 $s2, [%global_array][$d1 + 4]; is the most complex form */ +/* { dg-final { scan-tree-dump "&_Kernel.global_array\\\) \\\+ VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned int \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char 
\\\*>\\\(d1\\\)\\\) \\\+ 4\\\)\\\)\\\);\[\n \]+s2 = " "original" } } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/mulhi.hsail b/gcc/testsuite/brig.dg/test/gimple/mulhi.hsail new file mode 100644 index 0000000..acdced9 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/mulhi.hsail @@ -0,0 +1,33 @@ +module &module:1:0:$full:$large:$default; + +/* Test high part multiplies. */ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple" } */ + +prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u64 $d1, [$d0]; + ld_global_u64 $d2, [$d0 + 8]; + + ld_global_u32 $s0, [$d0 + 16]; + ld_global_u32 $s1, [$d0 + 20]; + + mulhi_s32 $s2, $s0, $s1; + mulhi_s64 $d2, $d1, $d2; + + mad24hi_s32 $s3, $s0, $s1, $s2; + mul24hi_s32 $s3, $s3, $s1; + + ld_kernarg_u64 $d0, [%output_ptr]; + st_global_u64 $d1, [$d0]; + st_global_u32 $s2, [$d0 + 8]; + st_global_u32 $s3, [$d0 + 12]; + + ret; +}; + +/* All of the hipart mults are implemented using MULT_HIGHPART_EXPR (h*). */ +/* { dg-final { scan-tree-dump-times " h\\\* " 4 "gimple"} } */ + diff --git a/gcc/testsuite/brig.dg/test/gimple/packed.hsail b/gcc/testsuite/brig.dg/test/gimple/packed.hsail new file mode 100644 index 0000000..c939f61 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/packed.hsail @@ -0,0 +1,78 @@ +module &module:1:0:$full:$large:$default; + +/* Test for different cases of packed instruction controls. */ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple -fdump-tree-original" } */ + +prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_b128 $q0, [$d0]; + + add_pp_u8x16 $q1, $q0, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + /* Broadcast the 15 as it's the lowest element (pos 0) in the resulting vector. */ + add_ps_u8x16 $q2, $q1, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + /* Broadcast the lowest element of q1. 
*/ + add_sp_u8x16 $q3, $q1, $q2; + + /* Perform a scalar computation with the lowest element of both inputs and store it to the lowest element of dest. */ + add_ss_u8x16 $q4, $q2, $q3; + + /* Saturating arithmetics variations. */ + add_pp_sat_u8x16 $q5, $q4, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + /* Broadcast the 15 as it's the lowest element (pos 0) in the resulting vector. */ + add_ps_sat_u8x16 $q6, $q5, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + /* Broadcast the lowest element of q6. */ + add_sp_sat_u8x16 $q7, $q6, $q5; + + /* Perform a scalar computation with the lowest element of both inputs and store it to the lowest element of dest. */ + add_ss_sat_u8x16 $q8, $q7, $q6; + + /* Single operand vector computation. */ + neg_p_s16x8 $q9, $q8; + + ld_kernarg_u64 $d0, [%output_ptr]; + st_global_b128 $q8, [$d0]; + + ret; +}; + +/* The b128 load is done using uint128_t*. */ +/* { dg-final { scan-tree-dump "q0 = VIEW_CONVERT_EXPR<uint128_t>\\\(mem_read.\[0-9\]+\\\);" "original"} } */ + +/* Before arithmetics, the uint128_t is casted to a vector datatype. */ +/* { dg-final { scan-tree-dump "<vector\\\(16\\\) unsigned char>\\\(q0\\\) \\\+ \\\{" "original"} } */ + +/* The u8x16 constant is generated to an array with elements in reverse order */ +/* in comparison to the HSAIL syntax. */ +/* { dg-final { scan-tree-dump "\\\+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }" "original"} } */ + +/* After arithmetics, the vector DT is casted back to a uint128_t. */ +/* { dg-final { scan-tree-dump "q1 = VIEW_CONVERT_EXPR<uint128_t>" "original"} } */ + +/* Broadcasted the constant vector's lowest element and summed it up in the next line. */ +/* { dg-final { scan-tree-dump "= { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 };\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple"} } */ + +/* Broadcasted the registers lowest element via a VEC_PERM_EXPR that has an all-zeros mask. 
*/ +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR <_\[0-9\]+, _\[0-9\]+, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }>;" "gimple" } } */ + +/* For the add_ss we assume performing the computation over the whole vector is cheaper than */ +/* extracting the scalar and performing a scalar operation. This aims to stay in the vector */ +/* datapath as long as possible. */ +/* { dg-final { scan-tree-dump "_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q2\\\);\[\n \]+_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q3\\\);\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple" } } */ + +/* Insert the lowest element of the result to the lowest element of the result register. */ +/* { dg-final { scan-tree-dump "= VEC_PERM_EXPR <_\[0-9\]+, new_output.\[0-9\]+_\[0-9\]+, { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }>;" "gimple" } } */ + +/* { dg-final { scan-tree-dump "q4 = VIEW_CONVERT_EXPR<uint128_t>\\\(s_output.\[0-9\]+_\[0-9\]+\\\);" "gimple" } } */ + +/* The saturating arithmetics are (currently) implemented using scalar builtin calls. */ +/* { dg-final { scan-tree-dump-times "= __phsa_builtin_sat_add_u8" 64 "gimple" } } */ + +/* A single operand vector instr (neg.) */ +/* { dg-final { scan-tree-dump " = VIEW_CONVERT_EXPR<vector\\\(8\\\) signed short>\\\(q8\\\);\[\n \]+_\[0-9\]+ = -_\[0-9\]+;\[\n \]+" "gimple" } } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail new file mode 100644 index 0000000..7a38352 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail @@ -0,0 +1,91 @@ +module &module:1:0:$full:$large:$default; + +/* A basic smoke test. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple" } */ + +prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u32 $s0, [$d0]; + ld_global_u32 $s1, [$d0 + 4]; + + add_u32 $s2, $s0, $s1; + add_u32 $s3, $s0, 4294967295; + + ld_kernarg_u64 $d0, [%output_ptr]; + st_global_u32 $s2, [$d0]; + st_global_u32 $s3, [$d0 + 4]; + + ret; +}; + +prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u32 $s0, [$d0]; + ld_global_u32 $s1, [$d0 + 4]; + + add_u32 $s2, $s0, $s1; + + barrier_width(all); + + add_u32 $s3, $s0, 4294967295; + + ld_kernarg_u64 $d0, [%output_ptr]; + st_global_u32 $s2, [$d0]; + st_global_u32 $s3, [$d0 + 4]; + + ret; +}; + +/* The kernel function itself should have a fingerprint as follows */ +/* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */ +/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, void \\\* __private_base_addr\\\)" "gimple"} } */ + +/* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */ +/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */ + +/* The latter ld_global_u32 should be visible as a pointer dereference (after pointer arithmetics on a temporary var): */ +/* mem_read.2 = *D.1691; */ +/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = \\\*\[_0-9\]+;" "gimple"} } */ + +/* add_u32s should generate +operators */ +/* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;" "gimple"} } */ +/* { dg-final { scan-tree-dump "s3 = s0 \\\+ 4294967295;" "gimple"} } */ + +/* The latter st_global_u32 should be visible as a pointer dereference (after pointer arithmetics on a temporary var): */ +/* *D.1694 = s3; */ +/* { dg-final { scan-tree-dump "\\\*\[_0-9\]+ = s3;" "gimple"} } */ + +/* The return inside the kernel 
should be generated to a goto to the end of the kernel. */ +/* goto __kernel_exit; */ +/* __kernel_exit: */ +/* { dg-final { scan-tree-dump "goto __kernel_exit;" "gimple"} } */ +/* { dg-final { scan-tree-dump "__kernel_exit:" "gimple"} } */ + +/* Expecting a work item loop because there are no barrier calls. */ +/* { dg-final { scan-tree-dump "if \\\(__local_x < __cur_wg_size_x\\\) goto __wi_loop_x; else goto" "gimple"} } */ +/* { dg-final { scan-tree-dump "if \\\(__local_y < __cur_wg_size_y\\\) goto __wi_loop_y; else goto" "gimple"} } */ +/* { dg-final { scan-tree-dump "if \\\(__local_z < __cur_wg_size_z\\\) goto __wi_loop_z; else goto" "gimple"} } */ + +/* The launcher should call __phsa_launch_wg_function in this case: */ +/* Kernel (void * __context, void * __group_base_addr) */ +/* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */ +/* { dg-final { scan-tree-dump "__phsa_launch_wg_function \\\(_Kernel, __context, __group_base_addr\\\);" "gimple"} }*/ + +/* The kernel should have the magic metadata section injected to the ELF. */ +/* TODO: this should be disabled in case not outputting to an ELF. */ +/* Currently ELF is assumed by the brig frontend. Do not check for the context */ +/* as it is likely to change. */ +/* { dg-final { scan-tree-dump "\\\.pushsection phsa\\\.kerneldesc\\\.Kernel" "gimple"} }*/ + +/* The kernel with the barrier call should have the barrier builtin call in between the two summations. */ +/* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;\[\n \]+__phsa_builtin_barrier \\\(__context\\\);\[\n \]+s3 = s0 \\\+ 4294967295;" "gimple"} } */ + +/* The kernel with the barrier call's launcher function should call the thread-spawning function. 
*/ +/* { dg-final { scan-tree-dump "__phsa_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr\\\);" "gimple" } } */ + + + diff --git a/gcc/testsuite/brig.dg/test/gimple/variables.hsail b/gcc/testsuite/brig.dg/test/gimple/variables.hsail new file mode 100644 index 0000000..da30899 --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/variables.hsail @@ -0,0 +1,124 @@ +module &module:1:0:$full:$large:$default; + +/* Tests for different variable scopes and address spaces. */ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-gimple" } */ + +prog align(256) private_u32 &prog_private; +private_u32 &mod_private; + +prog group_u32 &prog_group; +group_u32 &mod_group; + +prog global_u32 &prog_global; +global_u32 &mod_global; + +decl prog global_u32 &prog_global_host_def; + +prog readonly_u32 &prog_readonly; +readonly_u32 &mod_readonly; + +prog function &subfunction(arg_u32 %return_value)(arg_u32 %arg) { + + private_u32 %func_private; + group_u32 %func_group; + align(256) global_u32 %func_global; + readonly_u32 %func_readonly; + + ld_private_u32 $s200, [%func_private]; + st_private_u32 $s200, [&prog_private]; + + ld_group_u32 $s203, [%func_group]; + st_group_u32 $s203, [&prog_group]; + + ld_global_u32 $s204, [%func_global]; + st_global_u32 $s204, [&prog_global]; + + ld_readonly_u32 $s205, [%func_readonly]; + st_global_u32 $s205, [%func_global]; + + st_arg_u32 $s2, [%return_value]; + ret; +}; + +prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + private_u32 %kern_private; + group_u32 %kern_group; + global_u32 %kern_global; + readonly_u32 %kern_readonly; + + ld_kernarg_u64 $d0, [%input_ptr]; + ld_global_u32 $s0, [$d0]; + + ld_private_u32 $s2, [&prog_private]; + st_private_u32 $s2, [%kern_private]; + ld_private_u32 $s3, [&mod_private]; + st_private_u32 $s3, [&prog_private]; + + ld_group_u32 $s4, [&prog_group]; + st_group_u32 $s4, [%kern_group]; + ld_group_u32 $s5, [&mod_group]; + st_group_u32 $s5, [&prog_group]; + + ld_global_u32 
$s6, [&prog_global]; + st_global_u32 $s6, [%kern_global]; + ld_global_u32 $s7, [&mod_global]; + st_global_u32 $s7, [&prog_global]; + + ld_readonly_u32 $s8, [&prog_readonly]; + st_global_u32 $s8, [%kern_global]; + ld_readonly_u32 $s9, [&mod_readonly]; + st_global_u32 $s9, [&prog_global]; + + ld_readonly_u32 $s10, [%kern_readonly]; + st_global_u32 $s10, [%kern_global]; + ld_readonly_u32 $s11, [%kern_readonly]; + st_global_u32 $s11, [&prog_global_host_def]; + + { + arg_u32 %arg; + arg_u32 %return_value; + st_arg_u32 $s1, [%arg]; + call &subfunction(%return_value)(%arg); + ld_arg_u32 $s1, [%return_value]; + } + ld_kernarg_u64 $d1, [%output_ptr]; + st_global_u32 $s1, [$d0]; +}; + +/* Private variable offsets assigned in the order of their appearance */ +/* + prog_private @0 (align 256) -> until 254 to ensure all WIs + mod_private @256 have their chunks aligned + func_private @260 + kern_private @264 +*/ + +/* Group variable offsets assigned in the order of their appearance */ +/* + prog_group @0 (2) + mod_group @4 (4) + func_group @8 (1) + kern_group @12 (3) +*/ + +/* { dg-final { scan-tree-dump "\\\+ 8;.*\\\+ 12;.*\\\+ 4;" "gimple" } } */ + +/* The "mangling" of the global and readonly vars. */ +/* { dg-final { scan-tree-dump "\[ \]*prog_global = s204;" "gimple" } } */ + +/* { dg-final { scan-tree-dump "\.module.mod_global;" "gimple" } } */ + +/* Host defined variables need indirect access as the address is + known only at run time. 
*/ +/* { dg-final { scan-tree-dump "MEM\\\[\\\(unsigned int \\\*\\\)__phsa.host_def.prog_global_host_def.\[0-9\]+_\[0-9\]+\\\] = s11;" "gimple" } } */ + +/* { dg-final { scan-tree-dump "\.subfunction.func_global;" "gimple" } } */ +/* { dg-final { scan-tree-dump "\.subfunction.func_readonly;" "gimple" } } */ + +/* { dg-final { scan-tree-dump "kernel.kern_global" "gimple" } } */ +/* { dg-final { scan-tree-dump "kernel.kern_readonly" "gimple" } } */ + + diff --git a/gcc/testsuite/brig.dg/test/gimple/vector.hsail b/gcc/testsuite/brig.dg/test/gimple/vector.hsail new file mode 100644 index 0000000..7b247fc --- /dev/null +++ b/gcc/testsuite/brig.dg/test/gimple/vector.hsail @@ -0,0 +1,57 @@ +module &module:1:0:$full:$large:$default; + +/* A test for vector operands. */ + +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-original" } */ + +prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) +{ + ld_kernarg_u64 $d0, [%input_ptr]; + ld_v2_global_f32 ($s0, $s1), [$d0]; + ld_v3_global_f32 ($s2, $s3, $s4), [$d0 + 8]; + ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d0 + 20]; + + add_f32 $s9, $s0, $s1; + combine_v2_b64_b32 $d2, ($s1, $s0); + combine_v2_b64_b32 $d3, ($s2, $s3); + + add_pp_f32x2 $d4, $d2, $d3; + + expand_v2_b32_b64 ($s0, $s3), $d4; + + ld_kernarg_u64 $d1, [%output_ptr]; + st_v2_global_f32 ($s0, $s1), [$d1]; + st_v3_global_f32 ($s2, $s3, $s4), [$d1 + 8]; + st_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1 + 20]; + + ret; +}; + +/* The v2 load is done via casting to a vector datatype ptr. */ +/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)" "original"} } */ + +/* The v3 load is scalarized (at the moment) due to gcc requiring 2's exponent wide vectors. 
*/ +/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 0>\\\);\[\n ]+s1 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 32>\\\);" "original"} } */ + +/* The v4 load is done via casting to a vector datatype ptr. */ +/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(4\\\) <float:32> \\\*\\\)" "original"} } */ + +/* The combines are generated to vector constructors. */ +/* { dg-final { scan-tree-dump "{s1, s0}" "original"} } */ +/* { dg-final { scan-tree-dump "{s2, s3}" "original"} } */ + +/* Expands to BIT_FIELD_REFs. */ +/* { dg-final { scan-tree-dump "s0 = BIT_FIELD_REF <d4, 32, 0>;" "original"} } */ +/* { dg-final { scan-tree-dump "s3 = BIT_FIELD_REF <d4, 32, 32>;" "original"} } */ + +/* The v2 store is done via casting to a vector datatype ptr and constructing a vector from the inputs. */ +/* { dg-final { scan-tree-dump "MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\)\\\] = " "original"} } */ + +/* The v3 store is scalarized (at the moment) due to gcc requiring 2's exponent wide vectors. 
*/ +/* { dg-final { scan-tree-dump "\\\*VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 8\\\) \\\+ 0 = VIEW_CONVERT_EXPR<<float:32>>\\\(s2\\\);" "original"} } */ +/* { dg-final { scan-tree-dump "\\\*VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 8\\\) \\\+ 4 = VIEW_CONVERT_EXPR<<float:32>>\\\(s3\\\);" "original"} } */ +/* { dg-final { scan-tree-dump "\\\*VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 8\\\) \\\+ 8 = VIEW_CONVERT_EXPR<<float:32>>\\\(s4\\\);" "original"} } */ + +/* The v4 store is done via casting to a vector datatype and constructing a vector from the inputs. */ +/* { dg-final { scan-tree-dump "MEM\\\[\\\(vector\\\(4\\\) <float:32> \\\*\\\)VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 20\\\)\\\] = {VIEW_CONVERT_EXPR<<float:32>>\\\(s5\\\), VIEW_CONVERT_EXPR<<float:32>>\\\(s6\\\), VIEW_CONVERT_EXPR<<float:32>>\\\(s7\\\), VIEW_CONVERT_EXPR<<float:32>>\\\(s8\\\)};" "original"} } */ diff --git a/gcc/testsuite/lib/brig-dg.exp b/gcc/testsuite/lib/brig-dg.exp new file mode 100644 index 0000000..ee96708 --- /dev/null +++ b/gcc/testsuite/lib/brig-dg.exp @@ -0,0 +1,29 @@ +# Copyright (C) 2009-2014 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +load_lib gcc-dg.exp + +# Define brig callbacks for dg.exp. + +proc brig-dg-test { prog do_what extra_tool_flags } { + set result \ + [gcc-dg-test-1 brig_target_compile $prog $do_what $extra_tool_flags] + + set comp_output [lindex $result 0] + set output_file [lindex $result 1] + + return [list $comp_output $output_file] +} diff --git a/gcc/testsuite/lib/brig.exp b/gcc/testsuite/lib/brig.exp new file mode 100644 index 0000000..d1c967d --- /dev/null +++ b/gcc/testsuite/lib/brig.exp @@ -0,0 +1,40 @@ +# Copyright (C) 2009-2016 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +load_lib prune.exp +load_lib gcc-defs.exp +load_lib timeout.exp +load_lib target-libpath.exp +# +# brig_target_compile -- compile a HSAIL input to BRIG using HSAILasm and then +# compile the BRIG to target ISA using gcc + +proc brig_target_compile { source dest type options } { + global tmpdir + global testname_with_flags + if { [file extension $source] == ".hsail" } { + # We cannot assume all inputs are .hsail as the dg machinery + # calls this for some C files to check linker plugin support or + # similar. 
+ set brig_source ${tmpdir}/[file tail ${source}].brig + exec HSAILasm $source -o ${brig_source} + set source ${brig_source} + # Change the testname to the .brig. + set testname_with_flags [file tail $source] + } + return [target_compile $source $dest $type $options] +} +