A smoke test suite. The patch has been tested more thoroughly with the
proprietary HSA PRM conformance suite.

Requires the HSAILasm tool to first compile the .hsail to .brig.

--
Pekka Jääskeläinen
Parmance
A smoke test suite. The patch has been tested more thoroughly with the
proprietary HSA PRM conformance suite.

Requires the HSAILasm tool to first compile the .hsail to .brig.
diff --git a/gcc/testsuite/brig.dg/README b/gcc/testsuite/brig.dg/README
new file mode 100644
index 0000000..cc313c4
--- /dev/null
+++ b/gcc/testsuite/brig.dg/README
@@ -0,0 +1,10 @@
+BRIG (HSAIL) frontend test cases
+--------------------------------
+
+The suite consists of "smoke tests" that test several features of
+the compilation and regression tests, but is not an exhaustive test
+suite for all HSAIL instructions. The HSA PRM conformance suite
+is supposed to be used for that.
+
+HSAILasm is required for converting the text HSAIL files to BRIGs
+which the compiler consumes.
diff --git a/gcc/testsuite/brig.dg/dg.exp b/gcc/testsuite/brig.dg/dg.exp
new file mode 100644
index 0000000..fd75cae
--- /dev/null
+++ b/gcc/testsuite/brig.dg/dg.exp
@@ -0,0 +1,27 @@
+#   Copyright (C) 2009-2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+load_lib brig-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+dg-runtest [find $srcdir/$subdir *.hsail] "" ""
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/brig.dg/test/gimple/alloca.hsail b/gcc/testsuite/brig.dg/test/gimple/alloca.hsail
new file mode 100644
index 0000000..73c2f93
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/alloca.hsail
@@ -0,0 +1,37 @@
+module &module:1:0:$full:$large:$default;
+
+/* Tests for alloca. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+prog function &subfunction(arg_u32 %return_value)() {
+     alloca_align(1)_u32 $s2, 256;
+     st_arg_u32 $s2, [%return_value];
+     ret;
+};
+
+prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+        ld_kernarg_u64 $d0, [%input_ptr];
+        ld_global_u32 $s0, [$d0];
+
+	alloca_align(256)_u32 $s1, 16;
+	{
+		arg_u32 %return_value;
+		call &subfunction(%return_value)();
+		ld_arg_u32 $s1, [%return_value];
+	}
+        ld_kernarg_u64 $d1, [%output_ptr];
+        st_global_u32 $s1, [$d0];
+};
+
+/* { dg-final { scan-tree-dump "s2 = __phsa_builtin_alloca \\\(256, 1, __context\\\);" "gimple" } } */
+
+/* { dg-final { scan-tree-dump "s1 = __phsa_builtin_alloca \\\(16, 256, __context\\\);" "gimple" } } */
+
+
+/* Both functions should have an alloca frame push and pop. */
+/* { dg-final { scan-tree-dump-times "__phsa_builtin_alloca_push_frame \\\(__context\\\);" 2 "gimple" } } */
+
+/* { dg-final { scan-tree-dump-times "__phsa_builtin_alloca_pop_frame \\\(__context\\\);" 2 "gimple" } } */
diff --git a/gcc/testsuite/brig.dg/test/gimple/atomics.hsail b/gcc/testsuite/brig.dg/test/gimple/atomics.hsail
new file mode 100644
index 0000000..a0b2f85
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/atomics.hsail
@@ -0,0 +1,33 @@
+module &module:1:0:$full:$large:$default;
+
+/* Test for atomic instructions. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-original" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+        ld_kernarg_u64 $d0, [%input_ptr];
+
+	atomic_ld_global_rlx_system_b32 $s0, [$d0];
+	atomic_add_global_rlx_system_u32 $s1, [$d0 + 4], $s0;
+
+        ld_kernarg_u64 $d0, [%output_ptr];
+        atomicnoret_st_global_rlx_system_b32 [$d0], $s2;
+
+	atomicnoret_min_global_rlx_system_u32 [$d0 + 4], $s1;
+
+        ret;
+};
+
+/* The atomic loads are implemented by casting to an atomic pointer. */
+/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<unsigned int>\\\(\\\*\\\(atomic unsigned int \\\*\\\)" "original"} } */
+
+/* The atomic add should call a gcc builtin. */
+/* { dg-final { scan-tree-dump "= __sync_fetch_and_add_4 \\\(" "original"} } */
+
+/* The atomic stores are implemented by casting to an atomic pointer. */
+/* { dg-final { scan-tree-dump "\\\*\\\(atomic unsigned int \\\*\\\) VIEW_CONVERT_EXPR<unsigned int \\\*>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d0\\\)\\\) = s2;" "original"} } */
+
+/* The atomic min is implemented by a custom builtin. */
+/* { dg-final { scan-tree-dump "builtin_out.\[0-9\]+ = __phsa_builtin_atomic_min_u32 \\\(" "original"} } */
diff --git a/gcc/testsuite/brig.dg/test/gimple/branches.hsail b/gcc/testsuite/brig.dg/test/gimple/branches.hsail
new file mode 100644
index 0000000..081fde3
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/branches.hsail
@@ -0,0 +1,58 @@
+module &module:1:0:$full:$large:$default;
+
+/* Test different styles of branches. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+        ld_kernarg_u64 $d0, [%input_ptr];
+        ld_global_u64 $d1, [$d0];
+        ld_global_u64 $d2, [$d0 + 8];
+
+        ld_global_u32 $s0, [$d0 + 16];
+        ld_global_u32 $s1, [$d0 + 20];
+
+	sbr_width(all)_u32 $s1 [@case0, @case1, @case2];
+@case0:
+        st_global_u64 0, [$d0];
+	br @out;
+@case1:
+        st_global_u64 1, [$d0];
+	br @out;
+@case2:
+        st_global_u64 2, [$d0];
+@out:
+	cmp_eq_u32_u32 $s2, $s1, $s0;
+	cvt_b1_u32 $c0, $s2;
+
+	cbr_width(all)_b1 $c0, @true_branch;
+@false_branch:
+        st_global_u64 $d1, [$d0];
+
+@true_branch:
+        ld_kernarg_u64 $d0, [%output_ptr];
+
+        st_global_u32 $s2, [$d0 + 8];
+	br @skip;
+        st_global_u32 $s3, [$d0 + 12];
+
+@skip:
+        ret;
+};
+
+/* sbr is converted to a switch */
+/* { dg-final { scan-tree-dump "switch \\\(s1\\\) <default: <D.\[0-9\]+>, case 0: <D.\[0-9\]+>, case 1: <D.\[0-9\]+>, case 2: <D.\[0-9\]+>>" "gimple"} } */
+
+/* br @out converted to gotos */
+/* { dg-final { scan-tree-dump-times "goto @out" 2 "gimple"} } */ 
+
+/* the comparison instruction */
+/* { dg-final { scan-tree-dump "c0 = s2 != 0;" "gimple" } } */
+
+/* cbr to an if clause */
+/* { dg-final { scan-tree-dump "if \\\(c0 != 0\\\) goto @true_branch; else goto <D.\[0-9\]+>;" "gimple" } } */
+
+/* br @skip converted to a goto */
+/* { dg-final { scan-tree-dump "goto @skip" "gimple"} } */ 
diff --git a/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail
new file mode 100644
index 0000000..de1a6dc
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail
@@ -0,0 +1,74 @@
+module &module:1:0:$full:$large:$default;
+
+/* Tests for fbarrier. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+fbarrier &fb_module_scope;
+
+prog function &subfunction(arg_u32 %return_value)() {
+
+     workitemflatabsid_u32 $s3;
+     cvt_b1_u32 $c1, $s3;
+     cbr_width(all)_b1 $c1, @skip_fbar;
+     waitfbar &fb_module_scope;
+@skip_fbar:
+
+     st_arg_u32 $s3, [%return_value];
+     ret;
+};
+
+prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+	fbarrier %fb_func_scope;
+
+        ld_kernarg_u64 $d0, [%input_ptr];
+        ld_global_u32 $s0, [$d0];
+
+	workitemflatabsid_u32 $s1;
+	cvt_b1_u32 $c1, $s1;
+	cbr_width(all)_b1 $c1, @skip_init;
+
+	initfbar &fb_module_scope;
+	initfbar %fb_func_scope;
+
+        joinfbar &fb_module_scope;
+
+@skip_init:
+	barrier_width(all);
+
+        joinfbar %fb_func_scope;
+
+	{
+		arg_u32 %return_value;
+		call &subfunction(%return_value)();
+		ld_arg_u32 $s1, [%return_value];
+	}
+	arrivefbar %fb_func_scope;
+
+        ld_kernarg_u64 $d1, [%output_ptr];
+        st_global_u32 $s1, [$d0];
+
+	workitemflatabsid_u32 $s1;
+	cvt_b1_u32 $c0, $s1;
+	cbr_width(all)_b1 $c0, @skip_fini;
+
+	releasefbar &fb_module_scope;
+	releasefbar %fb_func_scope;
+
+@skip_fini:
+
+};
+/* fbarriers are allocated from the group memory in the order of 
+   appearance. The current implementation allocates 32B per fbarrier. */
+
+/* { dg-final { scan-tree-dump "__phsa_builtin_waitfbar \\\(0, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "__phsa_builtin_initfbar \\\(0, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "__phsa_builtin_initfbar \\\(32, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "__phsa_builtin_joinfbar \\\(0, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__phsa_builtin_barrier \\\(__context\\\);\[\n ]+__phsa_builtin_joinfbar \\\(32, __context\\\);" "gimple"} } */
+
+/* { dg-final { scan-tree-dump "__phsa_builtin_arrivefbar \\\(32, __context\\\);" "gimple"} } */
+
+/* { dg-final { scan-tree-dump "__phsa_builtin_releasefbar \\\(0, __context\\\);\[\n ]+__phsa_builtin_releasefbar \\\(32, __context\\\);" "gimple"} } */
diff --git a/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail
new file mode 100644
index 0000000..d3b690c
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail
@@ -0,0 +1,59 @@
+module &module:1:0:$full:$large:$default;
+
+/* Function calls and argument passing. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+prog function &subfunction(arg_u32 %return_value)(arg_f32 %float_arg, arg_f64 %double_arg, arg_f16 %half_arg) {
+     ld_arg_f32 $s0, [%float_arg];
+     cvt_u32_f32 $s0, $s0;
+
+     ld_arg_f64 $d0, [%double_arg];
+     cvt_u32_f64 $s1, $d0;
+
+     ld_arg_f16 $s2, [%half_arg];
+     cvt_u32_f16 $s2, $s2;
+
+     add_u32 $s3, $s0, $s1;
+     add_u32 $s3, $s3, $s2;
+
+     st_arg_u32 $s3, [%return_value];
+     ret;
+};
+
+prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+        ld_kernarg_u64 $d0, [%input_ptr];
+        ld_global_u32 $s0, [$d0];
+	{
+		arg_f32 %float_arg;
+		arg_f64 %double_arg;
+		arg_f16 %half_arg;
+		arg_u32 %return_value;
+
+		st_arg_f32 12.0f, [%float_arg];
+		st_arg_f64 640.0d, [%double_arg];
+		st_arg_f16 12.0h, [%half_arg];
+
+		call &subfunction(%return_value)(%float_arg, %double_arg, %half_arg);
+
+		ld_arg_u32 $s1, [%return_value];
+	}	
+        ld_kernarg_u64 $d1, [%output_ptr];
+        st_global_u32 $s1, [$d0];
+};
+
+/* The generated function call should have the incoming arguments and three hidden arguments. */
+
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, __private_base_addr\\\);" "gimple"} } */
+
+/* The callee should refer directly to the scalar arguments when it reads them. */
+/* { dg-final { scan-tree-dump "= float_arg;" "gimple"} } */
+/* { dg-final { scan-tree-dump "= double_arg;" "gimple"} } */
+/* { dg-final { scan-tree-dump "= half_arg;" "gimple"} } */
+
+/* The return value is stored to a temporary before being returned. */
+/* { dg-final { scan-tree-dump "_retvalue_temp = s3;" "gimple"} } */
+/* { dg-final { scan-tree-dump "D.\[0-9\]+ = _retvalue_temp;" "gimple"} } */
+/* { dg-final { scan-tree-dump "return D.\[0-9\]+;" "gimple"} } */
diff --git a/gcc/testsuite/brig.dg/test/gimple/mem.hsail b/gcc/testsuite/brig.dg/test/gimple/mem.hsail
new file mode 100644
index 0000000..777faa1
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/mem.hsail
@@ -0,0 +1,39 @@
+module &module:1:0:$full:$large:$default;
+
+/* Tests for load/store addressing modes. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-original" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %input_ptr2, kernarg_u64 %output_ptr)
+{
+	global_u32 %global_array[4];  
+
+	ld_kernarg_u64 $d0, [%input_ptr];
+	ld_kernarg_u64 $d2, [%input_ptr2];
+	ld_global_u32 $s0, [$d0];
+	ld_global_u64 $d1, [$d2 + 4];
+
+	ld_global_u32 $s2, [%global_array][$d1 + 4];  
+
+	ld_kernarg_u64 $d0, [%output_ptr];
+	st_global_u32 $s0, [$d0];
+	st_global_u32 $s1, [$d0 + 4];
+	st_global_u32 $s2, [$d0 + 8];
+
+	ret;
+};
+
+/* %input_ptr, %input_ptr2 and %output_ptr accesses should generate offsets to the __args array */
+/* { dg-final { scan-tree-dump "\\\(__args\\\);\[\n \]+d0 =" "original"} } */
+/* { dg-final { scan-tree-dump "\\\(__args\\\) \\\+ 8\\\);\[\n \]+d2 =" "original"} } */
+/* { dg-final { scan-tree-dump "\\\(__args\\\) \\\+ 16\\\);\[\n \]+d0 =" "original"} } */
+
+/* ld_global_u32 $s0, [$d0] */
+/* { dg-final { scan-tree-dump "<unsigned char \\\*>\\\(d0\\\)\\\);\[\n \]+s0 =" "original"} } */
+
+/* ld_global_u64 $d1, [$d2 + 4] pointer arithmetic */
+/* { dg-final { scan-tree-dump "\\\(d2\\\)\\\) \\\+ 4\\\);\[\n \]+d1 = " "original"} } */
+
+/* ld_global_u32 $s2, [%global_array][$d1 + 4]; is the most complex form */
+/* { dg-final { scan-tree-dump "&_Kernel.global_array\\\) \\\+ VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned int \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 4\\\)\\\)\\\);\[\n \]+s2 = " "original" } } */
diff --git a/gcc/testsuite/brig.dg/test/gimple/mulhi.hsail b/gcc/testsuite/brig.dg/test/gimple/mulhi.hsail
new file mode 100644
index 0000000..acdced9
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/mulhi.hsail
@@ -0,0 +1,33 @@
+module &module:1:0:$full:$large:$default;
+
+/* Test high part multiplies. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+        ld_kernarg_u64 $d0, [%input_ptr];
+        ld_global_u64 $d1, [$d0];
+        ld_global_u64 $d2, [$d0 + 8];
+
+        ld_global_u32 $s0, [$d0 + 16];
+        ld_global_u32 $s1, [$d0 + 20];
+
+	mulhi_s32 $s2, $s0, $s1;
+	mulhi_s64 $d2, $d1, $d2;
+
+	mad24hi_s32 $s3, $s0, $s1, $s2;
+	mul24hi_s32 $s3, $s3, $s1;
+
+        ld_kernarg_u64 $d0, [%output_ptr];
+        st_global_u64 $d1, [$d0];
+        st_global_u32 $s2, [$d0 + 8];
+        st_global_u32 $s3, [$d0 + 12];
+
+        ret;
+};
+
+/* All of the hipart mults are implemented using MULT_HIGHPART_EXPR (h*). */
+/* { dg-final { scan-tree-dump-times " h\\\* " 4 "gimple"} } */
+
diff --git a/gcc/testsuite/brig.dg/test/gimple/packed.hsail b/gcc/testsuite/brig.dg/test/gimple/packed.hsail
new file mode 100644
index 0000000..c939f61
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/packed.hsail
@@ -0,0 +1,78 @@
+module &module:1:0:$full:$large:$default;
+
+/* Test for different cases of packed instruction controls. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple -fdump-tree-original" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+        ld_kernarg_u64 $d0, [%input_ptr];
+        ld_global_b128 $q0, [$d0];
+
+	add_pp_u8x16 $q1, $q0, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+	/* Broadcast the 15 as it's the lowest element (pos 0) in the resulting vector. */
+	add_ps_u8x16 $q2, $q1, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+	/* Broadcast the lowest element of q1. */
+	add_sp_u8x16 $q3, $q1, $q2;
+
+	/* Perform a scalar computation with the lowest element of both inputs and store it to the lowest element of dest. */
+	add_ss_u8x16 $q4, $q2, $q3;
+
+	/* Saturating arithmetics variations. */
+	add_pp_sat_u8x16 $q5, $q4, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+	/* Broadcast the 15 as it's the lowest element (pos 0) in the resulting vector. */
+	add_ps_sat_u8x16 $q6, $q5, u8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+	/* Broadcast the lowest element of q6. */
+	add_sp_sat_u8x16 $q7, $q6, $q5;
+
+	/* Perform a scalar computation with the lowest element of both inputs and store it to the lowest element of dest. */
+	add_ss_sat_u8x16 $q8, $q7, $q6;
+
+	/* Single operand vector computation. */
+	neg_p_s16x8 $q9, $q8;
+
+        ld_kernarg_u64 $d0, [%output_ptr];
+        st_global_b128 $q8, [$d0];
+
+        ret;
+};
+
+/* The b128 load is done using uint128_t*. */
+/* { dg-final { scan-tree-dump "q0 = VIEW_CONVERT_EXPR<uint128_t>\\\(mem_read.\[0-9\]+\\\);" "original"} } */
+
+/* Before arithmetics, the uint128_t is casted to a vector datatype. */
+/* { dg-final { scan-tree-dump "<vector\\\(16\\\) unsigned char>\\\(q0\\\) \\\+ \\\{" "original"} } */
+
+/* The u8x16 constant is generated to an array with elements in reverse order */
+/* in comparison to the HSAIL syntax. */
+/* { dg-final { scan-tree-dump "\\\+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }" "original"} } */
+
+/* After arithmetics, the vector DT is casted back to a uint128_t. */
+/* { dg-final { scan-tree-dump "q1 = VIEW_CONVERT_EXPR<uint128_t>" "original"} } */
+
+/* Broadcasted the constant vector's lowest element and summed it up in the next line. */
+/* { dg-final { scan-tree-dump "= { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 };\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple"} } */
+
+/* Broadcasted the register's lowest element via a VEC_PERM_EXPR that has an all-zeros mask. */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR <_\[0-9\]+, _\[0-9\]+, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }>;" "gimple" } } */
+
+/* For the add_ss we assume performing the computation over the whole vector is cheaper than */
+/* extracting the scalar and performing a scalar operation. This aims to stay in the vector */
+/* datapath as long as possible. */
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q2\\\);\[\n \]+_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q3\\\);\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple" } } */
+
+/* Insert the lowest element of the result to the lowest element of the result register. */
+/* { dg-final { scan-tree-dump "= VEC_PERM_EXPR <_\[0-9\]+, new_output.\[0-9\]+_\[0-9\]+, { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }>;" "gimple" } } */
+
+/* { dg-final { scan-tree-dump "q4 = VIEW_CONVERT_EXPR<uint128_t>\\\(s_output.\[0-9\]+_\[0-9\]+\\\);" "gimple" } } */
+
+/* The saturating arithmetics are (currently) implemented using scalar builtin calls. */
+/* { dg-final { scan-tree-dump-times "= __phsa_builtin_sat_add_u8" 64 "gimple" } } */
+
+/* A single operand vector instr (neg.) */
+/* { dg-final { scan-tree-dump " = VIEW_CONVERT_EXPR<vector\\\(8\\\) signed short>\\\(q8\\\);\[\n \]+_\[0-9\]+ = -_\[0-9\]+;\[\n \]+" "gimple" } } */
diff --git a/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail
new file mode 100644
index 0000000..7a38352
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail
@@ -0,0 +1,91 @@
+module &module:1:0:$full:$large:$default;
+
+/* A basic smoke test. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+	ld_kernarg_u64 $d0, [%input_ptr];
+	ld_global_u32 $s0, [$d0];
+	ld_global_u32 $s1, [$d0 + 4];
+
+	add_u32 $s2, $s0, $s1;
+	add_u32 $s3, $s0, 4294967295;
+
+	ld_kernarg_u64 $d0, [%output_ptr];
+	st_global_u32 $s2, [$d0];
+	st_global_u32 $s3, [$d0 + 4];
+
+	ret;
+};
+
+prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+	ld_kernarg_u64 $d0, [%input_ptr];
+	ld_global_u32 $s0, [$d0];
+	ld_global_u32 $s1, [$d0 + 4];
+
+	add_u32 $s2, $s0, $s1;
+
+	barrier_width(all);
+
+	add_u32 $s3, $s0, 4294967295;
+
+	ld_kernarg_u64 $d0, [%output_ptr];
+	st_global_u32 $s2, [$d0];
+	st_global_u32 $s3, [$d0 + 4];
+
+	ret;
+};
+
+/* The kernel function itself should have a fingerprint as follows */
+/* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */
+/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, void \\\* __private_base_addr\\\)" "gimple"} } */
+
+/* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */
+
+/* The latter ld_global_u32 should be visible as a pointer dereference (after pointer arithmetics on a temporary var): */
+/* mem_read.2 = *D.1691; */
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = \\\*\[_0-9\]+;" "gimple"} } */
+
+/* add_u32s should generate +operators */
+/* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;" "gimple"} } */
+/* { dg-final { scan-tree-dump "s3 = s0 \\\+ 4294967295;" "gimple"} } */
+
+/* The latter st_global_u32 should be visible as a pointer dereference (after pointer arithmetics on a temporary var): */
+/* *D.1694 = s3; */
+/* { dg-final { scan-tree-dump "\\\*\[_0-9\]+ = s3;" "gimple"} } */
+
+/* The return inside the kernel should be generated to a goto to the end of the kernel. */
+/*  goto __kernel_exit; */
+/*  __kernel_exit: */
+/* { dg-final { scan-tree-dump "goto __kernel_exit;" "gimple"} } */
+/* { dg-final { scan-tree-dump "__kernel_exit:" "gimple"} } */
+
+/* Expecting a work item loop because there are no barrier calls. */
+/* { dg-final { scan-tree-dump "if \\\(__local_x < __cur_wg_size_x\\\) goto __wi_loop_x; else goto" "gimple"} } */
+/* { dg-final { scan-tree-dump "if \\\(__local_y < __cur_wg_size_y\\\) goto __wi_loop_y; else goto" "gimple"} } */
+/* { dg-final { scan-tree-dump "if \\\(__local_z < __cur_wg_size_z\\\) goto __wi_loop_z; else goto" "gimple"} } */
+
+/* The launcher should call __phsa_launch_wg_function in this case: */
+/* Kernel (void * __context, void * __group_base_addr) */
+/* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */
+/* { dg-final { scan-tree-dump "__phsa_launch_wg_function \\\(_Kernel, __context, __group_base_addr\\\);" "gimple"} }*/
+
+/* The kernel should have the magic metadata section injected to the ELF. */
+/* TODO: this should be disabled in case not outputting to an ELF. */
+/* Currently ELF is assumed by the brig frontend. Do not check for the context */
+/* as it is likely to change. */
+/* { dg-final { scan-tree-dump "\\\.pushsection phsa\\\.kerneldesc\\\.Kernel" "gimple"} }*/
+
+/* The kernel with the barrier call should have the barrier builtin call in between the two summations. */
+/* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;\[\n \]+__phsa_builtin_barrier \\\(__context\\\);\[\n \]+s3 = s0 \\\+ 4294967295;" "gimple"} } */
+
+/* The kernel with the barrier call's launcher function should call the thread-spawning function. */
+/* { dg-final { scan-tree-dump "__phsa_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr\\\);" "gimple" } } */
+
+
+
diff --git a/gcc/testsuite/brig.dg/test/gimple/variables.hsail b/gcc/testsuite/brig.dg/test/gimple/variables.hsail
new file mode 100644
index 0000000..da30899
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/variables.hsail
@@ -0,0 +1,124 @@
+module &module:1:0:$full:$large:$default;
+
+/* Tests for different variable scopes and address spaces. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-gimple" } */
+
+prog align(256) private_u32 &prog_private;
+private_u32 &mod_private;
+
+prog group_u32 &prog_group;
+group_u32 &mod_group;
+
+prog global_u32 &prog_global;
+global_u32 &mod_global;
+
+decl prog global_u32 &prog_global_host_def;
+
+prog readonly_u32 &prog_readonly;
+readonly_u32 &mod_readonly;
+
+prog function &subfunction(arg_u32 %return_value)(arg_u32 %arg) {
+
+     private_u32 %func_private;
+     group_u32 %func_group;
+     align(256) global_u32 %func_global;
+     readonly_u32 %func_readonly;
+
+     ld_private_u32 $s200, [%func_private];
+     st_private_u32 $s200, [&prog_private];
+
+     ld_group_u32 $s203, [%func_group];
+     st_group_u32 $s203, [&prog_group];
+
+     ld_global_u32 $s204, [%func_global];
+     st_global_u32 $s204, [&prog_global];
+
+     ld_readonly_u32 $s205, [%func_readonly];
+     st_global_u32 $s205, [%func_global];
+
+     st_arg_u32 $s2, [%return_value];
+     ret;
+};
+
+prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+	private_u32 %kern_private;
+	group_u32 %kern_group;
+	global_u32 %kern_global;
+	readonly_u32 %kern_readonly;
+
+        ld_kernarg_u64 $d0, [%input_ptr];
+        ld_global_u32 $s0, [$d0];
+
+	ld_private_u32 $s2, [&prog_private];
+	st_private_u32 $s2, [%kern_private];
+	ld_private_u32 $s3, [&mod_private];
+	st_private_u32 $s3, [&prog_private];
+
+	ld_group_u32 $s4, [&prog_group];
+	st_group_u32 $s4, [%kern_group];
+	ld_group_u32 $s5, [&mod_group];
+	st_group_u32 $s5, [&prog_group];
+
+	ld_global_u32 $s6, [&prog_global];
+	st_global_u32 $s6, [%kern_global];
+	ld_global_u32 $s7, [&mod_global];
+	st_global_u32 $s7, [&prog_global];
+
+	ld_readonly_u32 $s8, [&prog_readonly];
+	st_global_u32 $s8, [%kern_global];
+	ld_readonly_u32 $s9, [&mod_readonly];
+	st_global_u32 $s9, [&prog_global];
+
+	ld_readonly_u32 $s10, [%kern_readonly];
+	st_global_u32 $s10, [%kern_global];
+	ld_readonly_u32 $s11, [%kern_readonly];
+	st_global_u32 $s11, [&prog_global_host_def];
+
+	{
+		arg_u32 %arg;
+		arg_u32 %return_value;
+		st_arg_u32 $s1, [%arg];
+		call &subfunction(%return_value)(%arg);
+		ld_arg_u32 $s1, [%return_value];
+	}
+        ld_kernarg_u64 $d1, [%output_ptr];
+        st_global_u32 $s1, [$d0];
+};
+
+/* Private variable offsets assigned in the order of their appearance */
+/*
+ prog_private @0	(align 256) -> until 254 to ensure all WIs 
+ mod_private  @256	               have their chunks aligned
+ func_private @260
+ kern_private @264
+*/
+
+/* Group variable offsets assigned in the order of their appearance */
+/*
+ prog_group @0		(2)
+ mod_group  @4		(4)
+ func_group @8		(1)
+ kern_group @12		(3)
+*/
+
+/* { dg-final { scan-tree-dump "\\\+ 8;.*\\\+ 12;.*\\\+ 4;" "gimple" } } */
+
+/* The "mangling" of the global and readonly vars. */
+/* { dg-final { scan-tree-dump "\[ \]*prog_global = s204;" "gimple" } } */
+
+/* { dg-final { scan-tree-dump "\.module.mod_global;" "gimple" } } */
+
+/* Host defined variables need indirect access as the address is
+   known only at run time. */
+/* { dg-final { scan-tree-dump "MEM\\\[\\\(unsigned int \\\*\\\)__phsa.host_def.prog_global_host_def.\[0-9\]+_\[0-9\]+\\\] = s11;" "gimple" } } */
+
+/* { dg-final { scan-tree-dump "\.subfunction.func_global;" "gimple" } } */
+/* { dg-final { scan-tree-dump "\.subfunction.func_readonly;" "gimple" } } */
+
+/* { dg-final { scan-tree-dump "kernel.kern_global" "gimple" } } */
+/* { dg-final { scan-tree-dump "kernel.kern_readonly" "gimple" } } */
+
+
diff --git a/gcc/testsuite/brig.dg/test/gimple/vector.hsail b/gcc/testsuite/brig.dg/test/gimple/vector.hsail
new file mode 100644
index 0000000..7b247fc
--- /dev/null
+++ b/gcc/testsuite/brig.dg/test/gimple/vector.hsail
@@ -0,0 +1,57 @@
+module &module:1:0:$full:$large:$default;
+
+/* A test for vector operands. */
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-original" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+        ld_kernarg_u64 $d0, [%input_ptr];
+	ld_v2_global_f32 ($s0, $s1), [$d0];
+	ld_v3_global_f32 ($s2, $s3, $s4), [$d0 + 8];
+	ld_v4_global_f32 ($s5, $s6, $s7, $s8), [$d0 + 20];
+
+	add_f32 $s9, $s0, $s1;
+	combine_v2_b64_b32 $d2, ($s1, $s0);
+	combine_v2_b64_b32 $d3, ($s2, $s3);
+
+	add_pp_f32x2 $d4, $d2, $d3;
+
+	expand_v2_b32_b64 ($s0, $s3), $d4; 
+
+        ld_kernarg_u64 $d1, [%output_ptr];
+        st_v2_global_f32 ($s0, $s1), [$d1];
+        st_v3_global_f32 ($s2, $s3, $s4), [$d1 + 8];
+        st_v4_global_f32 ($s5, $s6, $s7, $s8), [$d1 + 20];
+
+        ret;
+};
+
+/* The v2 load is done via casting to a vector datatype ptr. */
+/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)" "original"} } */
+
+/* The v3 load is scalarized (at the moment) due to gcc requiring power-of-two wide vectors. */
+/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 0>\\\);\[\n ]+s1 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 32>\\\);" "original"} } */
+
+/* The v4 load is done via casting to a vector datatype ptr. */
+/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(4\\\) <float:32> \\\*\\\)" "original"} } */
+
+/* The combines are generated to vector constructors. */
+/* { dg-final { scan-tree-dump "{s1, s0}" "original"} } */
+/* { dg-final { scan-tree-dump "{s2, s3}" "original"} } */
+
+/* Expands to BIT_FIELD_REFs. */
+/* { dg-final { scan-tree-dump "s0 = BIT_FIELD_REF <d4, 32, 0>;" "original"} } */
+/* { dg-final { scan-tree-dump "s3 = BIT_FIELD_REF <d4, 32, 32>;" "original"} } */
+
+/* The v2 store is done via casting to a vector datatype ptr and constructing a vector from the inputs. */
+/* { dg-final { scan-tree-dump "MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\)\\\] = " "original"} } */
+
+/* The v3 store is scalarized (at the moment) due to gcc requiring power-of-two wide vectors. */
+/* { dg-final { scan-tree-dump "\\\*VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 8\\\) \\\+ 0 = VIEW_CONVERT_EXPR<<float:32>>\\\(s2\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "\\\*VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 8\\\) \\\+ 4 = VIEW_CONVERT_EXPR<<float:32>>\\\(s3\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "\\\*VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 8\\\) \\\+ 8 = VIEW_CONVERT_EXPR<<float:32>>\\\(s4\\\);" "original"} } */
+
+/* The v4 store is done via casting to a vector datatype and constructing a vector from the inputs. */
+/* { dg-final { scan-tree-dump "MEM\\\[\\\(vector\\\(4\\\) <float:32> \\\*\\\)VIEW_CONVERT_EXPR<<float:32> \\\*>\\\(VIEW_CONVERT_EXPR<unsigned long>\\\(VIEW_CONVERT_EXPR<unsigned char \\\*>\\\(d1\\\)\\\) \\\+ 20\\\)\\\] = {VIEW_CONVERT_EXPR<<float:32>>\\\(s5\\\), VIEW_CONVERT_EXPR<<float:32>>\\\(s6\\\), VIEW_CONVERT_EXPR<<float:32>>\\\(s7\\\), VIEW_CONVERT_EXPR<<float:32>>\\\(s8\\\)};" "original"} } */
diff --git a/gcc/testsuite/lib/brig-dg.exp b/gcc/testsuite/lib/brig-dg.exp
new file mode 100644
index 0000000..ee96708
--- /dev/null
+++ b/gcc/testsuite/lib/brig-dg.exp
@@ -0,0 +1,29 @@
+#   Copyright (C) 2009-2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+load_lib gcc-dg.exp
+
+# Define brig callbacks for dg.exp.
+
+proc brig-dg-test { prog do_what extra_tool_flags } {
+    set result \
+	[gcc-dg-test-1 brig_target_compile $prog $do_what $extra_tool_flags]
+    
+    set comp_output [lindex $result 0]
+    set output_file [lindex $result 1]
+
+    return [list $comp_output $output_file]
+}
diff --git a/gcc/testsuite/lib/brig.exp b/gcc/testsuite/lib/brig.exp
new file mode 100644
index 0000000..d1c967d
--- /dev/null
+++ b/gcc/testsuite/lib/brig.exp
@@ -0,0 +1,40 @@
+# Copyright (C) 2009-2016 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+load_lib prune.exp
+load_lib gcc-defs.exp
+load_lib timeout.exp
+load_lib target-libpath.exp
+#
+# brig_target_compile -- compile a HSAIL input to BRIG using HSAILasm and then
+#                        compile the BRIG to target ISA using gcc
+
+proc brig_target_compile { source dest type options } {
+    global tmpdir
+    global testname_with_flags
+    if { [file extension $source] == ".hsail" } {
+	# We cannot assume all inputs are .hsail as the dg machinery
+	# calls this for some C files to check linker plugin support or
+	# similar.
+	set brig_source ${tmpdir}/[file tail ${source}].brig
+	exec HSAILasm $source -o ${brig_source}
+	set source ${brig_source}
+	# Change the testname to that of the .brig file.
+	set testname_with_flags [file tail $source]
+    }
+    return [target_compile $source $dest $type $options]
+}
+

Reply via email to