Hello. The following patch set adds support for internal functions, which are either expanded to an HSAIL instruction or emitted as a function call. Apart from that, by utilizing bit string instructions, we now support all builtins that are based on this type of instruction.
Patch set: 00c2bb6 HSA: reorder BUILT_IN_* enum handling in a switch stmt d1965a7 HSA: add initial support for internal functions ad292fd HSA: implement __builtin_popcount 2e9d0a0 HSA: expand natively not handled builtins 8767212 HSA: generate HSAIL instructions for bit string insns a185160 HSA: support 'unsigned long long' type for integer builtins fc08ffd HSA: improve warning message in IPA HSA The series has just been installed to the HSA branch. Thanks, Martin
>From 00c2bb6f1c8a04f9ac28767401919fd058cc2808 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Fri, 27 Nov 2015 11:22:22 +0100 Subject: [PATCH 1/7] HSA: reorder BUILT_IN_* enum handling in a switch stmt gcc/ChangeLog: 2015-11-30 Martin Liska <mli...@suse.cz> * hsa-gen.c (gen_hsa_insns_for_call): Logically reorder cases in a switch. --- gcc/hsa-gen.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index ed47b35..cb1dc97 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -4626,18 +4626,6 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) tree fndecl = gimple_call_fndecl (stmt); switch (DECL_FUNCTION_CODE (fndecl)) { - case BUILT_IN_OMP_GET_THREAD_NUM: - { - query_hsa_grid (stmt, BRIG_OPCODE_WORKITEMABSID, 0, hbb); - break; - } - - case BUILT_IN_OMP_GET_NUM_THREADS: - { - query_hsa_grid (stmt, BRIG_OPCODE_GRIDSIZE, 0, hbb); - break; - } - case BUILT_IN_FABS: case BUILT_IN_FABSF: gen_hsa_unaryop_for_builtin (BRIG_OPCODE_ABS, stmt, hbb); @@ -4892,6 +4880,17 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) break; } + case BUILT_IN_OMP_GET_THREAD_NUM: + { + query_hsa_grid (stmt, BRIG_OPCODE_WORKITEMABSID, 0, hbb); + break; + } + + case BUILT_IN_OMP_GET_NUM_THREADS: + { + query_hsa_grid (stmt, BRIG_OPCODE_GRIDSIZE, 0, hbb); + break; + } case BUILT_IN_GOMP_TEAMS: { gen_set_num_threads (gimple_call_arg (stmt, 1), hbb); -- 2.6.3
>From d1965a7744a14f6662cca9be40d6ab81b70e6d04 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Fri, 27 Nov 2015 15:32:45 +0100 Subject: [PATCH 2/7] HSA: add initial support for internal functions gcc/ChangeLog: 2015-11-30 Martin Liska <mli...@suse.cz> * hsa-brig.c (emit_function_directives): Use hsa_function_representation::get_linkage. Fill up code offset of internal functions. (emit_internal_fn_decl): New function. (emit_call_insn): Handle internal functions. (hsa_brig_emit_function): Likewise. (hsa_output_brig): Release memory of emitted_internal_decls. * hsa-dump.c (dump_hsa_insn_1): Print name of internal function. * hsa-gen.c (hsa_function_representation::~hsa_function_representation): Release internal function. (hsa_function_representation::get_linkage): New function. (gen_hsa_insns_for_direct_call): Fix comment of argument end block. (gen_hsa_insns_for_call_of_internal_fn): New function. (gen_hsa_unaryop_builtin_call): Dispatch between a function with declaration and an internal FN. (gen_hsa_insn_for_internal_fn_call): New function. (gen_hsa_insns_for_call): Handle internal functions. (hsa_generate_internal_fn_decl): New function. * hsa.c (hsa_float_for_bitsize): Dtto. (hsa_internal_fn::name): Dtto. (hsa_internal_fn::get_arity): Dtto. (hsa_internal_fn::get_argument_type): Dtto. * hsa.h (struct hsa_internal_fn_hasher): New structure. (hsa_internal_fn_hasher::hash): New function. (hsa_internal_fn_hasher::equal): New function. 
--- gcc/hsa-brig.c | 66 ++++++++++++++--- gcc/hsa-dump.c | 14 +++- gcc/hsa-gen.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- gcc/hsa.c | 154 +++++++++++++++++++++++++++++++++++++++ gcc/hsa.h | 80 ++++++++++++++++++++- 5 files changed, 517 insertions(+), 19 deletions(-) diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index 9f65d50..234a6c9 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -99,9 +99,12 @@ static bool brig_initialized = false; /* Mapping between emitted HSA functions and their offset in code segment. */ static hash_map<tree, BrigCodeOffset32_t> *function_offsets; -/* Set of emitted function declarations. */ +/* Hash map of emitted function declarations. */ static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations; +/* Hash table of emitted internal function declaration offsets. */ +hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; + /* List of sbr instructions. */ static vec <hsa_insn_sbr *> *switch_instructions; @@ -585,17 +588,27 @@ emit_function_directives (hsa_function_representation *f, bool is_declaration) fndir.firstInArg = htole32 (inarg_off); fndir.firstCodeBlockEntry = htole32 (scoped_off); fndir.nextModuleEntry = htole32 (next_toplev_off); - fndir.linkage = f->m_kern_p || TREE_PUBLIC (f->m_decl) ? - BRIG_LINKAGE_PROGRAM : BRIG_LINKAGE_MODULE; - + fndir.linkage = f->get_linkage (); if (!f->m_declaration_p) fndir.modifier.allBits |= BRIG_EXECUTABLE_DEFINITION; memset (&fndir.reserved, 0, sizeof (fndir.reserved)); /* Once we put a definition of function_offsets, we should not overwrite it with a declaration of the function. */ - if (!function_offsets->get (f->m_decl) || !is_declaration) - function_offsets->put (f->m_decl, brig_code.total_size); + if (f->m_internal_fn == NULL) + { + if (!function_offsets->get (f->m_decl) || !is_declaration) + function_offsets->put (f->m_decl, brig_code.total_size); + } + else + { + /* Internal function. 
*/ + hsa_internal_fn **slot = hsa_emitted_internal_decls->find_slot + (f->m_internal_fn, INSERT); + hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn); + int_fn->m_offset = brig_code.total_size; + *slot = int_fn; + } brig_code.add (&fndir, sizeof (fndir)); /* XXX terrible hack: we need to set instCount after we emit all @@ -1053,7 +1066,7 @@ emit_queued_operands (void) } /* Emit directives describing the function that is used for -a function declaration. */ + a function declaration. */ static BrigDirectiveExecutable * emit_function_declaration (tree decl) @@ -1068,6 +1081,23 @@ emit_function_declaration (tree decl) return e; } +/* Emit directives describing the function that is used for + an internal function declaration. */ + +static BrigDirectiveExecutable * +emit_internal_fn_decl (hsa_internal_fn *fn) +{ + hsa_function_representation *f = hsa_generate_internal_fn_decl (fn); + + BrigDirectiveExecutable *e = emit_function_directives (f, true); + emit_queued_operands (); + + delete f; + + return e; +} + + /* Enqueue all operands of INSN and return offset to BRIG data section to list of operand offsets. */ @@ -1476,9 +1506,19 @@ emit_call_insn (hsa_insn_call *call) (emit_operands (call->m_result_code_list, &call->m_func, call->m_args_code_list)); - function_call_linkage.safe_push - (function_linkage_pair (call->m_called_function, - call->m_func.m_brig_op_offset)); + /* Internal functions have not set m_called_function. 
*/ + if (call->m_called_function) + function_call_linkage.safe_push + (function_linkage_pair (call->m_called_function, + call->m_func.m_brig_op_offset)); + else + { + hsa_internal_fn *slot = hsa_emitted_internal_decls->find + (call->m_called_internal_fn); + gcc_assert (slot); + gcc_assert (slot->m_offset > 0); + call->m_func.m_directive_offset = slot->m_offset; + } repr.width = BRIG_WIDTH_ALL; memset (&repr.reserved, 0, sizeof (repr.reserved)); @@ -1788,6 +1828,12 @@ hsa_brig_emit_function (void) } } + for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++) + { + hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i]; + emit_internal_fn_decl (called); + } + ptr_to_fndir = emit_function_directives (hsa_cfun, false); for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn; insn; diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c index 70c71bc..d462023 100644 --- a/gcc/hsa-dump.c +++ b/gcc/hsa-dump.c @@ -950,9 +950,17 @@ dump_hsa_insn_1 (FILE *f, hsa_insn_basic *insn, int *indent) else if (is_a <hsa_insn_call *> (insn)) { hsa_insn_call *call = as_a <hsa_insn_call *> (insn); - const char *name = hsa_get_declaration_name (call->m_called_function); - - fprintf (f, "call &%s", name); + if (call->m_called_function) + { + const char *name = hsa_get_declaration_name (call->m_called_function); + fprintf (f, "call &%s", name); + } + else + { + char *name = call->m_called_internal_fn->name (); + fprintf (f, "call &%s", name); + free (name); + } if (call->m_output_arg) fprintf (f, "(%%res) "); diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index cb1dc97..8e30544 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "params.h" #include "gomp-constants.h" +#include "internal-fn.h" /* Print a warning message and set that we have seen an error. 
*/ @@ -212,16 +213,31 @@ hsa_function_representation::hsa_function_representation (tree fdecl, bool kernel_p, unsigned ssa_names_count): m_name (NULL), m_reg_count (0), m_input_args (vNULL), m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL), - m_private_variables (vNULL), m_called_functions (vNULL), m_hbb_count (0), + m_private_variables (vNULL), m_called_functions (vNULL), + m_called_internal_fns (vNULL), m_hbb_count (0), m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false), m_decl (fdecl), - m_shadow_reg (NULL), m_kernel_dispatch_count (0), m_maximum_omp_data_size (0), - m_seen_error (false), m_temp_symbol_count (0), m_ssa_map () + m_internal_fn (NULL), m_shadow_reg (NULL), m_kernel_dispatch_count (0), + m_maximum_omp_data_size (0), m_seen_error (false), m_temp_symbol_count (0), + m_ssa_map () { int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;; m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len); m_ssa_map.safe_grow_cleared (ssa_names_count); } +/* Constructor of class representing HSA function information that + is derived for an internal function. */ +hsa_function_representation::hsa_function_representation (hsa_internal_fn *fn): + m_reg_count (0), m_input_args (vNULL), + m_output_arg (NULL), m_local_symbols (NULL), + m_spill_symbols (vNULL), m_global_symbols (vNULL), + m_private_variables (vNULL), m_called_functions (vNULL), + m_called_internal_fns (vNULL), m_hbb_count (0), + m_in_ssa (true), m_kern_p (false), m_declaration_p (true), m_decl (NULL), + m_internal_fn (fn), m_shadow_reg (NULL), m_kernel_dispatch_count (0), + m_maximum_omp_data_size (0), m_seen_error (false), m_temp_symbol_count (0), + m_ssa_map () {} + /* Destructor of class holding function/kernel-wide information and state. 
*/ hsa_function_representation::~hsa_function_representation () @@ -253,6 +269,9 @@ hsa_function_representation::~hsa_function_representation () m_private_variables.release (); m_called_functions.release (); m_ssa_map.release (); + + for (unsigned i = 0; i < m_called_internal_fns.length (); i++) + delete m_called_internal_fns[i]; } hsa_op_reg * @@ -306,6 +325,16 @@ hsa_function_representation::create_hsa_temporary (BrigType16_t type) return s; } +BrigLinkage8_t +hsa_function_representation::get_linkage () +{ + if (m_internal_fn) + return BRIG_LINKAGE_PROGRAM; + + return m_kern_p || TREE_PUBLIC (m_decl) ? + BRIG_LINKAGE_PROGRAM : BRIG_LINKAGE_MODULE; +} + /* Hash map of simple OMP builtins. */ static hash_map <nofree_string_hash, omp_simple_builtin> *omp_simple_builtins = NULL; @@ -1583,6 +1612,13 @@ hsa_insn_call::hsa_insn_call (tree callee) { } +hsa_insn_call::hsa_insn_call (hsa_internal_fn *fn) + : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (NULL), + m_called_internal_fn (fn), m_output_arg (NULL), m_args_code_list (NULL), + m_result_code_list (NULL) +{ +} + /* New operator to allocate call instruction from pool alloc. */ void * @@ -3450,7 +3486,67 @@ gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb) call_insn->m_result_code_list = new hsa_op_code_list (0); } - /* Argument block start. */ + /* Argument block end. */ + hsa_insn_arg_block *arg_end = new hsa_insn_arg_block + (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn); + hbb->append_insn (arg_end); +} + +/* Generate HSA instructions for a direct call of an internal fn. + Instructions will be appended to HBB, which also needs to be the + corresponding structure to the basic_block of STMT. 
*/ + +static void +gen_hsa_insns_for_call_of_internal_fn (gimple *stmt, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + tree lhs_type = TREE_TYPE (lhs); + tree rhs1 = gimple_call_arg (stmt, 0); + tree rhs1_type = TREE_TYPE (rhs1); + enum internal_fn fn = gimple_call_internal_fn (stmt); + hsa_insn_call *call_insn = new hsa_insn_call + (new hsa_internal_fn (fn, tree_to_uhwi (TYPE_SIZE (rhs1_type)), + FLOAT_TYPE_P (rhs1_type))); + + if (!hsa_emitted_internal_decls) + hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2); + + if (!hsa_emitted_internal_decls->find (call_insn->m_called_internal_fn)) + hsa_cfun->m_called_internal_fns.safe_push (call_insn->m_called_internal_fn); + + hsa_insn_arg_block *arg_start = new hsa_insn_arg_block + (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn); + hbb->append_insn (arg_start); + + unsigned num_args = gimple_call_num_args (stmt); + + /* Function arguments. */ + for (unsigned i = 0; i < num_args; i++) + { + tree parm = gimple_call_arg (stmt, (int)i); + hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb); + + hsa_op_address *addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i); + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, src->m_type, + src, addr); + + call_insn->m_input_args.safe_push (addr->m_symbol); + hbb->append_insn (mem); + } + + call_insn->m_args_code_list = new hsa_op_code_list (num_args); + hbb->append_insn (call_insn); + + /* Assign returned value. */ + hsa_op_address *addr = gen_hsa_addr_for_arg (lhs_type, -1); + + call_insn->m_output_arg = addr->m_symbol; + call_insn->m_result_code_list = new hsa_op_code_list (1); + + /* Argument block end. 
*/ hsa_insn_arg_block *arg_end = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn); hbb->append_insn (arg_end); @@ -4364,7 +4460,10 @@ gen_hsa_unaryop_builtin_call (gimple *stmt, hsa_bb *hbb) if (!lhs) return; - gen_hsa_insns_for_direct_call (stmt, hbb); + if (gimple_call_internal_p (stmt)) + gen_hsa_insns_for_call_of_internal_fn (stmt, hbb); + else + gen_hsa_insns_for_direct_call (stmt, hbb); } /* Helper functions to create a single unary HSA operations out of calls to @@ -4592,6 +4691,87 @@ gen_hsa_ternary_atomic_for_builtin (bool ret_orig, } } +/* Generate HSA instructions for an internal fn. + Instructions will be appended to HBB, which also needs to be the + corresponding structure to the basic_block of STMT. */ + +static void +gen_hsa_insn_for_internal_fn_call (gimple *stmt, hsa_bb *hbb) +{ + gcc_checking_assert (gimple_call_internal_fn (stmt)); + internal_fn fn = gimple_call_internal_fn (stmt); + + bool is_float_type_p = false; + if (gimple_call_lhs (stmt) != NULL + && TREE_TYPE (gimple_call_lhs (stmt)) == float_type_node) + is_float_type_p = true; + + switch (fn) + { + case IFN_CEIL: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb); + break; + + case IFN_FLOOR: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb); + break; + + case IFN_RINT: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb); + break; + + case IFN_SQRT: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb); + break; + + case IFN_TRUNC: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb); + break; + + case IFN_COS: + { + if (is_float_type_p) + gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb); + else + gen_hsa_unaryop_builtin_call (stmt, hbb); + + break; + } + case IFN_EXP2: + { + if (is_float_type_p) + gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb); + else + gen_hsa_unaryop_builtin_call (stmt, hbb); + + break; + } + + case IFN_LOG2: + { + if (is_float_type_p) + 
gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb); + else + gen_hsa_unaryop_builtin_call (stmt, hbb); + + break; + } + + case IFN_SIN: + { + if (is_float_type_p) + gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb); + else + gen_hsa_unaryop_builtin_call (stmt, hbb); + break; + } + + default: + gen_hsa_insns_for_call_of_internal_fn (stmt, hbb); + break; + } +} + #define HSA_MEMORY_BUILTINS_LIMIT 128 /* Generate HSA instructions for the given call statement STMT. Instructions @@ -4604,6 +4784,12 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) tree lhs = gimple_call_lhs (stmt); hsa_op_reg *dest; + if (gimple_call_internal_p (stmt)) + { + gen_hsa_insn_for_internal_fn_call (stmt, hbb); + return; + } + if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) { tree function_decl = gimple_call_fndecl (stmt); @@ -5399,6 +5585,32 @@ hsa_generate_function_declaration (tree decl) return fun; } + +/* Generate function representation that corresponds to + an internal FN. */ + +hsa_function_representation * +hsa_generate_internal_fn_decl (hsa_internal_fn *fn) +{ + hsa_function_representation *fun = new hsa_function_representation (fn); + + fun->m_name = fn->name (); + + for (unsigned i = 0; i < fn->get_arity (); i++) + { + hsa_symbol *arg = new hsa_symbol + (fn->get_argument_type (i), BRIG_SEGMENT_ARG, BRIG_LINKAGE_NONE); + arg->m_name_number = i; + fun->m_input_args.safe_push (arg); + } + + fun->m_output_arg = new hsa_symbol + (fn->get_argument_type (-1), BRIG_SEGMENT_ARG, BRIG_LINKAGE_NONE); + fun->m_output_arg->m_name = "res"; + + return fun; +} + /* Return true if switch statement S can be transformed to a SBR instruction in HSAIL. */ diff --git a/gcc/hsa.c b/gcc/hsa.c index c728608..db0f2e6 100644 --- a/gcc/hsa.c +++ b/gcc/hsa.c @@ -37,6 +37,8 @@ along with GCC; see the file COPYING3. 
If not see #include "stringpool.h" #include "symbol-summary.h" #include "hsa.h" +#include "internal-fn.h" +#include "ctype.h" /* Structure containing intermediate HSA representation of the generated function. */ @@ -101,6 +103,7 @@ hsa_init_compilation_unit_data (void) hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8); hsa_failed_functions = new hash_set <tree> (); + hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2); } /* Free data structures that are used when dealing with different @@ -112,6 +115,7 @@ hsa_deinit_compilation_unit_data (void) gcc_assert (compilation_unit_data_initialized); delete hsa_failed_functions; + delete hsa_emitted_internal_decls; for (hash_table <hsa_noop_symbol_hasher>::iterator it = hsa_global_variable_symbols->begin (); @@ -439,6 +443,24 @@ hsa_uint_for_bitsize (unsigned bitsize) } } +/* Return BRIG float type with BITSIZE length. */ + +BrigType16_t +hsa_float_for_bitsize (unsigned bitsize) +{ + switch (bitsize) + { + case 16: + return BRIG_TYPE_F16; + case 32: + return BRIG_TYPE_F32; + case 64: + return BRIG_TYPE_F64; + default: + gcc_unreachable (); + } +} + /* Return HSA bit-type with the same size as the type T. 
*/ BrigType16_t @@ -798,4 +820,136 @@ hsa_fail_cfun (void) hsa_cfun->m_seen_error = true; } +char * +hsa_internal_fn::name () +{ + char *name = xstrdup (internal_fn_name (m_fn)); + for (char *ptr = name; *ptr; ptr++) + *ptr = TOLOWER (*ptr); + + const char *suffix = NULL; + if (m_float_function_p && m_type_bit_size == 32) + suffix = "f"; + else if(!m_float_function_p && m_type_bit_size == 64) + suffix = "l"; + + if (suffix) + { + char *name2 = concat (name, suffix, NULL); + free (name); + name = name2; + } + + hsa_sanitize_name (name); + return name; +} + +unsigned +hsa_internal_fn::get_arity () +{ + switch (m_fn) + { + case IFN_ACOS: + case IFN_ASIN: + case IFN_ATAN: + case IFN_COS: + case IFN_EXP: + case IFN_EXP10: + case IFN_EXP2: + case IFN_EXPM1: + case IFN_LOG: + case IFN_LOG10: + case IFN_LOG1P: + case IFN_LOG2: + case IFN_LOGB: + case IFN_SIGNIFICAND: + case IFN_SIN: + case IFN_SQRT: + case IFN_TAN: + case IFN_CEIL: + case IFN_FLOOR: + case IFN_NEARBYINT: + case IFN_RINT: + case IFN_ROUND: + case IFN_TRUNC: + case IFN_CLRSB: + case IFN_CLZ: + case IFN_CTZ: + case IFN_FFS: + case IFN_PARITY: + case IFN_POPCOUNT: + return 1; + case IFN_ATAN2: + case IFN_COPYSIGN: + case IFN_FMOD: + case IFN_POW: + case IFN_REMAINDER: + case IFN_SCALB: + case IFN_LDEXP: + return 2; + break; + default: + gcc_unreachable (); + } +} + +BrigType16_t +hsa_internal_fn::get_argument_type (int n) +{ + switch (m_fn) + { + case IFN_ACOS: + case IFN_ASIN: + case IFN_ATAN: + case IFN_COS: + case IFN_EXP: + case IFN_EXP10: + case IFN_EXP2: + case IFN_EXPM1: + case IFN_LOG: + case IFN_LOG10: + case IFN_LOG1P: + case IFN_LOG2: + case IFN_LOGB: + case IFN_SIGNIFICAND: + case IFN_SIN: + case IFN_SQRT: + case IFN_TAN: + case IFN_CEIL: + case IFN_FLOOR: + case IFN_NEARBYINT: + case IFN_RINT: + case IFN_ROUND: + case IFN_TRUNC: + case IFN_ATAN2: + case IFN_COPYSIGN: + case IFN_FMOD: + case IFN_POW: + case IFN_REMAINDER: + case IFN_SCALB: + return hsa_float_for_bitsize (m_type_bit_size); + case 
IFN_CLRSB: + case IFN_CLZ: + case IFN_CTZ: + case IFN_FFS: + case IFN_PARITY: + case IFN_POPCOUNT: + { + if (n == -1) + return BRIG_TYPE_S32; + else + return hsa_uint_for_bitsize (m_type_bit_size); + } + case IFN_LDEXP: + { + if (n == -1 || n == 0) + return hsa_float_for_bitsize (m_type_bit_size); + else + return BRIG_TYPE_S32; + } + default: + gcc_unreachable (); + } +} + #include "gt-hsa.h" diff --git a/gcc/hsa.h b/gcc/hsa.h index e4bea99..78f14cd 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -756,12 +756,50 @@ is_a_helper <hsa_insn_seg *>::test (hsa_insn_basic *p) || p->m_opcode == BRIG_OPCODE_FTOS); } +/* Class for internal functions for purpose of HSA emission. */ + +class hsa_internal_fn +{ +public: + hsa_internal_fn (enum internal_fn fn, unsigned type_bit_size, + bool float_function_p): + m_fn (fn), m_type_bit_size (type_bit_size), + m_float_function_p (float_function_p), m_offset (0) {} + + hsa_internal_fn (const hsa_internal_fn *f): + m_fn (f->m_fn), m_type_bit_size (f->m_type_bit_size), + m_float_function_p (f->m_float_function_p), m_offset (f->m_offset) {} + + /* Return arity of the internal function. */ + unsigned get_arity (); + + /* Return BRIG type of N-th argument, if -1 is passed, return value type + is received. */ + BrigType16_t get_argument_type (int n); + + /* Return function name. The memory must be released by a caller. */ + char *name (); + + /* Internal function. */ + enum internal_fn m_fn; + + /* Bit width of return type. */ + unsigned m_type_bit_size; + + /* True if the function accepts a float type as first argument. */ + bool m_float_function_p; + + /* BRIG offset of declaration of the function. */ + BrigCodeOffset32_t m_offset; +}; + /* HSA instruction for function call. */ class hsa_insn_call : public hsa_insn_basic { public: hsa_insn_call (tree callee); + hsa_insn_call (hsa_internal_fn *fn); /* Default destructor. 
*/ ~hsa_insn_call (); @@ -771,6 +809,9 @@ public: /* Called function */ tree m_called_function; + /* Called internal function. */ + hsa_internal_fn *m_called_internal_fn; + /* Input formal arguments. */ auto_vec <hsa_symbol *> m_input_args; @@ -1026,7 +1067,7 @@ hsa_noop_symbol_hasher::hash (const value_type item) return DECL_UID (item->m_decl); } -/* Return true if the DECL_UIDs of decls both symbols refer to are equal. */ +/* Return true if the DECL_UIDs of decls both symbols refer to are equal. */ inline bool hsa_noop_symbol_hasher::equal (const value_type a, const compare_type b) @@ -1042,6 +1083,7 @@ class hsa_function_representation public: hsa_function_representation (tree fdecl, bool kernel_p, unsigned ssa_names_count); + hsa_function_representation (hsa_internal_fn *fn); ~hsa_function_representation (); /* Builds a shadow register that is utilized to a kernel dispatch. */ @@ -1056,6 +1098,9 @@ public: so we need hsa_bb instances of them. */ void init_extra_bbs (); + /* Return linkage of the representation. */ + BrigLinkage8_t get_linkage (); + /* Create a private symbol of requested TYPE. */ hsa_symbol *create_hsa_temporary (BrigType16_t type); @@ -1093,6 +1138,9 @@ public: /* Vector of called function declarations. */ vec <tree> m_called_functions; + /* Vector of used internal functions. */ + vec <hsa_internal_fn *> m_called_internal_fns; + /* Number of HBB BBs. */ int m_hbb_count; @@ -1108,6 +1156,9 @@ public: /* Function declaration tree. */ tree m_decl; + /* Internal function info is used for declarations of internal functions. */ + hsa_internal_fn *m_internal_fn; + /* Runtime shadow register. */ hsa_op_reg *m_shadow_reg; @@ -1208,6 +1259,30 @@ public: void (*m_emit_func) (gimple *stmt, hsa_bb *); }; +/* Class for hashing hsa_internal_fn. */ + +struct hsa_internal_fn_hasher: free_ptr_hash <hsa_internal_fn> +{ + static inline hashval_t hash (const value_type); + static inline bool equal (const value_type, const compare_type); +}; + +/* Hash hsa_symbol. 
*/ + +inline hashval_t +hsa_internal_fn_hasher::hash (const value_type item) +{ + return item->m_fn; +} + +/* Return true if the DECL_UIDs of decls both symbols refer to are equal. */ + +inline bool +hsa_internal_fn_hasher::equal (const value_type a, const compare_type b) +{ + return a->m_fn == b->m_fn && a->m_type_bit_size == b->m_type_bit_size; +} + /* in hsa.c */ extern struct hsa_function_representation *hsa_cfun; extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; @@ -1226,6 +1301,7 @@ bool hsa_opcode_floating_bit_insn_p (BrigOpcode16_t); unsigned hsa_type_bit_size (BrigType16_t t); BrigType16_t hsa_bittype_for_bitsize (unsigned bitsize); BrigType16_t hsa_uint_for_bitsize (unsigned bitsize); +BrigType16_t hsa_float_for_bitsize (unsigned bitsize); BrigType16_t hsa_bittype_for_type (BrigType16_t t); bool hsa_type_float_p (BrigType16_t type); bool hsa_type_integer_p (BrigType16_t type); @@ -1258,12 +1334,14 @@ hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); hsa_bb *hsa_init_new_bb (basic_block); hsa_function_representation *hsa_generate_function_declaration (tree decl); +hsa_function_representation *hsa_generate_internal_fn_decl (hsa_internal_fn *); tree hsa_get_host_function (tree decl); /* In hsa-regalloc.c. */ void hsa_regalloc (void); /* In hsa-brig.c. */ +extern hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; void hsa_brig_emit_function (void); void hsa_output_brig (void); BrigType16_t bittype_for_type (BrigType16_t t); -- 2.6.3
>From ad292fd89ee103c2007660a084f2bc115f5655dd Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Mon, 30 Nov 2015 13:59:11 +0100 Subject: [PATCH 3/7] HSA: implement __builtin_popcount gcc/ChangeLog: 2015-11-30 Martin Liska <mli...@suse.cz> * hsa-brig.c (emit_srctype_insn): New function. (emit_insn): Call the function for hsa_insn_srctype. * hsa-gen.c (hsa_init_data_for_cfun): Allocate a new object_allocator. (hsa_deinit_data_for_cfun): Release the object_allocator. (hsa_insn_srctype::operator new): New function. (hsa_insn_srctype::hsa_insn_srctype): Likewise. (hsa_insn_packed::hsa_insn_packed): Likewise. (gen_hsa_popcount): Likewise. (gen_hsa_insn_for_internal_fn_call): Handle IFN_POPCOUNT. (gen_hsa_insns_for_call): Handle BUILT_IN_POPCOUNT and BUILT_IN_POPCOUNTL. * hsa.h (is_a_helper ::test): New function. --- gcc/hsa-brig.c | 26 +++++++++++++++++++++++ gcc/hsa-gen.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- gcc/hsa.h | 33 +++++++++++++++++++++++++---- 3 files changed, 117 insertions(+), 8 deletions(-) diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index 234a6c9..9ad8c89 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -1607,6 +1607,30 @@ emit_queue_insn (hsa_insn_queue *insn) brig_insn_count++; } +/* Emit source type instruction INSN. */ + +static void +emit_srctype_insn (hsa_insn_srctype *insn) +{ + /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ + struct BrigInstSourceType repr; + unsigned operand_count = insn->operand_count (); + gcc_checking_assert (operand_count >= 2); + + memset (&repr, 0, sizeof (repr)); + repr.sourceType = htole16 (insn->m_source_type); + repr.base.base.byteCount = htole16 (sizeof (repr)); + repr.base.base.kind = htole16 (BRIG_KIND_INST_SOURCE_TYPE); + repr.base.opcode = htole16 (insn->m_opcode); + repr.base.type = htole16 (insn->m_type); + + repr.base.operands = htole32 (emit_insn_operands (insn)); + brig_code.add (&repr, sizeof (struct BrigInstSourceType)); + brig_insn_count++; +} + +/* Emit packed instruction INSN. */ + static void emit_packed_insn (hsa_insn_packed *insn) { @@ -1749,6 +1773,8 @@ emit_insn (hsa_insn_basic *insn) emit_comment_insn (comment); else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn)) emit_queue_insn (queue); + else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn)) + emit_srctype_insn (srctype); else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn)) emit_packed_insn (packed); else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn)) diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 8e30544..717106f 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -143,6 +143,7 @@ static object_allocator<hsa_insn_call> *hsa_allocp_inst_call; static object_allocator<hsa_insn_arg_block> *hsa_allocp_inst_arg_block; static object_allocator<hsa_insn_comment> *hsa_allocp_inst_comment; static object_allocator<hsa_insn_queue> *hsa_allocp_inst_queue; +static object_allocator<hsa_insn_srctype> *hsa_allocp_inst_srctype; static object_allocator<hsa_insn_packed> *hsa_allocp_inst_packed; static object_allocator<hsa_insn_cvt> *hsa_allocp_inst_cvt; static object_allocator<hsa_insn_alloca> *hsa_allocp_inst_alloca; @@ -447,6 +448,8 @@ hsa_init_data_for_cfun () = new object_allocator<hsa_insn_comment> ("HSA comment instructions"); hsa_allocp_inst_queue = new object_allocator<hsa_insn_queue> ("HSA queue instructions"); + 
hsa_allocp_inst_srctype + = new object_allocator<hsa_insn_srctype> ("HSA source type instructions"); hsa_allocp_inst_packed = new object_allocator<hsa_insn_packed> ("HSA packed instructions"); hsa_allocp_inst_cvt @@ -505,6 +508,7 @@ hsa_deinit_data_for_cfun (void) delete hsa_allocp_inst_arg_block; delete hsa_allocp_inst_comment; delete hsa_allocp_inst_queue; + delete hsa_allocp_inst_srctype; delete hsa_allocp_inst_packed; delete hsa_allocp_inst_cvt; delete hsa_allocp_inst_alloca; @@ -1687,6 +1691,24 @@ hsa_insn_queue::hsa_insn_queue (int nops, BrigOpcode opcode) { } +/* New operator to allocate source type instruction from pool alloc. */ + +void * +hsa_insn_srctype::operator new (size_t) +{ + return hsa_allocp_inst_srctype->allocate_raw (); +} + +/* Constructor of class representing the source type instruction in HSAIL. */ + +hsa_insn_srctype::hsa_insn_srctype (int nops, BrigOpcode opcode, + BrigType16_t destt, BrigType16_t srct, + hsa_op_base *arg0, hsa_op_base *arg1, + hsa_op_base *arg2 = NULL) + : hsa_insn_basic (nops, opcode, destt, arg0, arg1, arg2), + m_source_type (srct) +{} + /* New operator to allocate packed instruction from pool alloc. */ void * @@ -1701,8 +1723,7 @@ hsa_insn_packed::hsa_insn_packed (int nops, BrigOpcode opcode, BrigType16_t destt, BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1, hsa_op_base *arg2) - : hsa_insn_basic (nops, opcode, destt, arg0, arg1, arg2), - m_source_type (srct) + : hsa_insn_srctype (nops, opcode, destt, srct, arg0, arg1, arg2) { m_operand_list = new hsa_op_operand_list (nops - 1); } @@ -3860,6 +3881,34 @@ gen_hsa_alloca (gcall *call, hsa_bb *hbb) hbb->append_insn (seg); } +/* Emit instructions that implement popcount builtin STMT. + Instructions are appended to basic block HBB. 
*/ + +static void +gen_hsa_popcount (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + gcc_checking_assert (hsa_type_integer_p (arg->m_type)); + + if (hsa_type_bit_size (arg->m_type) < 32) + arg = arg->get_in_type (BRIG_TYPE_B32, hbb); + + if (!hsa_btype_p (arg->m_type)) + arg = arg->get_in_type (hsa_bittype_for_type (arg->m_type), hbb); + + hsa_insn_srctype *popcount = new hsa_insn_srctype + (2, BRIG_OPCODE_POPCOUNT, BRIG_TYPE_U32, arg->m_type, NULL, arg); + hbb->append_insn (popcount); + popcount->set_output_in_type (dest, 0, hbb); +} + /* Set VALUE to a shadow kernel debug argument and append a new instruction to HBB basic block. */ @@ -4696,7 +4745,7 @@ gen_hsa_ternary_atomic_for_builtin (bool ret_orig, corresponding structure to the basic_block of STMT. */ static void -gen_hsa_insn_for_internal_fn_call (gimple *stmt, hsa_bb *hbb) +gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb) { gcc_checking_assert (gimple_call_internal_fn (stmt)); internal_fn fn = gimple_call_internal_fn (stmt); @@ -4766,6 +4815,10 @@ gen_hsa_insn_for_internal_fn_call (gimple *stmt, hsa_bb *hbb) break; } + case IFN_POPCOUNT: + gen_hsa_popcount (stmt, hbb); + break; + default: gen_hsa_insns_for_call_of_internal_fn (stmt, hbb); break; @@ -4786,7 +4839,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) if (gimple_call_internal_p (stmt)) { - gen_hsa_insn_for_internal_fn_call (stmt, hbb); + gen_hsa_insn_for_internal_fn_call (call, hbb); return; } @@ -4866,6 +4919,11 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb); break; + case BUILT_IN_POPCOUNT: + case BUILT_IN_POPCOUNTL: + gen_hsa_popcount (call, hbb); + break; + case BUILT_IN_ATOMIC_LOAD_1: case BUILT_IN_ATOMIC_LOAD_2: case 
BUILT_IN_ATOMIC_LOAD_4: diff --git a/gcc/hsa.h b/gcc/hsa.h index 78f14cd..cfe08f1 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -928,12 +928,12 @@ is_a_helper <hsa_insn_queue *>::test (hsa_insn_basic *p) return (p->m_opcode == BRIG_OPCODE_ADDQUEUEWRITEINDEX); } -/* HSA packed instruction. */ +/* HSA source type instruction. */ -class hsa_insn_packed : public hsa_insn_basic +class hsa_insn_srctype: public hsa_insn_basic { public: - hsa_insn_packed (int nops, BrigOpcode opcode, BrigType16_t destt, + hsa_insn_srctype (int nops, BrigOpcode opcode, BrigType16_t destt, BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1, hsa_op_base *arg2); @@ -943,6 +943,32 @@ public: /* Source type. */ BrigType16_t m_source_type; + /* Destructor. */ + ~hsa_insn_srctype (); +}; + +/* Report whether or not P is a source type instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_srctype *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_POPCOUNT); +} + +/* HSA packed instruction. */ + +class hsa_insn_packed : public hsa_insn_srctype +{ +public: + hsa_insn_packed (int nops, BrigOpcode opcode, BrigType16_t destt, + BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1, + hsa_op_base *arg2); + + /* Pool allocator. */ + void *operator new (size_t); + /* Operand list for an operand of the instruction. */ hsa_op_operand_list *m_operand_list; @@ -1344,7 +1370,6 @@ void hsa_regalloc (void); extern hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; void hsa_brig_emit_function (void); void hsa_output_brig (void); -BrigType16_t bittype_for_type (BrigType16_t t); unsigned hsa_get_imm_brig_type_len (BrigType16_t type); void hsa_brig_emit_omp_symbols (void); -- 2.6.3
>From 2e9d0a07cfb1626f5a236cc421e5684787048ab9 Mon Sep 17 00:00:00 2001
From: marxin <mli...@suse.cz>
Date: Mon, 30 Nov 2015 15:27:08 +0100
Subject: [PATCH 4/7] HSA: expand natively not handled builtins

gcc/ChangeLog:

2015-11-30  Martin Liska  <mli...@suse.cz>

        * hsa-gen.c (expand_builtins): New function.
        (pass_gen_hsail::execute): Call the function.
---
 gcc/hsa-gen.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 717106f..c7ad4fa 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "gomp-constants.h"
 #include "internal-fn.h"
+#include "builtins.h"
 
 /* Print a warning message and set that we have seen an error.  */
 
@@ -5961,6 +5962,75 @@ convert_switch_statements ()
     }
 }
 
+/* Expand builtins that can't be handled by HSA back-end.  Calls to
+   BUILT_IN_CEXPF, BUILT_IN_CEXPIF and BUILT_IN_CEXPI in the current function
+   are rewritten into cos and sin calls combined by a COMPLEX_EXPR.  */
+
+static void
+expand_builtins ()
+{
+  function *func = DECL_STRUCT_FUNCTION (current_function_decl);
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, func)
+  {
+    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+         gsi_next (&gsi))
+      {
+        gimple *stmt = gsi_stmt (gsi);
+
+        if (gimple_code (stmt) != GIMPLE_CALL)
+          continue;
+
+        gcall *call = as_a <gcall *> (stmt);
+
+        if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+          continue;
+
+        tree fndecl = gimple_call_fndecl (stmt);
+        enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
+        switch (fn)
+          {
+          case BUILT_IN_CEXPF:
+          case BUILT_IN_CEXPIF:
+          case BUILT_IN_CEXPI:
+            {
+              /* Similar to builtins.c (expand_builtin_cexpi), the builtin can
+                 be transformed to: cexp (I * z) = cos (z) + I * sin (z).  */
+              tree lhs = gimple_call_lhs (stmt);
+              tree rhs = gimple_call_arg (stmt, 0);
+              tree rhs_type = TREE_TYPE (rhs);
+              tree real_part = make_temp_ssa_name (rhs_type, NULL,
+                                                   "cexp_real_part");
+              tree imag_part = make_temp_ssa_name (rhs_type, NULL,
+                                                   "cexp_imag_part");
+
+              /* mathfn_built_in picks the variant operating on RHS_TYPE.  */
+              tree cos_fndecl = mathfn_built_in (rhs_type, BUILT_IN_COS);
+              gcall *cos = gimple_build_call (cos_fndecl, 1, rhs);
+              gimple_call_set_lhs (cos, real_part);
+              gsi_insert_before (&gsi, cos, GSI_SAME_STMT);
+
+              tree sin_fndecl = mathfn_built_in (rhs_type, BUILT_IN_SIN);
+              gcall *sin = gimple_build_call (sin_fndecl, 1, rhs);
+              gimple_call_set_lhs (sin, imag_part);
+              gsi_insert_before (&gsi, sin, GSI_SAME_STMT);
+
+              /* Replace the call in place so that GSI stays valid and the
+                 loop's gsi_next does not skip the following statement.  */
+              gassign *assign = gimple_build_assign (lhs, COMPLEX_EXPR,
+                                                     real_part, imag_part);
+              gsi_replace (&gsi, assign, true);
+
+              break;
+            }
+          default:
+            break;
+          }
+      }
+  }
+}
+
 /* Emit HSA module variables that are global for the entire module.  */
 
 static void
@@ -6094,6 +6164,7 @@ pass_gen_hsail::execute (function *)
     (cgraph_node::get_create (current_function_decl));
 
   convert_switch_statements ();
+  expand_builtins ();
   generate_hsa (s->m_kind == HSA_KERNEL);
   TREE_ASM_WRITTEN (current_function_decl) = 1;
   return TODO_discard_function;
-- 
2.6.3
>From 8767212a31fc9fe26eb532601507725b042d2f42 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Tue, 1 Dec 2015 22:23:49 +0100 Subject: [PATCH 5/7] HSA: generate HSAIL instructions for bit string insns gcc/ChangeLog: 2015-12-01 Martin Liska <mli...@suse.cz> * hsa-dump.c (dump_hsa_insn_1): Properly dump hsa_insn_srctype. * hsa-gen.c (gen_hsa_unary_operation): Change argument type of opcode. (gen_hsa_insns_for_operation_assignment): Likewise. (gen_hsa_insns_for_call_of_internal_fn): Do not pass FLOAT_TYPE_P to ctor of hsa_internal_fn. (gen_hsa_clrsb): New function. (gen_hsa_popcount): Likewise. (gen_hsa_parity): Likewise. (gen_hsa_insn_for_internal_fn_call): Handle aforementioned builtins. (gen_hsa_insns_for_call): Likewise. * hsa.c (hsa_internal_fn::name): Do not consider integer types. (hsa_internal_fn::get_arity): Make handled cases as unreachable. (hsa_internal_fn::get_argument_type): Likewise. * hsa.h (is_a_helper ::test): Add new possible opcodes for hsa_insn_srctype. --- gcc/hsa-dump.c | 10 +++ gcc/hsa-gen.c | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- gcc/hsa.c | 28 ++------ gcc/hsa.h | 15 ++--- 4 files changed, 213 insertions(+), 41 deletions(-) diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c index d462023..b9f4de0 100644 --- a/gcc/hsa-dump.c +++ b/gcc/hsa-dump.c @@ -980,6 +980,16 @@ dump_hsa_insn_1 (FILE *f, hsa_insn_basic *insn, int *indent) hsa_insn_comment *c = as_a <hsa_insn_comment *> (insn); fprintf (f, "%s", c->m_comment); } + else if (is_a <hsa_insn_srctype *> (insn)) + { + hsa_insn_srctype *srctype = as_a <hsa_insn_srctype *> (insn); + + fprintf (f, "%s_%s_%s ", hsa_opcode_name (srctype->m_opcode), + hsa_type_name (srctype->m_type), + hsa_type_name (srctype->m_source_type)); + + dump_hsa_operands (f, insn); + } else if (is_a <hsa_insn_packed *> (insn)) { hsa_insn_packed *packed = as_a <hsa_insn_packed *> (insn); diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index c7ad4fa..136986c 100644 --- a/gcc/hsa-gen.c +++ 
b/gcc/hsa-gen.c @@ -2899,14 +2899,17 @@ gen_hsa_cmp_insn_from_gimple (enum tree_code code, tree lhs, tree rhs, as a single operand. */ static void -gen_hsa_unary_operation (int opcode, hsa_op_reg *dest, +gen_hsa_unary_operation (BrigOpcode opcode, hsa_op_reg *dest, hsa_op_with_type *op1, hsa_bb *hbb) { gcc_checking_assert (dest); hsa_insn_basic *insn; if (opcode == BRIG_OPCODE_MOV && hsa_needs_cvt (dest->m_type, op1->m_type)) - insn = new hsa_insn_cvt (dest, op1); + insn = new hsa_insn_cvt (dest, op1); + else if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT) + insn = new hsa_insn_srctype (2, opcode, BRIG_TYPE_U32, op1->m_type, NULL, + op1); else { insn = new hsa_insn_basic (2, opcode, dest->m_type, dest, op1); @@ -2922,6 +2925,9 @@ gen_hsa_unary_operation (int opcode, hsa_op_reg *dest, } hbb->append_insn (insn); + + if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT) + insn->set_output_in_type (dest, 0, hbb); } /* Generate a binary instruction with OPCODE and append it to a basic block @@ -2968,7 +2974,7 @@ gen_hsa_insns_for_operation_assignment (gimple *assign, hsa_bb *hbb) tree rhs2 = gimple_assign_rhs2 (assign); tree rhs3 = gimple_assign_rhs3 (assign); - int opcode; + BrigOpcode opcode; switch (code) { @@ -3530,11 +3536,9 @@ gen_hsa_insns_for_call_of_internal_fn (gimple *stmt, hsa_bb *hbb) tree rhs1_type = TREE_TYPE (rhs1); enum internal_fn fn = gimple_call_internal_fn (stmt); hsa_insn_call *call_insn = new hsa_insn_call - (new hsa_internal_fn (fn, tree_to_uhwi (TYPE_SIZE (rhs1_type)), - FLOAT_TYPE_P (rhs1_type))); + (new hsa_internal_fn (fn, tree_to_uhwi (TYPE_SIZE (rhs1_type)))); - if (!hsa_emitted_internal_decls) - hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2); + gcc_checking_assert (FLOAT_TYPE_P (rhs1_type)); if (!hsa_emitted_internal_decls->find (call_insn->m_called_internal_fn)) hsa_cfun->m_called_internal_fns.safe_push (call_insn->m_called_internal_fn); @@ -3882,11 +3886,82 @@ gen_hsa_alloca 
(gcall *call, hsa_bb *hbb) hbb->append_insn (seg); } -/* Emit instructions that implement popcount builtin STMT. +/* Emit instructions that implement clrsb builtin STMT: + Returns the number of leading redundant sign bits in x, i.e. the number + of bits following the most significant bit that are identical to it. + There are no special cases for 0 or other values. Instructions are appended to basic block HBB. */ static void -gen_hsa_popcount (gcall *call, hsa_bb *hbb) +gen_hsa_clrsb (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + BrigType16_t bittype = hsa_bittype_for_type (arg->m_type); + unsigned bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs1))); + gcc_checking_assert (bitsize >= 32); + + /* Set true to MOST_SIG if the most significant bit is set to one. */ + hsa_op_immed *c = new hsa_op_immed (1ul << (bitsize - 1), + hsa_uint_for_bitsize (bitsize)); + + hsa_op_reg *and_reg = new hsa_op_reg (bittype); + gen_hsa_binary_operation (BRIG_OPCODE_AND, and_reg, arg, c, hbb); + + hsa_op_reg *most_sign = new hsa_op_reg (BRIG_TYPE_B1); + hsa_insn_cmp *cmp = new hsa_insn_cmp + (BRIG_COMPARE_EQ, most_sign->m_type, most_sign, and_reg, c); + hbb->append_insn (cmp); + + /* If the most significant bit is one, negate the input. Otherwise + shift the input value to left by one bit. */ + hsa_op_reg *arg_neg = new hsa_op_reg (arg->m_type); + gen_hsa_unary_operation (BRIG_OPCODE_NEG, arg_neg, arg, hbb); + + hsa_op_reg *shifted_arg = new hsa_op_reg (arg->m_type); + gen_hsa_binary_operation (BRIG_OPCODE_SHL, shifted_arg, arg, + new hsa_op_immed (1, BRIG_TYPE_U64), hbb); + + /* Assign the value that can be used for FIRSTBIT instruction according + to the most significant bit. 
*/ + hsa_op_reg *tmp = new hsa_op_reg (bittype); + hsa_insn_basic *cmov = new hsa_insn_basic + (4, BRIG_OPCODE_CMOV, bittype, tmp, most_sign, arg_neg, shifted_arg); + hbb->append_insn (cmov); + + hsa_op_reg *leading_bits = new hsa_op_reg (BRIG_TYPE_S32); + gen_hsa_unary_operation (BRIG_OPCODE_FIRSTBIT, leading_bits, + tmp->get_in_type (hsa_uint_for_bitsize (bitsize), + hbb), hbb); + + /* Set flag if the input value is equal to zero. */ + hsa_op_reg *is_zero = new hsa_op_reg (BRIG_TYPE_B1); + cmp = new hsa_insn_cmp + (BRIG_COMPARE_EQ, is_zero->m_type, is_zero, arg, + new hsa_op_immed (0, arg->m_type)); + hbb->append_insn (cmp); + + /* Return the number of leading bits, or 31 if the input value is zero. */ + cmov = new hsa_insn_basic + (4, BRIG_OPCODE_CMOV, BRIG_TYPE_B32, NULL, is_zero, + new hsa_op_immed (31, BRIG_TYPE_U32), + leading_bits->get_in_type (BRIG_TYPE_B32, hbb)); + hbb->append_insn (cmov); + cmov->set_output_in_type (dest, 0, hbb); +} + +/* Emit instructions that implement ffs builtin STMT: + Returns one plus the index of the least significant 1-bit of x, + or if x is zero, returns zero. + Instructions are appended to basic block HBB. 
*/ + +static void +gen_hsa_ffs (gcall *call, hsa_bb *hbb) { tree lhs = gimple_call_lhs (call); if (lhs == NULL_TREE) @@ -3896,6 +3971,22 @@ gen_hsa_popcount (gcall *call, hsa_bb *hbb) tree rhs1 = gimple_call_arg (call, 0); hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + + hsa_op_reg *tmp = new hsa_op_reg (BRIG_TYPE_U32); + hsa_insn_srctype *insn = new hsa_insn_srctype + (2, BRIG_OPCODE_LASTBIT, tmp->m_type, arg->m_type, tmp, arg); + hbb->append_insn (insn); + + hsa_insn_basic *addition = new hsa_insn_basic + (3, BRIG_OPCODE_ADD, tmp->m_type, NULL, tmp, + new hsa_op_immed (1, tmp->m_type)); + hbb->append_insn (addition); + addition->set_output_in_type (dest, 0, hbb); +} + +static void +gen_hsa_popcount_to_dest (hsa_op_reg *dest, hsa_op_with_type *arg, hsa_bb *hbb) +{ gcc_checking_assert (hsa_type_integer_p (arg->m_type)); if (hsa_type_bit_size (arg->m_type) < 32) @@ -3910,6 +4001,48 @@ gen_hsa_popcount (gcall *call, hsa_bb *hbb) popcount->set_output_in_type (dest, 0, hbb); } +/* Emit instructions that implement parity builtin STMT: + Returns the parity of x, i.e. the number of 1-bits in x modulo 2. + Instructions are appended to basic block HBB. */ + +static void +gen_hsa_parity (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + + hsa_op_reg *popcount = new hsa_op_reg (BRIG_TYPE_U32); + gen_hsa_popcount_to_dest (popcount, arg, hbb); + + hsa_insn_basic *insn = new hsa_insn_basic + (3, BRIG_OPCODE_REM, popcount->m_type, NULL, popcount, + new hsa_op_immed (2, popcount->m_type)); + hbb->append_insn (insn); + insn->set_output_in_type (dest, 0, hbb); +} + +/* Emit instructions that implement popcount builtin STMT. + Instructions are appended to basic block HBB. 
*/ + +static void +gen_hsa_popcount (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + + gen_hsa_popcount_to_dest (dest, arg, hbb); +} + /* Set VALUE to a shadow kernel debug argument and append a new instruction to HBB basic block. */ @@ -4488,7 +4621,7 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call) added. Note that nothing will be created if STMT does not have a LHS. */ static void -gen_hsa_unaryop_for_builtin (int opcode, gimple *stmt, hsa_bb *hbb) +gen_hsa_unaryop_for_builtin (BrigOpcode opcode, gimple *stmt, hsa_bb *hbb) { tree lhs = gimple_call_lhs (stmt); if (!lhs) @@ -4524,7 +4657,8 @@ gen_hsa_unaryop_builtin_call (gimple *stmt, hsa_bb *hbb) added. Note that nothing will be created if STMT does not have a LHS. */ static void -gen_hsa_unaryop_or_call_for_builtin (int opcode, gimple *stmt, hsa_bb *hbb) +gen_hsa_unaryop_or_call_for_builtin (BrigOpcode opcode, gimple *stmt, + hsa_bb *hbb) { if (flag_unsafe_math_optimizations) gen_hsa_unaryop_for_builtin (opcode, stmt, hbb); @@ -4816,6 +4950,26 @@ gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb) break; } + case IFN_CLRSB: + gen_hsa_clrsb (stmt, hbb); + break; + + case IFN_CLZ: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb); + break; + + case IFN_CTZ: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb); + break; + + case IFN_FFS: + gen_hsa_ffs (stmt, hbb); + break; + + case IFN_PARITY: + gen_hsa_parity (stmt, hbb); + break; + case IFN_POPCOUNT: gen_hsa_popcount (stmt, hbb); break; @@ -4920,6 +5074,31 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb); break; + case BUILT_IN_CLRSB: + case BUILT_IN_CLRSBL: + gen_hsa_clrsb (call, hbb); + break; + + case BUILT_IN_CLZ: + case 
BUILT_IN_CLZL: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb); + break; + + case BUILT_IN_CTZ: + case BUILT_IN_CTZL: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb); + break; + + case BUILT_IN_FFS: + case BUILT_IN_FFSL: + gen_hsa_ffs (call, hbb); + break; + + case BUILT_IN_PARITY: + case BUILT_IN_PARITYL: + gen_hsa_parity (call, hbb); + break; + case BUILT_IN_POPCOUNT: case BUILT_IN_POPCOUNTL: gen_hsa_popcount (call, hbb); diff --git a/gcc/hsa.c b/gcc/hsa.c index db0f2e6..b60b891 100644 --- a/gcc/hsa.c +++ b/gcc/hsa.c @@ -828,10 +828,8 @@ hsa_internal_fn::name () *ptr = TOLOWER (*ptr); const char *suffix = NULL; - if (m_float_function_p && m_type_bit_size == 32) + if (m_type_bit_size == 32) suffix = "f"; - else if(!m_float_function_p && m_type_bit_size == 64) - suffix = "l"; if (suffix) { @@ -872,12 +870,6 @@ hsa_internal_fn::get_arity () case IFN_RINT: case IFN_ROUND: case IFN_TRUNC: - case IFN_CLRSB: - case IFN_CLZ: - case IFN_CTZ: - case IFN_FFS: - case IFN_PARITY: - case IFN_POPCOUNT: return 1; case IFN_ATAN2: case IFN_COPYSIGN: @@ -888,6 +880,12 @@ hsa_internal_fn::get_arity () case IFN_LDEXP: return 2; break; + case IFN_CLRSB: + case IFN_CLZ: + case IFN_CTZ: + case IFN_FFS: + case IFN_PARITY: + case IFN_POPCOUNT: default: gcc_unreachable (); } @@ -928,18 +926,6 @@ hsa_internal_fn::get_argument_type (int n) case IFN_REMAINDER: case IFN_SCALB: return hsa_float_for_bitsize (m_type_bit_size); - case IFN_CLRSB: - case IFN_CLZ: - case IFN_CTZ: - case IFN_FFS: - case IFN_PARITY: - case IFN_POPCOUNT: - { - if (n == -1) - return BRIG_TYPE_S32; - else - return hsa_uint_for_bitsize (m_type_bit_size); - } case IFN_LDEXP: { if (n == -1 || n == 0) diff --git a/gcc/hsa.h b/gcc/hsa.h index cfe08f1..04eacae 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -761,14 +761,12 @@ is_a_helper <hsa_insn_seg *>::test (hsa_insn_basic *p) class hsa_internal_fn { public: - hsa_internal_fn (enum internal_fn fn, unsigned type_bit_size, - bool float_function_p): - 
m_fn (fn), m_type_bit_size (type_bit_size), - m_float_function_p (float_function_p), m_offset (0) {} + hsa_internal_fn (enum internal_fn fn, unsigned type_bit_size): + m_fn (fn), m_type_bit_size (type_bit_size), m_offset (0) {} hsa_internal_fn (const hsa_internal_fn *f): m_fn (f->m_fn), m_type_bit_size (f->m_type_bit_size), - m_float_function_p (f->m_float_function_p), m_offset (f->m_offset) {} + m_offset (f->m_offset) {} /* Return arity of the internal function. */ unsigned get_arity (); @@ -786,9 +784,6 @@ public: /* Bit width of return type. */ unsigned m_type_bit_size; - /* True if the function accepts a float type as first argument. */ - bool m_float_function_p; - /* BRIG offset of declaration of the function. */ BrigCodeOffset32_t m_offset; }; @@ -954,7 +949,9 @@ template <> inline bool is_a_helper <hsa_insn_srctype *>::test (hsa_insn_basic *p) { - return (p->m_opcode == BRIG_OPCODE_POPCOUNT); + return (p->m_opcode == BRIG_OPCODE_POPCOUNT + || p->m_opcode == BRIG_OPCODE_FIRSTBIT + || p->m_opcode == BRIG_OPCODE_LASTBIT); } /* HSA packed instruction. */ -- 2.6.3
>From a1851605416b3b264e0b9112bc913872ed43a4d0 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Wed, 2 Dec 2015 14:17:48 +0100 Subject: [PATCH 6/7] HSA: support 'unsigned long long' type for integer builtins gcc/ChangeLog: 2015-12-02 Martin Liska <mli...@suse.cz> * hsa-gen.c (gen_hsa_insns_for_call): Support BUILT_IN_CLRSBLL, BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_FFSLL, BUILT_IN_PARITYLL, BUILT_IN_POPCOUNTLL builtins. --- gcc/hsa-gen.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 136986c..3fbafb5 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -5076,31 +5076,37 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) case BUILT_IN_CLRSB: case BUILT_IN_CLRSBL: + case BUILT_IN_CLRSBLL: gen_hsa_clrsb (call, hbb); break; case BUILT_IN_CLZ: case BUILT_IN_CLZL: + case BUILT_IN_CLZLL: gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb); break; case BUILT_IN_CTZ: case BUILT_IN_CTZL: + case BUILT_IN_CTZLL: gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb); break; case BUILT_IN_FFS: case BUILT_IN_FFSL: + case BUILT_IN_FFSLL: gen_hsa_ffs (call, hbb); break; case BUILT_IN_PARITY: case BUILT_IN_PARITYL: + case BUILT_IN_PARITYLL: gen_hsa_parity (call, hbb); break; case BUILT_IN_POPCOUNT: case BUILT_IN_POPCOUNTL: + case BUILT_IN_POPCOUNTLL: gen_hsa_popcount (call, hbb); break; -- 2.6.3
>From fc08ffda06d13c1db5b00129295c02fb3a10adf4 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Wed, 2 Dec 2015 14:05:01 +0100 Subject: [PATCH 7/7] HSA: improve warning message in IPA HSA gcc/ChangeLog: 2015-12-02 Martin Liska <mli...@suse.cz> * ipa-hsa.c (check_warn_node_versionable): Display name of a function that cannot be cloned. --- gcc/ipa-hsa.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c index a497841..5b3e563 100644 --- a/gcc/ipa-hsa.c +++ b/gcc/ipa-hsa.c @@ -53,10 +53,9 @@ check_warn_node_versionable (cgraph_node *node) { if (!node->local.versionable) { - if (warning_at (EXPR_LOCATION (node->decl), OPT_Whsa, - HSA_SORRY_MSG)) - inform (EXPR_LOCATION (node->decl), - "Function cannot be cloned"); + warning_at (EXPR_LOCATION (node->decl), OPT_Whsa, + "could not emit HSAIL for function %s: function cannot be " + "cloned", node->name ()); return false; } return true; -- 2.6.3