On 03/31/2016 04:58 PM, Martin Jambor wrote: > Hi, > > On Wed, Mar 23, 2016 at 02:43:17PM +0100, Martin Liska wrote: >> gcc/ChangeLog: >> >> 2016-03-23 Martin Liska <mli...@suse.cz> >> >> PR hsa/70391 >> * hsa-gen.c (hsa_function_representation::update_cfg): New >> function. >> (convert_addr_to_flat_segment): Likewise. >> (gen_hsa_memory_set): New alignment argument. >> (gen_hsa_ctor_assignment): Likewise. >> (gen_hsa_insns_for_single_assignment): Provide alignment >> to gen_hsa_ctor_assignment. >> (gen_hsa_insns_for_direct_call): Add new argument. >> (expand_lhs_of_string_op): New function. >> (expand_string_operation_builtin): Likewise. >> (expand_memory_copy): New function. >> (expand_memory_set): New function. >> (gen_hsa_insns_for_call): Use HOST_WIDE_INT. >> (convert_switch_statements): Change signature. >> (generate_hsa): Use a return value of the function. >> (pass_gen_hsail::execute): Do not call >> convert_switch_statements here. >> * hsa-regalloc.c (hsa_regalloc): Call update_cfg. >> * hsa.h (hsa_function_representation::m_need_cfg_update): >> New flag. >> (hsa_function_representation::update_cfg): New function. > > As we already discussed, update_cfg and m_need_cfg_update should > really be called differently, because CFG has already been modified > and only dominance needs to be re-computed. If you haven't thought > about any names yet, what about m_modified_cfg and update_dominance() ?
Hi. Done in the attached version of the patch. > > >> --- >> gcc/hsa-gen.c | 372 >> ++++++++++++++++++++++++++++++++++++++--------------- >> gcc/hsa-regalloc.c | 1 + >> gcc/hsa.h | 9 +- >> 3 files changed, 275 insertions(+), 107 deletions(-) >> >> diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c >> index db39813..db7fc3d 100644 >> --- a/gcc/hsa-gen.c >> +++ b/gcc/hsa-gen.c >> @@ -214,7 +214,7 @@ hsa_symbol::fillup_for_decl (tree decl) >> should be set to number of SSA names used in the function. */ >> >> hsa_function_representation::hsa_function_representation >> - (tree fdecl, bool kernel_p, unsigned ssa_names_count) >> + (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool >> need_cfg_update) >> : m_name (NULL), >> m_reg_count (0), m_input_args (vNULL), >> m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL), >> @@ -223,7 +223,8 @@ hsa_function_representation::hsa_function_representation >> m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false), >> m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL), >> m_kernel_dispatch_count (0), m_maximum_omp_data_size (0), >> - m_seen_error (false), m_temp_symbol_count (0), m_ssa_map () >> + m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (), >> + m_need_cfg_update (need_cfg_update) >> { >> int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;; >> m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len); >> @@ -319,6 +320,16 @@ hsa_function_representation::init_extra_bbs () >> hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); >> } >> >> +void >> +hsa_function_representation::update_cfg () >> +{ >> + if (m_need_cfg_update) >> + { >> + free_dominance_info (CDI_DOMINATORS); >> + calculate_dominance_info (CDI_DOMINATORS); >> + } >> +} >> + >> hsa_symbol * >> hsa_function_representation::create_hsa_temporary (BrigType16_t type) >> { >> @@ -2246,30 +2257,14 @@ gen_hsa_addr_for_arg (tree tree_type, int index) >> return new hsa_op_address (sym); >> } >> >> -/* Generate 
HSA instructions that calculate address of VAL including all >> - necessary conversions to flat addressing and place the result into DEST. >> +/* Generate HSA instructions that process all necessary conversions >> + of an ADDR to flat addressing and place the result into DEST. >> Instructions are appended to HBB. */ >> >> static void >> -gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb) >> +convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest, >> + hsa_bb *hbb) >> { >> - /* Handle cases like tmp = NULL, where we just emit a move instruction >> - to a register. */ >> - if (TREE_CODE (val) == INTEGER_CST) >> - { >> - hsa_op_immed *c = new hsa_op_immed (val); >> - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, >> - dest->m_type, dest, c); >> - hbb->append_insn (insn); >> - return; >> - } >> - >> - hsa_op_address *addr; >> - >> - gcc_assert (dest->m_type == hsa_get_segment_addr_type >> (BRIG_SEGMENT_FLAT)); >> - if (TREE_CODE (val) == ADDR_EXPR) >> - val = TREE_OPERAND (val, 0); >> - addr = gen_hsa_addr (val, hbb); >> hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA); >> insn->set_op (1, addr); >> if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL) >> @@ -2298,6 +2293,34 @@ gen_hsa_addr_insns (tree val, hsa_op_reg *dest, >> hsa_bb *hbb) >> } >> } >> >> +/* Generate HSA instructions that calculate address of VAL including all >> + necessary conversions to flat addressing and place the result into DEST. >> + Instructions are appended to HBB. */ >> + >> +static void >> +gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb) >> +{ >> + /* Handle cases like tmp = NULL, where we just emit a move instruction >> + to a register. 
*/ >> + if (TREE_CODE (val) == INTEGER_CST) >> + { >> + hsa_op_immed *c = new hsa_op_immed (val); >> + hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, >> + dest->m_type, dest, c); >> + hbb->append_insn (insn); >> + return; >> + } >> + >> + hsa_op_address *addr; >> + >> + gcc_assert (dest->m_type == hsa_get_segment_addr_type >> (BRIG_SEGMENT_FLAT)); >> + if (TREE_CODE (val) == ADDR_EXPR) >> + val = TREE_OPERAND (val, 0); >> + addr = gen_hsa_addr (val, hbb); >> + >> + convert_addr_to_flat_segment (addr, dest, hbb); >> +} >> + >> /* Return an HSA register or HSA immediate value operand corresponding to >> gimple operand OP. */ >> >> @@ -2728,9 +2751,9 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, >> hsa_bb *hbb) >> } >> >> /* Generate memory copy instructions that are going to be used >> - for copying a HSA symbol SRC_SYMBOL (or SRC_REG) to TARGET memory, >> + for copying a SRC memory to TARGET memory, >> represented by pointer in a register. MIN_ALIGN is minimal alignment >> - of provided HSA addresses. */ >> + of provided HSA addresses. */ >> >> static void >> gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address >> *src, >> @@ -2792,17 +2815,19 @@ build_memset_value (unsigned HOST_WIDE_INT constant, >> unsigned byte_size) >> } >> >> /* Generate memory set instructions that are going to be used >> - for setting a CONSTANT byte value to TARGET memory of SIZE bytes. */ >> + for setting a CONSTANT byte value to TARGET memory of SIZE bytes. >> + MIN_ALIGN is minimal alignment of provided HSA addresses. 
*/ >> >> static void >> gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target, >> unsigned HOST_WIDE_INT constant, >> - unsigned size) >> + unsigned size, BrigAlignment8_t min_align) >> { >> hsa_op_address *addr; >> hsa_insn_mem *mem; >> >> unsigned offset = 0; >> + unsigned min_byte_align = hsa_byte_alignment (min_align); >> >> while (size) >> { >> @@ -2816,6 +2841,9 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address >> *target, >> else >> s = 1; >> >> + if (s > min_byte_align) >> + s = min_byte_align; >> + >> addr = new hsa_op_address (target->m_symbol, target->m_reg, >> target->m_imm_offset + offset); >> >> @@ -2832,10 +2860,12 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address >> *target, >> >> /* Generate HSAIL instructions for a single assignment >> of an empty constructor to an ADDR_LHS. Constructor is passed as a >> - tree RHS and all instructions are appended to HBB. */ >> + tree RHS and all instructions are appended to HBB. ALIGN is >> + alignment of the address. */ >> >> void >> -gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb) >> +gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb, >> + BrigAlignment8_t align) >> { >> if (vec_safe_length (CONSTRUCTOR_ELTS (rhs))) >> { >> @@ -2845,7 +2875,7 @@ gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, >> tree rhs, hsa_bb *hbb) >> } >> >> unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs))); >> - gen_hsa_memory_set (hbb, addr_lhs, 0, size); >> + gen_hsa_memory_set (hbb, addr_lhs, 0, size, align); >> } >> >> /* Generate HSA instructions for a single assignment of RHS to LHS. 
>> @@ -2879,7 +2909,7 @@ gen_hsa_insns_for_single_assignment (tree lhs, tree >> rhs, hsa_bb *hbb) >> &lhs_align); >> >> if (TREE_CODE (rhs) == CONSTRUCTOR) >> - gen_hsa_ctor_assignment (addr_lhs, rhs, hbb); >> + gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align); >> else >> { >> BrigAlignment8_t rhs_align; >> @@ -3523,10 +3553,13 @@ get_format_argument_type (tree formal_arg_type, >> BrigType16_t actual_arg_type) >> >> /* Generate HSA instructions for a direct call instruction. >> Instructions will be appended to HBB, which also needs to be the >> - corresponding structure to the basic_block of STMT. */ >> + corresponding structure to the basic_block of STMT. >> + If ASSIGN_LHS is set to true, assignment to a LHS of the called function >> + is not processed. */ > > I'm not sure the last sentence is really helpful because the word > "processed" is not very informative and also because I think it means > the opposite of what the code actually does (it depends on what being > processed means, I suppose). Can you perhaps replace it with "If > ASSIGN_LHS is false, do not copy HSA function result argument into the > corresponding HSA representation of the gimple statement LHS." Likewise. Installed as r234644. Martin > > Otherwise, the patch is fine, thanks. > > Martin >
>From ca3ff34773b0460814dd208567666210bf21b371 Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Wed, 23 Mar 2016 14:43:17 +0100 Subject: [PATCH 2/2] HSA: handle alignment of string builtins (PR hsa/70391) gcc/ChangeLog: 2016-03-23 Martin Liska <mli...@suse.cz> PR hsa/70391 * hsa-gen.c (hsa_function_representation::update_dominance): New function. (convert_addr_to_flat_segment): Likewise. (gen_hsa_memory_set): New alignment argument. (gen_hsa_ctor_assignment): Likewise. (gen_hsa_insns_for_single_assignment): Provide alignment to gen_hsa_ctor_assignment. (gen_hsa_insns_for_direct_call): Add new argument. (expand_lhs_of_string_op): New function. (expand_string_operation_builtin): Likewise. (expand_memory_copy): New function. (expand_memory_set): New function. (gen_hsa_insns_for_call): Use HOST_WIDE_INT. (convert_switch_statements): Change signature. (generate_hsa): Use a return value of the function. (pass_gen_hsail::execute): Do not call convert_switch_statements here. * hsa-regalloc.c (hsa_regalloc): Call update_dominance. * hsa.h (hsa_function_representation::m_modified_cfg): New flag. (hsa_function_representation::update_dominance): New function. --- gcc/hsa-gen.c | 380 ++++++++++++++++++++++++++++++++++++++--------------- gcc/hsa-regalloc.c | 1 + gcc/hsa.h | 9 +- 3 files changed, 280 insertions(+), 110 deletions(-) diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 55c46ca..4c86023 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -211,10 +211,12 @@ hsa_symbol::fillup_for_decl (tree decl) /* Constructor of class representing global HSA function/kernel information and state. FNDECL is function declaration, KERNEL_P is true if the function is going to become a HSA kernel. If the function has body, SSA_NAMES_COUNT - should be set to number of SSA names used in the function. */ + should be set to number of SSA names used in the function. + MODIFIED_CFG is set to true in case we modified control-flow graph + of the function. 
*/ hsa_function_representation::hsa_function_representation - (tree fdecl, bool kernel_p, unsigned ssa_names_count) + (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool modified_cfg) : m_name (NULL), m_reg_count (0), m_input_args (vNULL), m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL), @@ -223,7 +225,8 @@ hsa_function_representation::hsa_function_representation m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false), m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL), m_kernel_dispatch_count (0), m_maximum_omp_data_size (0), - m_seen_error (false), m_temp_symbol_count (0), m_ssa_map () + m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (), + m_modified_cfg (modified_cfg) { int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;; m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len); @@ -319,6 +322,16 @@ hsa_function_representation::init_extra_bbs () hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); } +void +hsa_function_representation::update_dominance () +{ + if (m_modified_cfg) + { + free_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + } +} + hsa_symbol * hsa_function_representation::create_hsa_temporary (BrigType16_t type) { @@ -2246,30 +2259,14 @@ gen_hsa_addr_for_arg (tree tree_type, int index) return new hsa_op_address (sym); } -/* Generate HSA instructions that calculate address of VAL including all - necessary conversions to flat addressing and place the result into DEST. +/* Generate HSA instructions that process all necessary conversions + of an ADDR to flat addressing and place the result into DEST. Instructions are appended to HBB. */ static void -gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb) +convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest, + hsa_bb *hbb) { - /* Handle cases like tmp = NULL, where we just emit a move instruction - to a register. 
*/ - if (TREE_CODE (val) == INTEGER_CST) - { - hsa_op_immed *c = new hsa_op_immed (val); - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, - dest->m_type, dest, c); - hbb->append_insn (insn); - return; - } - - hsa_op_address *addr; - - gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)); - if (TREE_CODE (val) == ADDR_EXPR) - val = TREE_OPERAND (val, 0); - addr = gen_hsa_addr (val, hbb); hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA); insn->set_op (1, addr); if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL) @@ -2298,6 +2295,34 @@ gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb) } } +/* Generate HSA instructions that calculate address of VAL including all + necessary conversions to flat addressing and place the result into DEST. + Instructions are appended to HBB. */ + +static void +gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb) +{ + /* Handle cases like tmp = NULL, where we just emit a move instruction + to a register. */ + if (TREE_CODE (val) == INTEGER_CST) + { + hsa_op_immed *c = new hsa_op_immed (val); + hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, + dest->m_type, dest, c); + hbb->append_insn (insn); + return; + } + + hsa_op_address *addr; + + gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)); + if (TREE_CODE (val) == ADDR_EXPR) + val = TREE_OPERAND (val, 0); + addr = gen_hsa_addr (val, hbb); + + convert_addr_to_flat_segment (addr, dest, hbb); +} + /* Return an HSA register or HSA immediate value operand corresponding to gimple operand OP. */ @@ -2728,9 +2753,9 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb) } /* Generate memory copy instructions that are going to be used - for copying a HSA symbol SRC_SYMBOL (or SRC_REG) to TARGET memory, + for copying a SRC memory to TARGET memory, represented by pointer in a register. MIN_ALIGN is minimal alignment - of provided HSA addresses. 
*/ + of provided HSA addresses. */ static void gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src, @@ -2792,17 +2817,19 @@ build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size) } /* Generate memory set instructions that are going to be used - for setting a CONSTANT byte value to TARGET memory of SIZE bytes. */ + for setting a CONSTANT byte value to TARGET memory of SIZE bytes. + MIN_ALIGN is minimal alignment of provided HSA addresses. */ static void gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target, unsigned HOST_WIDE_INT constant, - unsigned size) + unsigned size, BrigAlignment8_t min_align) { hsa_op_address *addr; hsa_insn_mem *mem; unsigned offset = 0; + unsigned min_byte_align = hsa_byte_alignment (min_align); while (size) { @@ -2816,6 +2843,9 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target, else s = 1; + if (s > min_byte_align) + s = min_byte_align; + addr = new hsa_op_address (target->m_symbol, target->m_reg, target->m_imm_offset + offset); @@ -2832,10 +2862,12 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target, /* Generate HSAIL instructions for a single assignment of an empty constructor to an ADDR_LHS. Constructor is passed as a - tree RHS and all instructions are appended to HBB. */ + tree RHS and all instructions are appended to HBB. ALIGN is + alignment of the address. */ void -gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb) +gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb, + BrigAlignment8_t align) { if (vec_safe_length (CONSTRUCTOR_ELTS (rhs))) { @@ -2845,7 +2877,7 @@ gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb) } unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs))); - gen_hsa_memory_set (hbb, addr_lhs, 0, size); + gen_hsa_memory_set (hbb, addr_lhs, 0, size, align); } /* Generate HSA instructions for a single assignment of RHS to LHS. 
@@ -2879,7 +2911,7 @@ gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb) &lhs_align); if (TREE_CODE (rhs) == CONSTRUCTOR) - gen_hsa_ctor_assignment (addr_lhs, rhs, hbb); + gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align); else { BrigAlignment8_t rhs_align; @@ -3523,10 +3555,13 @@ get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type) /* Generate HSA instructions for a direct call instruction. Instructions will be appended to HBB, which also needs to be the - corresponding structure to the basic_block of STMT. */ + corresponding structure to the basic_block of STMT. + If ASSIGN_LHS is false, do not copy HSA function result argument into the + corresponding HSA representation of the gimple statement LHS. */ static void -gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb) +gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb, + bool assign_lhs = true) { tree decl = gimple_call_fndecl (stmt); verify_function_arguments (decl); @@ -3608,7 +3643,7 @@ gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb) /* Even if result of a function call is unused, we have to emit declaration for the result. */ - if (result) + if (result && assign_lhs) { tree lhs_type = TREE_TYPE (result); @@ -4481,6 +4516,195 @@ get_address_from_value (tree val, hsa_bb *hbb) } } +/* Expand assignment of a result of a string BUILTIN to DST. + Size of the operation is N bytes, where instructions + will be append to HBB. */ + +static void +expand_lhs_of_string_op (gimple *stmt, + unsigned HOST_WIDE_INT n, hsa_bb *hbb, + enum built_in_function builtin) +{ + /* If LHS is expected, we need to emit a PHI instruction. 
*/ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs); + + hsa_op_with_type *dst_reg + = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb); + hsa_op_with_type *tmp; + + switch (builtin) + { + case BUILT_IN_MEMPCPY: + { + tmp = new hsa_op_reg (dst_reg->m_type); + hsa_insn_basic *add + = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, + tmp, dst_reg, + new hsa_op_immed (n, dst_reg->m_type)); + hbb->append_insn (add); + break; + } + case BUILT_IN_MEMCPY: + case BUILT_IN_MEMSET: + tmp = dst_reg; + break; + default: + gcc_unreachable (); + } + + hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV, lhs_reg->m_type, + lhs_reg, tmp)); +} + +#define HSA_MEMORY_BUILTINS_LIMIT 128 + +/* Expand a string builtin (from a gimple STMT) in a way that + according to MISALIGNED_FLAG we process either direct emission + (a bunch of memory load and store instructions), or we emit a function call + of a library function (for instance 'memcpy'). Actually, a basic block + for direct emission is just prepared, where caller is responsible + for emission of corresponding instructions. + All instruction are appended to HBB. */ + +hsa_bb * +expand_string_operation_builtin (gimple *stmt, hsa_bb *hbb, + hsa_op_reg *misaligned_flag) +{ + edge e = split_block (hbb->m_bb, stmt); + basic_block condition_bb = e->src; + hbb->append_insn (new hsa_insn_br (misaligned_flag)); + + /* Prepare the control flow. 
*/ + edge condition_edge = EDGE_SUCC (condition_bb, 0); + basic_block call_bb = split_edge (condition_edge); + + basic_block expanded_bb = split_edge (EDGE_SUCC (call_bb, 0)); + basic_block cont_bb = EDGE_SUCC (expanded_bb, 0)->dest; + basic_block merge_bb = split_edge (EDGE_PRED (cont_bb, 0)); + + condition_edge->flags &= ~EDGE_FALLTHRU; + condition_edge->flags |= EDGE_TRUE_VALUE; + make_edge (condition_bb, expanded_bb, EDGE_FALSE_VALUE); + + redirect_edge_succ (EDGE_SUCC (call_bb, 0), merge_bb); + + hsa_cfun->m_modified_cfg = true; + + hsa_init_new_bb (expanded_bb); + + /* Slow path: function call. */ + gen_hsa_insns_for_direct_call (stmt, hsa_init_new_bb (call_bb), false); + + return hsa_bb_for_bb (expanded_bb); +} + +/* Expand a memory copy BUILTIN (BUILT_IN_MEMCPY, BUILT_IN_MEMPCPY) from + a gimple STMT and store all necessary instruction to HBB basic block. */ + +static void +expand_memory_copy (gimple *stmt, hsa_bb *hbb, enum built_in_function builtin) +{ + tree byte_size = gimple_call_arg (stmt, 2); + + if (!tree_fits_uhwi_p (byte_size)) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size); + + if (n > HSA_MEMORY_BUILTINS_LIMIT) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + tree dst = gimple_call_arg (stmt, 0); + tree src = gimple_call_arg (stmt, 1); + + hsa_op_address *dst_addr = get_address_from_value (dst, hbb); + hsa_op_address *src_addr = get_address_from_value (src, hbb); + + /* As gen_hsa_memory_copy relies on memory alignment + greater or equal to 8 bytes, we need to verify the alignment. */ + BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); + hsa_op_reg *src_addr_reg = new hsa_op_reg (addrtype); + hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype); + + convert_addr_to_flat_segment (src_addr, src_addr_reg, hbb); + convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb); + + /* Process BIT OR for source and destination addresses. 
*/ + hsa_op_reg *or_reg = new hsa_op_reg (addrtype); + gen_hsa_binary_operation (BRIG_OPCODE_OR, or_reg, src_addr_reg, + dst_addr_reg, hbb); + + /* Process BIT AND with 0x7 to identify the desired alignment + of 8 bytes. */ + hsa_op_reg *masked = new hsa_op_reg (addrtype); + + gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, or_reg, + new hsa_op_immed (7, addrtype), hbb); + + hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1); + hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type, + misaligned, masked, + new hsa_op_immed (0, masked->m_type))); + + hsa_bb *native_impl_bb + = expand_string_operation_builtin (stmt, hbb, misaligned); + + gen_hsa_memory_copy (native_impl_bb, dst_addr, src_addr, n, BRIG_ALIGNMENT_8); + hsa_bb *merge_bb + = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest); + expand_lhs_of_string_op (stmt, n, merge_bb, builtin); +} + + +/* Expand a memory set BUILTIN (BUILT_IN_MEMSET, BUILT_IN_BZERO) from + a gimple STMT and store all necessary instruction to HBB basic block. + The operation set N bytes with a CONSTANT value. */ + +static void +expand_memory_set (gimple *stmt, unsigned HOST_WIDE_INT n, + unsigned HOST_WIDE_INT constant, hsa_bb *hbb, + enum built_in_function builtin) +{ + tree dst = gimple_call_arg (stmt, 0); + hsa_op_address *dst_addr = get_address_from_value (dst, hbb); + + /* As gen_hsa_memory_set relies on memory alignment + greater or equal to 8 bytes, we need to verify the alignment. */ + BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); + hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype); + convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb); + + /* Process BIT AND with 0x7 to identify the desired alignment + of 8 bytes. 
*/ + hsa_op_reg *masked = new hsa_op_reg (addrtype); + + gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, dst_addr_reg, + new hsa_op_immed (7, addrtype), hbb); + + hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1); + hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type, + misaligned, masked, + new hsa_op_immed (0, masked->m_type))); + + hsa_bb *native_impl_bb + = expand_string_operation_builtin (stmt, hbb, misaligned); + + gen_hsa_memory_set (native_impl_bb, dst_addr, constant, n, BRIG_ALIGNMENT_8); + hsa_bb *merge_bb + = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest); + expand_lhs_of_string_op (stmt, n, merge_bb, builtin); +} + /* Return string for MEMMODEL. */ static const char * @@ -4811,8 +5035,6 @@ gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb) } } -#define HSA_MEMORY_BUILTINS_LIMIT 128 - /* Generate HSA instructions for the given call statement STMT. Instructions will be appended to HBB. */ @@ -5169,58 +5391,11 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) case BUILT_IN_MEMCPY: case BUILT_IN_MEMPCPY: { - tree byte_size = gimple_call_arg (stmt, 2); - - if (!tree_fits_uhwi_p (byte_size)) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - unsigned n = tree_to_uhwi (byte_size); - - if (n > HSA_MEMORY_BUILTINS_LIMIT) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - tree dst = gimple_call_arg (stmt, 0); - tree src = gimple_call_arg (stmt, 1); - - hsa_op_address *dst_addr = get_address_from_value (dst, hbb); - hsa_op_address *src_addr = get_address_from_value (src, hbb); - - gen_hsa_memory_copy (hbb, dst_addr, src_addr, n, BRIG_ALIGNMENT_1); - - tree lhs = gimple_call_lhs (stmt); - if (lhs) - { - hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs); - hsa_op_with_type *dst_reg = hsa_reg_or_immed_for_gimple_op (dst, - hbb); - hsa_op_with_type *tmp; - - if (builtin == BUILT_IN_MEMPCPY) - { - tmp = new hsa_op_reg (dst_reg->m_type); - hsa_insn_basic *add - = new 
hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, - tmp, dst_reg, - new hsa_op_immed (n, dst_reg->m_type)); - hbb->append_insn (add); - } - else - tmp = dst_reg; - - hsa_build_append_simple_mov (lhs_reg, tmp, hbb); - } - + expand_memory_copy (stmt, hbb, builtin); break; } case BUILT_IN_MEMSET: { - tree dst = gimple_call_arg (stmt, 0); tree c = gimple_call_arg (stmt, 1); if (TREE_CODE (c) != INTEGER_CST) @@ -5237,7 +5412,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) return; } - unsigned n = tree_to_uhwi (byte_size); + unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size); if (n > HSA_MEMORY_BUILTINS_LIMIT) { @@ -5245,22 +5420,15 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) return; } - hsa_op_address *dst_addr; - dst_addr = get_address_from_value (dst, hbb); unsigned HOST_WIDE_INT constant = tree_to_uhwi (fold_convert (unsigned_char_type_node, c)); - gen_hsa_memory_set (hbb, dst_addr, constant, n); - - tree lhs = gimple_call_lhs (stmt); - if (lhs) - gen_hsa_insns_for_single_assignment (lhs, dst, hbb); + expand_memory_set (stmt, n, constant, hbb, builtin); break; } case BUILT_IN_BZERO: { - tree dst = gimple_call_arg (stmt, 0); tree byte_size = gimple_call_arg (stmt, 1); if (!tree_fits_uhwi_p (byte_size)) @@ -5269,7 +5437,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) return; } - unsigned n = tree_to_uhwi (byte_size); + unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size); if (n > HSA_MEMORY_BUILTINS_LIMIT) { @@ -5277,10 +5445,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) return; } - hsa_op_address *dst_addr; - dst_addr = get_address_from_value (dst, hbb); - - gen_hsa_memory_set (hbb, dst_addr, 0, n); + expand_memory_set (stmt, n, 0, hbb, builtin); break; } @@ -5832,13 +5997,13 @@ LD: hard_work_3 (); */ -static void -convert_switch_statements () +static bool +convert_switch_statements (void) { function *func = DECL_STRUCT_FUNCTION (current_function_decl); basic_block bb; - bool need_update = false; + bool modified_cfg = false; 
FOR_EACH_BB_FN (bb, func) { @@ -5856,7 +6021,7 @@ convert_switch_statements () if (transformable_switch_to_sbr_p (s)) continue; - need_update = true; + modified_cfg = true; unsigned labels = gimple_switch_num_labels (s); tree index = gimple_switch_index (s); @@ -6023,11 +6188,7 @@ convert_switch_statements () if (dump_file) dump_function_to_file (current_function_decl, dump_file, TDF_DETAILS); - if (need_update) - { - free_dominance_info (CDI_DOMINATORS); - calculate_dominance_info (CDI_DOMINATORS); - } + return modified_cfg; } /* Expand builtins that can't be handled by HSA back-end. */ @@ -6127,9 +6288,11 @@ generate_hsa (bool kernel) if (hsa_num_threads == NULL) emit_hsa_module_variables (); + bool modified_cfg = convert_switch_statements (); /* Initialize hsa_cfun. */ hsa_cfun = new hsa_function_representation (cfun->decl, kernel, - SSANAMES (cfun)->length ()); + SSANAMES (cfun)->length (), + modified_cfg); hsa_cfun->init_extra_bbs (); if (flag_tm) @@ -6234,7 +6397,6 @@ pass_gen_hsail::execute (function *) hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (current_function_decl)); - convert_switch_statements (); expand_builtins (); generate_hsa (s->m_kind == HSA_KERNEL); TREE_ASM_WRITTEN (current_function_decl) = 1; diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c index 16cde8b..50bf511 100644 --- a/gcc/hsa-regalloc.c +++ b/gcc/hsa-regalloc.c @@ -700,6 +700,7 @@ regalloc (void) void hsa_regalloc (void) { + hsa_cfun->update_dominance (); naive_outof_ssa (); if (dump_file && (dump_flags & TDF_DETAILS)) diff --git a/gcc/hsa.h b/gcc/hsa.h index ac692fb..b7b1e3c 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -1123,7 +1123,8 @@ class hsa_function_representation { public: hsa_function_representation (tree fdecl, bool kernel_p, - unsigned ssa_names_count); + unsigned ssa_names_count, + bool modified_cfg = false); hsa_function_representation (hsa_internal_fn *fn); ~hsa_function_representation (); @@ -1139,6 +1140,9 @@ public: so we need hsa_bb 
instances of them. */ void init_extra_bbs (); + /* Update CFG dominators if m_modified_cfg flag is set. */ + void update_dominance (); + /* Return linkage of the representation. */ BrigLinkage8_t get_linkage (); @@ -1219,6 +1223,9 @@ public: /* SSA names mapping. */ vec <hsa_op_reg_p> m_ssa_map; + + /* Flag whether a function needs update of dominators before RA. */ + bool m_modified_cfg; }; enum hsa_function_kind -- 2.7.1