On 03/31/2016 04:58 PM, Martin Jambor wrote:
> Hi,
> 
> On Wed, Mar 23, 2016 at 02:43:17PM +0100, Martin Liska wrote:
>> gcc/ChangeLog:
>>
>> 2016-03-23  Martin Liska  <mli...@suse.cz>
>>
>>      PR hsa/70391
>>      * hsa-gen.c (hsa_function_representation::update_cfg): New
>>      function.
>>      (convert_addr_to_flat_segment): Likewise.
>>      (gen_hsa_memory_set): New alignment argument.
>>      (gen_hsa_ctor_assignment): Likewise.
>>      (gen_hsa_insns_for_single_assignment): Provide alignment
>>      to gen_hsa_ctor_assignment.
>>      (gen_hsa_insns_for_direct_call): Add new argument.
>>      (expand_lhs_of_string_op): New function.
>>      (expand_string_operation_builtin): Likewise.
>>      (expand_memory_copy): New function.
>>      (expand_memory_set): New function.
>>      (gen_hsa_insns_for_call): Use HOST_WIDE_INT.
>>      (convert_switch_statements): Change signature.
>>      (generate_hsa): Use a return value of the function.
>>      (pass_gen_hsail::execute): Do not call
>>      convert_switch_statements here.
>>      * hsa-regalloc.c (hsa_regalloc): Call update_cfg.
>>      * hsa.h (hsa_function_representation::m_need_cfg_update):
>>      New flag.
>>      (hsa_function_representation::update_cfg): New function.
> 
> As we already discussed, update_cfg and m_need_cfg_update should
> really be called differently, because CFG has already been modified
> and only dominance needs to be re-computed.  If you haven't thought
> about any names yet, what about m_modified_cfg and update_dominance() ?

Hi.

Done in the attached version of the patch.

> 
> 
>> ---
>>  gcc/hsa-gen.c      | 372 
>> ++++++++++++++++++++++++++++++++++++++---------------
>>  gcc/hsa-regalloc.c |   1 +
>>  gcc/hsa.h          |   9 +-
>>  3 files changed, 275 insertions(+), 107 deletions(-)
>>
>> diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
>> index db39813..db7fc3d 100644
>> --- a/gcc/hsa-gen.c
>> +++ b/gcc/hsa-gen.c
>> @@ -214,7 +214,7 @@ hsa_symbol::fillup_for_decl (tree decl)
>>     should be set to number of SSA names used in the function.  */
>>  
>>  hsa_function_representation::hsa_function_representation
>> -  (tree fdecl, bool kernel_p, unsigned ssa_names_count)
>> +  (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool 
>> need_cfg_update)
>>    : m_name (NULL),
>>      m_reg_count (0), m_input_args (vNULL),
>>      m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL),
>> @@ -223,7 +223,8 @@ hsa_function_representation::hsa_function_representation
>>      m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false),
>>      m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL),
>>      m_kernel_dispatch_count (0), m_maximum_omp_data_size (0),
>> -    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map ()
>> +    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (),
>> +    m_need_cfg_update (need_cfg_update)
>>  {
>>    int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;;
>>    m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len);
>> @@ -319,6 +320,16 @@ hsa_function_representation::init_extra_bbs ()
>>    hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
>>  }
>>  
>> +void
>> +hsa_function_representation::update_cfg ()
>> +{
>> +  if (m_need_cfg_update)
>> +    {
>> +      free_dominance_info (CDI_DOMINATORS);
>> +      calculate_dominance_info (CDI_DOMINATORS);
>> +    }
>> +}
>> +
>>  hsa_symbol *
>>  hsa_function_representation::create_hsa_temporary (BrigType16_t type)
>>  {
>> @@ -2246,30 +2257,14 @@ gen_hsa_addr_for_arg (tree tree_type, int index)
>>    return new hsa_op_address (sym);
>>  }
>>  
>> -/* Generate HSA instructions that calculate address of VAL including all
>> -   necessary conversions to flat addressing and place the result into DEST.
>> +/* Generate HSA instructions that process all necessary conversions
>> +   of an ADDR to flat addressing and place the result into DEST.
>>     Instructions are appended to HBB.  */
>>  
>>  static void
>> -gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
>> +convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest,
>> +                          hsa_bb *hbb)
>>  {
>> -  /* Handle cases like tmp = NULL, where we just emit a move instruction
>> -     to a register.  */
>> -  if (TREE_CODE (val) == INTEGER_CST)
>> -    {
>> -      hsa_op_immed *c = new hsa_op_immed (val);
>> -      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
>> -                                             dest->m_type, dest, c);
>> -      hbb->append_insn (insn);
>> -      return;
>> -    }
>> -
>> -  hsa_op_address *addr;
>> -
>> -  gcc_assert (dest->m_type == hsa_get_segment_addr_type 
>> (BRIG_SEGMENT_FLAT));
>> -  if (TREE_CODE (val) == ADDR_EXPR)
>> -    val = TREE_OPERAND (val, 0);
>> -  addr = gen_hsa_addr (val, hbb);
>>    hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA);
>>    insn->set_op (1, addr);
>>    if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL)
>> @@ -2298,6 +2293,34 @@ gen_hsa_addr_insns (tree val, hsa_op_reg *dest, 
>> hsa_bb *hbb)
>>      }
>>  }
>>  
>> +/* Generate HSA instructions that calculate address of VAL including all
>> +   necessary conversions to flat addressing and place the result into DEST.
>> +   Instructions are appended to HBB.  */
>> +
>> +static void
>> +gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
>> +{
>> +  /* Handle cases like tmp = NULL, where we just emit a move instruction
>> +     to a register.  */
>> +  if (TREE_CODE (val) == INTEGER_CST)
>> +    {
>> +      hsa_op_immed *c = new hsa_op_immed (val);
>> +      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
>> +                                             dest->m_type, dest, c);
>> +      hbb->append_insn (insn);
>> +      return;
>> +    }
>> +
>> +  hsa_op_address *addr;
>> +
>> +  gcc_assert (dest->m_type == hsa_get_segment_addr_type 
>> (BRIG_SEGMENT_FLAT));
>> +  if (TREE_CODE (val) == ADDR_EXPR)
>> +    val = TREE_OPERAND (val, 0);
>> +  addr = gen_hsa_addr (val, hbb);
>> +
>> +  convert_addr_to_flat_segment (addr, dest, hbb);
>> +}
>> +
>>  /* Return an HSA register or HSA immediate value operand corresponding to
>>     gimple operand OP.  */
>>  
>> @@ -2728,9 +2751,9 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, 
>> hsa_bb *hbb)
>>  }
>>  
>>  /* Generate memory copy instructions that are going to be used
>> -   for copying a HSA symbol SRC_SYMBOL (or SRC_REG) to TARGET memory,
>> +   for copying a SRC memory to TARGET memory,
>>     represented by pointer in a register.  MIN_ALIGN is minimal alignment
>> -   of provided HSA addresses. */
>> +   of provided HSA addresses.  */
>>  
>>  static void
>>  gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address 
>> *src,
>> @@ -2792,17 +2815,19 @@ build_memset_value (unsigned HOST_WIDE_INT constant, 
>> unsigned byte_size)
>>  }
>>  
>>  /* Generate memory set instructions that are going to be used
>> -   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.  */
>> +   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.
>> +   MIN_ALIGN is minimal alignment of provided HSA addresses.  */
>>  
>>  static void
>>  gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
>>                  unsigned HOST_WIDE_INT constant,
>> -                unsigned size)
>> +                unsigned size, BrigAlignment8_t min_align)
>>  {
>>    hsa_op_address *addr;
>>    hsa_insn_mem *mem;
>>  
>>    unsigned offset = 0;
>> +  unsigned min_byte_align = hsa_byte_alignment (min_align);
>>  
>>    while (size)
>>      {
>> @@ -2816,6 +2841,9 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address 
>> *target,
>>        else
>>      s = 1;
>>  
>> +      if (s > min_byte_align)
>> +    s = min_byte_align;
>> +
>>        addr = new hsa_op_address (target->m_symbol, target->m_reg,
>>                               target->m_imm_offset + offset);
>>  
>> @@ -2832,10 +2860,12 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address 
>> *target,
>>  
>>  /* Generate HSAIL instructions for a single assignment
>>     of an empty constructor to an ADDR_LHS.  Constructor is passed as a
>> -   tree RHS and all instructions are appended to HBB.  */
>> +   tree RHS and all instructions are appended to HBB.  ALIGN is
>> +   alignment of the address.  */
>>  
>>  void
>> -gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb)
>> +gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb,
>> +                     BrigAlignment8_t align)
>>  {
>>    if (vec_safe_length (CONSTRUCTOR_ELTS (rhs)))
>>      {
>> @@ -2845,7 +2875,7 @@ gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, 
>> tree rhs, hsa_bb *hbb)
>>      }
>>  
>>    unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
>> -  gen_hsa_memory_set (hbb, addr_lhs, 0, size);
>> +  gen_hsa_memory_set (hbb, addr_lhs, 0, size, align);
>>  }
>>  
>>  /* Generate HSA instructions for a single assignment of RHS to LHS.
>> @@ -2879,7 +2909,7 @@ gen_hsa_insns_for_single_assignment (tree lhs, tree 
>> rhs, hsa_bb *hbb)
>>                                                        &lhs_align);
>>  
>>        if (TREE_CODE (rhs) == CONSTRUCTOR)
>> -    gen_hsa_ctor_assignment (addr_lhs, rhs, hbb);
>> +    gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align);
>>        else
>>      {
>>        BrigAlignment8_t rhs_align;
>> @@ -3523,10 +3553,13 @@ get_format_argument_type (tree formal_arg_type, 
>> BrigType16_t actual_arg_type)
>>  
>>  /* Generate HSA instructions for a direct call instruction.
>>     Instructions will be appended to HBB, which also needs to be the
>> -   corresponding structure to the basic_block of STMT.  */
>> +   corresponding structure to the basic_block of STMT.
>> +   If ASSIGN_LHS is set to true, assignment to a LHS of the called function
>> +   is not processed.  */
> 
> I'm not sure the last sentence is really helpful because the word
> "processed" is not very informative and also because I think it means
> the opposite of what the code actually does (it depends on what being
> processed means, I suppose).  Can you perhaps replace it with "If
> ASSIGN_LHS is false, do not copy HSA function result argument into the
> corresponding HSA representation of the gimple statement LHS."

Likewise.

Installed as r234644.

Martin

> 
> Otherwise, the patch is fine, thanks.
> 
> Martin
> 

>From ca3ff34773b0460814dd208567666210bf21b371 Mon Sep 17 00:00:00 2001
From: marxin <mli...@suse.cz>
Date: Wed, 23 Mar 2016 14:43:17 +0100
Subject: [PATCH 2/2] HSA: handle alignment of string builtins (PR hsa/70391)

gcc/ChangeLog:

2016-03-23  Martin Liska  <mli...@suse.cz>

	PR hsa/70391
	* hsa-gen.c (hsa_function_representation::update_dominance): New
	function.
	(convert_addr_to_flat_segment): Likewise.
	(gen_hsa_memory_set): New alignment argument.
	(gen_hsa_ctor_assignment): Likewise.
	(gen_hsa_insns_for_single_assignment): Provide alignment
	to gen_hsa_ctor_assignment.
	(gen_hsa_insns_for_direct_call): Add new argument.
	(expand_lhs_of_string_op): New function.
	(expand_string_operation_builtin): Likewise.
	(expand_memory_copy): New function.
	(expand_memory_set): New function.
	(gen_hsa_insns_for_call): Use HOST_WIDE_INT.
	(convert_switch_statements): Change signature.
	(generate_hsa): Use a return value of the function.
	(pass_gen_hsail::execute): Do not call
	convert_switch_statements here.
	* hsa-regalloc.c (hsa_regalloc): Call update_dominance.
	* hsa.h (hsa_function_representation::m_modified_cfg):
	New flag.
	(hsa_function_representation::update_dominance): New function.
---
 gcc/hsa-gen.c      | 380 ++++++++++++++++++++++++++++++++++++++---------------
 gcc/hsa-regalloc.c |   1 +
 gcc/hsa.h          |   9 +-
 3 files changed, 280 insertions(+), 110 deletions(-)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 55c46ca..4c86023 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -211,10 +211,12 @@ hsa_symbol::fillup_for_decl (tree decl)
 /* Constructor of class representing global HSA function/kernel information and
    state.  FNDECL is function declaration, KERNEL_P is true if the function
    is going to become a HSA kernel.  If the function has body, SSA_NAMES_COUNT
-   should be set to number of SSA names used in the function.  */
+   should be set to number of SSA names used in the function.
+   MODIFIED_CFG is set to true in case we modified control-flow graph
+   of the function.  */
 
 hsa_function_representation::hsa_function_representation
-  (tree fdecl, bool kernel_p, unsigned ssa_names_count)
+  (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool modified_cfg)
   : m_name (NULL),
     m_reg_count (0), m_input_args (vNULL),
     m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL),
@@ -223,7 +225,8 @@ hsa_function_representation::hsa_function_representation
     m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false),
     m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL),
     m_kernel_dispatch_count (0), m_maximum_omp_data_size (0),
-    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map ()
+    m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (),
+    m_modified_cfg (modified_cfg)
 {
   int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;;
   m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len);
@@ -319,6 +322,16 @@ hsa_function_representation::init_extra_bbs ()
   hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
 }
 
+void
+hsa_function_representation::update_dominance ()
+{
+  if (m_modified_cfg)
+    {
+      free_dominance_info (CDI_DOMINATORS);
+      calculate_dominance_info (CDI_DOMINATORS);
+    }
+}
+
 hsa_symbol *
 hsa_function_representation::create_hsa_temporary (BrigType16_t type)
 {
@@ -2246,30 +2259,14 @@ gen_hsa_addr_for_arg (tree tree_type, int index)
   return new hsa_op_address (sym);
 }
 
-/* Generate HSA instructions that calculate address of VAL including all
-   necessary conversions to flat addressing and place the result into DEST.
+/* Generate HSA instructions that process all necessary conversions
+   of an ADDR to flat addressing and place the result into DEST.
    Instructions are appended to HBB.  */
 
 static void
-gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
+convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest,
+			      hsa_bb *hbb)
 {
-  /* Handle cases like tmp = NULL, where we just emit a move instruction
-     to a register.  */
-  if (TREE_CODE (val) == INTEGER_CST)
-    {
-      hsa_op_immed *c = new hsa_op_immed (val);
-      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
-						 dest->m_type, dest, c);
-      hbb->append_insn (insn);
-      return;
-    }
-
-  hsa_op_address *addr;
-
-  gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
-  if (TREE_CODE (val) == ADDR_EXPR)
-    val = TREE_OPERAND (val, 0);
-  addr = gen_hsa_addr (val, hbb);
   hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA);
   insn->set_op (1, addr);
   if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL)
@@ -2298,6 +2295,34 @@ gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
     }
 }
 
+/* Generate HSA instructions that calculate address of VAL including all
+   necessary conversions to flat addressing and place the result into DEST.
+   Instructions are appended to HBB.  */
+
+static void
+gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
+{
+  /* Handle cases like tmp = NULL, where we just emit a move instruction
+     to a register.  */
+  if (TREE_CODE (val) == INTEGER_CST)
+    {
+      hsa_op_immed *c = new hsa_op_immed (val);
+      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+						 dest->m_type, dest, c);
+      hbb->append_insn (insn);
+      return;
+    }
+
+  hsa_op_address *addr;
+
+  gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
+  if (TREE_CODE (val) == ADDR_EXPR)
+    val = TREE_OPERAND (val, 0);
+  addr = gen_hsa_addr (val, hbb);
+
+  convert_addr_to_flat_segment (addr, dest, hbb);
+}
+
 /* Return an HSA register or HSA immediate value operand corresponding to
    gimple operand OP.  */
 
@@ -2728,9 +2753,9 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb)
 }
 
 /* Generate memory copy instructions that are going to be used
-   for copying a HSA symbol SRC_SYMBOL (or SRC_REG) to TARGET memory,
+   for copying a SRC memory to TARGET memory,
    represented by pointer in a register.  MIN_ALIGN is minimal alignment
-   of provided HSA addresses. */
+   of provided HSA addresses.  */
 
 static void
 gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src,
@@ -2792,17 +2817,19 @@ build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size)
 }
 
 /* Generate memory set instructions that are going to be used
-   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.  */
+   for setting a CONSTANT byte value to TARGET memory of SIZE bytes.
+   MIN_ALIGN is minimal alignment of provided HSA addresses.  */
 
 static void
 gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
 		    unsigned HOST_WIDE_INT constant,
-		    unsigned size)
+		    unsigned size, BrigAlignment8_t min_align)
 {
   hsa_op_address *addr;
   hsa_insn_mem *mem;
 
   unsigned offset = 0;
+  unsigned min_byte_align = hsa_byte_alignment (min_align);
 
   while (size)
     {
@@ -2816,6 +2843,9 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
       else
 	s = 1;
 
+      if (s > min_byte_align)
+	s = min_byte_align;
+
       addr = new hsa_op_address (target->m_symbol, target->m_reg,
 				 target->m_imm_offset + offset);
 
@@ -2832,10 +2862,12 @@ gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
 
 /* Generate HSAIL instructions for a single assignment
    of an empty constructor to an ADDR_LHS.  Constructor is passed as a
-   tree RHS and all instructions are appended to HBB.  */
+   tree RHS and all instructions are appended to HBB.  ALIGN is
+   alignment of the address.  */
 
 void
-gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb)
+gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb,
+			 BrigAlignment8_t align)
 {
   if (vec_safe_length (CONSTRUCTOR_ELTS (rhs)))
     {
@@ -2845,7 +2877,7 @@ gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb)
     }
 
   unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
-  gen_hsa_memory_set (hbb, addr_lhs, 0, size);
+  gen_hsa_memory_set (hbb, addr_lhs, 0, size, align);
 }
 
 /* Generate HSA instructions for a single assignment of RHS to LHS.
@@ -2879,7 +2911,7 @@ gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb)
 							  &lhs_align);
 
       if (TREE_CODE (rhs) == CONSTRUCTOR)
-	gen_hsa_ctor_assignment (addr_lhs, rhs, hbb);
+	gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align);
       else
 	{
 	  BrigAlignment8_t rhs_align;
@@ -3523,10 +3555,13 @@ get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type)
 
 /* Generate HSA instructions for a direct call instruction.
    Instructions will be appended to HBB, which also needs to be the
-   corresponding structure to the basic_block of STMT.  */
+   corresponding structure to the basic_block of STMT.
+   If ASSIGN_LHS is false, do not copy HSA function result argument into the
+   corresponding HSA representation of the gimple statement LHS.  */
 
 static void
-gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb)
+gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb,
+			       bool assign_lhs = true)
 {
   tree decl = gimple_call_fndecl (stmt);
   verify_function_arguments (decl);
@@ -3608,7 +3643,7 @@ gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb)
 
       /* Even if result of a function call is unused, we have to emit
 	 declaration for the result.  */
-      if (result)
+      if (result && assign_lhs)
 	{
 	  tree lhs_type = TREE_TYPE (result);
 
@@ -4481,6 +4516,195 @@ get_address_from_value (tree val, hsa_bb *hbb)
     }
 }
 
+/* Expand assignment of a result of a string BUILTIN to DST.
+   Size of the operation is N bytes, where instructions
+   will be appended to HBB.  */
+
+static void
+expand_lhs_of_string_op (gimple *stmt,
+			 unsigned HOST_WIDE_INT n, hsa_bb *hbb,
+			 enum built_in_function builtin)
+{
+  /* If LHS is expected, we need to emit a PHI instruction.  */
+  tree lhs = gimple_call_lhs (stmt);
+  if (!lhs)
+    return;
+
+  hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs);
+
+  hsa_op_with_type *dst_reg
+    = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
+  hsa_op_with_type *tmp;
+
+  switch (builtin)
+    {
+    case BUILT_IN_MEMPCPY:
+      {
+	tmp = new hsa_op_reg (dst_reg->m_type);
+	hsa_insn_basic *add
+	  = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type,
+				tmp, dst_reg,
+				new hsa_op_immed (n, dst_reg->m_type));
+	hbb->append_insn (add);
+	break;
+      }
+    case BUILT_IN_MEMCPY:
+    case BUILT_IN_MEMSET:
+      tmp = dst_reg;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV, lhs_reg->m_type,
+					lhs_reg, tmp));
+}
+
+#define HSA_MEMORY_BUILTINS_LIMIT     128
+
+/* Expand a string builtin (from a gimple STMT) in a way that
+   according to MISALIGNED_FLAG we process either direct emission
+   (a bunch of memory load and store instructions), or we emit a function call
+   of a library function (for instance 'memcpy'). Actually, a basic block
+   for direct emission is just prepared, where caller is responsible
+   for emission of corresponding instructions.
+   All instructions are appended to HBB.  */
+
+hsa_bb *
+expand_string_operation_builtin (gimple *stmt, hsa_bb *hbb,
+				 hsa_op_reg *misaligned_flag)
+{
+  edge e = split_block (hbb->m_bb, stmt);
+  basic_block condition_bb = e->src;
+  hbb->append_insn (new hsa_insn_br (misaligned_flag));
+
+  /* Prepare the control flow.  */
+  edge condition_edge = EDGE_SUCC (condition_bb, 0);
+  basic_block call_bb = split_edge (condition_edge);
+
+  basic_block expanded_bb = split_edge (EDGE_SUCC (call_bb, 0));
+  basic_block cont_bb = EDGE_SUCC (expanded_bb, 0)->dest;
+  basic_block merge_bb = split_edge (EDGE_PRED (cont_bb, 0));
+
+  condition_edge->flags &= ~EDGE_FALLTHRU;
+  condition_edge->flags |= EDGE_TRUE_VALUE;
+  make_edge (condition_bb, expanded_bb, EDGE_FALSE_VALUE);
+
+  redirect_edge_succ (EDGE_SUCC (call_bb, 0), merge_bb);
+
+  hsa_cfun->m_modified_cfg = true;
+
+  hsa_init_new_bb (expanded_bb);
+
+  /* Slow path: function call.  */
+  gen_hsa_insns_for_direct_call (stmt, hsa_init_new_bb (call_bb), false);
+
+  return hsa_bb_for_bb (expanded_bb);
+}
+
+/* Expand a memory copy BUILTIN (BUILT_IN_MEMCPY, BUILT_IN_MEMPCPY) from
+   a gimple STMT and store all necessary instructions to HBB basic block.  */
+
+static void
+expand_memory_copy (gimple *stmt, hsa_bb *hbb, enum built_in_function builtin)
+{
+  tree byte_size = gimple_call_arg (stmt, 2);
+
+  if (!tree_fits_uhwi_p (byte_size))
+    {
+      gen_hsa_insns_for_direct_call (stmt, hbb);
+      return;
+    }
+
+  unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
+
+  if (n > HSA_MEMORY_BUILTINS_LIMIT)
+    {
+      gen_hsa_insns_for_direct_call (stmt, hbb);
+      return;
+    }
+
+  tree dst = gimple_call_arg (stmt, 0);
+  tree src = gimple_call_arg (stmt, 1);
+
+  hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
+  hsa_op_address *src_addr = get_address_from_value (src, hbb);
+
+  /* As gen_hsa_memory_copy relies on memory alignment
+     greater or equal to 8 bytes, we need to verify the alignment.  */
+  BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
+  hsa_op_reg *src_addr_reg = new hsa_op_reg (addrtype);
+  hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
+
+  convert_addr_to_flat_segment (src_addr, src_addr_reg, hbb);
+  convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
+
+  /* Process BIT OR for source and destination addresses.  */
+  hsa_op_reg *or_reg = new hsa_op_reg (addrtype);
+  gen_hsa_binary_operation (BRIG_OPCODE_OR, or_reg, src_addr_reg,
+			    dst_addr_reg, hbb);
+
+  /* Process BIT AND with 0x7 to identify the desired alignment
+     of 8 bytes.  */
+  hsa_op_reg *masked = new hsa_op_reg (addrtype);
+
+  gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, or_reg,
+			    new hsa_op_immed (7, addrtype), hbb);
+
+  hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
+  hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
+				      misaligned, masked,
+				      new hsa_op_immed (0, masked->m_type)));
+
+  hsa_bb *native_impl_bb
+    = expand_string_operation_builtin (stmt, hbb, misaligned);
+
+  gen_hsa_memory_copy (native_impl_bb, dst_addr, src_addr, n, BRIG_ALIGNMENT_8);
+  hsa_bb *merge_bb
+    = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
+  expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
+}
+
+
+/* Expand a memory set BUILTIN (BUILT_IN_MEMSET, BUILT_IN_BZERO) from
+   a gimple STMT and store all necessary instructions to HBB basic block.
+   The operation sets N bytes with a CONSTANT value.  */
+
+static void
+expand_memory_set (gimple *stmt, unsigned HOST_WIDE_INT n,
+		   unsigned HOST_WIDE_INT constant, hsa_bb *hbb,
+		   enum built_in_function builtin)
+{
+  tree dst = gimple_call_arg (stmt, 0);
+  hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
+
+  /* As gen_hsa_memory_set relies on memory alignment
+     greater or equal to 8 bytes, we need to verify the alignment.  */
+  BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
+  hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
+  convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
+
+  /* Process BIT AND with 0x7 to identify the desired alignment
+     of 8 bytes.  */
+  hsa_op_reg *masked = new hsa_op_reg (addrtype);
+
+  gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, dst_addr_reg,
+			    new hsa_op_immed (7, addrtype), hbb);
+
+  hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
+  hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
+				      misaligned, masked,
+				      new hsa_op_immed (0, masked->m_type)));
+
+  hsa_bb *native_impl_bb
+    = expand_string_operation_builtin (stmt, hbb, misaligned);
+
+  gen_hsa_memory_set (native_impl_bb, dst_addr, constant, n, BRIG_ALIGNMENT_8);
+  hsa_bb *merge_bb
+    = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
+  expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
+}
+
 /* Return string for MEMMODEL.  */
 
 static const char *
@@ -4811,8 +5035,6 @@ gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb)
     }
 }
 
-#define HSA_MEMORY_BUILTINS_LIMIT     128
-
 /* Generate HSA instructions for the given call statement STMT.  Instructions
    will be appended to HBB.  */
 
@@ -5169,58 +5391,11 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
     case BUILT_IN_MEMCPY:
     case BUILT_IN_MEMPCPY:
       {
-	tree byte_size = gimple_call_arg (stmt, 2);
-
-	if (!tree_fits_uhwi_p (byte_size))
-	  {
-	    gen_hsa_insns_for_direct_call (stmt, hbb);
-	    return;
-	  }
-
-	unsigned n = tree_to_uhwi (byte_size);
-
-	if (n > HSA_MEMORY_BUILTINS_LIMIT)
-	  {
-	    gen_hsa_insns_for_direct_call (stmt, hbb);
-	    return;
-	  }
-
-	tree dst = gimple_call_arg (stmt, 0);
-	tree src = gimple_call_arg (stmt, 1);
-
-	hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
-	hsa_op_address *src_addr = get_address_from_value (src, hbb);
-
-	gen_hsa_memory_copy (hbb, dst_addr, src_addr, n, BRIG_ALIGNMENT_1);
-
-	tree lhs = gimple_call_lhs (stmt);
-	if (lhs)
-	  {
-	    hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs);
-	    hsa_op_with_type *dst_reg = hsa_reg_or_immed_for_gimple_op (dst,
-									hbb);
-	    hsa_op_with_type *tmp;
-
-	    if (builtin == BUILT_IN_MEMPCPY)
-	      {
-		tmp = new hsa_op_reg (dst_reg->m_type);
-		hsa_insn_basic *add
-		  = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type,
-					tmp, dst_reg,
-					new hsa_op_immed (n, dst_reg->m_type));
-		hbb->append_insn (add);
-	      }
-	    else
-	      tmp = dst_reg;
-
-	    hsa_build_append_simple_mov (lhs_reg, tmp, hbb);
-	  }
-
+	expand_memory_copy (stmt, hbb, builtin);
 	break;
       }
     case BUILT_IN_MEMSET:
       {
-	tree dst = gimple_call_arg (stmt, 0);
 	tree c = gimple_call_arg (stmt, 1);
 
 	if (TREE_CODE (c) != INTEGER_CST)
@@ -5237,7 +5412,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	unsigned n = tree_to_uhwi (byte_size);
+	unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
 
 	if (n > HSA_MEMORY_BUILTINS_LIMIT)
 	  {
@@ -5245,22 +5420,15 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	hsa_op_address *dst_addr;
-	dst_addr = get_address_from_value (dst, hbb);
 	unsigned HOST_WIDE_INT constant
 	  = tree_to_uhwi (fold_convert (unsigned_char_type_node, c));
 
-	gen_hsa_memory_set (hbb, dst_addr, constant, n);
-
-	tree lhs = gimple_call_lhs (stmt);
-	if (lhs)
-	  gen_hsa_insns_for_single_assignment (lhs, dst, hbb);
+	expand_memory_set (stmt, n, constant, hbb, builtin);
 
 	break;
       }
     case BUILT_IN_BZERO:
       {
-	tree dst = gimple_call_arg (stmt, 0);
 	tree byte_size = gimple_call_arg (stmt, 1);
 
 	if (!tree_fits_uhwi_p (byte_size))
@@ -5269,7 +5437,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	unsigned n = tree_to_uhwi (byte_size);
+	unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
 
 	if (n > HSA_MEMORY_BUILTINS_LIMIT)
 	  {
@@ -5277,10 +5445,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
 	    return;
 	  }
 
-	hsa_op_address *dst_addr;
-	dst_addr = get_address_from_value (dst, hbb);
-
-	gen_hsa_memory_set (hbb, dst_addr, 0, n);
+	expand_memory_set (stmt, n, 0, hbb, builtin);
 
 	break;
       }
@@ -5832,13 +5997,13 @@ LD:    hard_work_3 ();
 
 */
 
-static void
-convert_switch_statements ()
+static bool
+convert_switch_statements (void)
 {
   function *func = DECL_STRUCT_FUNCTION (current_function_decl);
   basic_block bb;
 
-  bool need_update = false;
+  bool modified_cfg = false;
 
   FOR_EACH_BB_FN (bb, func)
   {
@@ -5856,7 +6021,7 @@ convert_switch_statements ()
 	if (transformable_switch_to_sbr_p (s))
 	  continue;
 
-	need_update = true;
+	modified_cfg = true;
 
 	unsigned labels = gimple_switch_num_labels (s);
 	tree index = gimple_switch_index (s);
@@ -6023,11 +6188,7 @@ convert_switch_statements ()
   if (dump_file)
     dump_function_to_file (current_function_decl, dump_file, TDF_DETAILS);
 
-  if (need_update)
-    {
-      free_dominance_info (CDI_DOMINATORS);
-      calculate_dominance_info (CDI_DOMINATORS);
-    }
+  return modified_cfg;
 }
 
 /* Expand builtins that can't be handled by HSA back-end.  */
@@ -6127,9 +6288,11 @@ generate_hsa (bool kernel)
   if (hsa_num_threads == NULL)
     emit_hsa_module_variables ();
 
+  bool modified_cfg = convert_switch_statements ();
   /* Initialize hsa_cfun.  */
   hsa_cfun = new hsa_function_representation (cfun->decl, kernel,
-					      SSANAMES (cfun)->length ());
+					      SSANAMES (cfun)->length (),
+					      modified_cfg);
   hsa_cfun->init_extra_bbs ();
 
   if (flag_tm)
@@ -6234,7 +6397,6 @@ pass_gen_hsail::execute (function *)
   hsa_function_summary *s
     = hsa_summaries->get (cgraph_node::get_create (current_function_decl));
 
-  convert_switch_statements ();
   expand_builtins ();
   generate_hsa (s->m_kind == HSA_KERNEL);
   TREE_ASM_WRITTEN (current_function_decl) = 1;
diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
index 16cde8b..50bf511 100644
--- a/gcc/hsa-regalloc.c
+++ b/gcc/hsa-regalloc.c
@@ -700,6 +700,7 @@ regalloc (void)
 void
 hsa_regalloc (void)
 {
+  hsa_cfun->update_dominance ();
   naive_outof_ssa ();
 
   if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/hsa.h b/gcc/hsa.h
index ac692fb..b7b1e3c 100644
--- a/gcc/hsa.h
+++ b/gcc/hsa.h
@@ -1123,7 +1123,8 @@ class hsa_function_representation
 {
 public:
   hsa_function_representation (tree fdecl, bool kernel_p,
-			       unsigned ssa_names_count);
+			       unsigned ssa_names_count,
+			       bool modified_cfg = false);
   hsa_function_representation (hsa_internal_fn *fn);
   ~hsa_function_representation ();
 
@@ -1139,6 +1140,9 @@ public:
      so we need hsa_bb instances of them.  */
   void init_extra_bbs ();
 
+  /* Update CFG dominators if m_modified_cfg flag is set.  */
+  void update_dominance ();
+
   /* Return linkage of the representation.  */
   BrigLinkage8_t get_linkage ();
 
@@ -1219,6 +1223,9 @@ public:
 
   /* SSA names mapping.  */
   vec <hsa_op_reg_p> m_ssa_map;
+
+  /* Flag whether a function needs update of dominators before RA.  */
+  bool m_modified_cfg;
 };
 
 enum hsa_function_kind
-- 
2.7.1

Reply via email to