Hi,
Jiufu Guo <guoji...@linux.ibm.com> writes: > Hi, > > There are a few PRs about the issues on the struct parameters and > returns, like PRs 69143/65421/108073. > > we could consider introducing a light SRA in the expander to > handle those parameters and returns in aggregate type, if they > are passed through registers. For access to the fields of > the parameters or returns, the corresponding scalar registers > can be used. > > As discussed: > https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619884.html > > This is an initial patch for the light-expander-sra. There are a few places in this patch that can be enhanced, e.g.: - support reverse storage order accesses. - support accessing fields that occupy only part of a register. - support mixed vector/TI modes. - support accesses in call stmts and asm stmts. - ... One enhancement I'm investigating first: when querying the scalarized rtx value for a tree expression, a TREE->RTX map is used. It may be better to bind the scalar rtx value to the tree expression directly (like DECL_RTL/INCOMING_RTL). Then 'get_scalarized_rtx' could be simpler. But I have not found a suitable field of TREE for this yet. Thanks for any suggestions! BR, Jeff (Jiufu Guo) > > Bootstrapped and regtested on x86_64-redhat-linux, and > powerpc64{,le}-linux-gnu. > > Is it ok for trunk? > > > BR, > Jeff (Jiufu Guo) > > > PR target/65421 > PR target/69143 > > gcc/ChangeLog: > > * cfgexpand.cc (expand_shift): Extern declare. > (struct access): New class. > (struct expand_sra): New class. > (expand_sra::build_access): New member function. > (expand_sra::visit_base): Likewise. > (expand_sra::analyze_default_stmt): Likewise. > (expand_sra::analyze_assign): Likewise. > (expand_sra::add_sra_candidate): Likewise. > (expand_sra::collect_sra_candidates): Likewise. > (expand_sra::valid_scalariable_accesses): Likewise. > (expand_sra::prepare_expander_sra): Likewise. > (expand_sra::expand_sra): Class constructor. > (expand_sra::~expand_sra): Class destructor. 
> (expand_sra::get_scalarized_rtx): New member function. > (extract_one_reg): New function. > (extract_sub_reg): New function. > (expand_sra::scalarize_access): New member function. > (expand_sra::scalarize_accesses): New member function. > (get_scalar_rtx_for_aggregate_expr): New function. > (set_scalar_rtx_for_aggregate_access): New function. > (set_scalar_rtx_for_returns): New function. > (expand_return): Call get_scalar_rtx_for_aggregate_expr. > (expand_debug_expr): Call get_scalar_rtx_for_aggregate_expr. > (pass_expand::execute): Update to use the expand_sra. > * expr.cc (get_scalar_rtx_for_aggregate_expr): Extern declare. > (expand_assignment): Call get_scalar_rtx_for_aggregate_expr. > (expand_expr_real): Call get_scalar_rtx_for_aggregate_expr. > * function.cc (set_scalar_rtx_for_aggregate_access): Extern declare. > (set_scalar_rtx_for_returns): Extern declare. > (assign_parm_setup_block): Call set_scalar_rtx_for_aggregate_access. > (assign_parms): Call set_scalar_rtx_for_aggregate_access. > (expand_function_start): Call set_scalar_rtx_for_returns. > * tree-sra.h (struct base_access): New class. > (struct default_analyzer): New class. > (scan_function): New function template. > > gcc/testsuite/ChangeLog: > > * g++.target/powerpc/pr102024.C: Updated. > * gcc.target/powerpc/pr108073.c: New test. > * gcc.target/powerpc/pr65421-1.c: New test. > * gcc.target/powerpc/pr65421-2.c: New test. 
> > --- > gcc/cfgexpand.cc | 478 ++++++++++++++++++- > gcc/expr.cc | 15 +- > gcc/function.cc | 28 +- > gcc/tree-sra.h | 80 +++- > gcc/testsuite/g++.target/powerpc/pr102024.C | 2 +- > gcc/testsuite/gcc.target/powerpc/pr108073.c | 29 ++ > gcc/testsuite/gcc.target/powerpc/pr65421-1.c | 6 + > gcc/testsuite/gcc.target/powerpc/pr65421-2.c | 32 ++ > 8 files changed, 660 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-2.c > > diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc > index > edf292cfbe95ac2711faee7769e839cb4edb0dd3..21a09ebac96bbcddc67da73c42f470c6d5f60e6c > 100644 > --- a/gcc/cfgexpand.cc > +++ b/gcc/cfgexpand.cc > @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3. If not see > #include "output.h" > #include "builtins.h" > #include "opts.h" > +#include "tree-sra.h" > > /* Some systems use __main in a way incompatible with its use in gcc, in > these > cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN > to > @@ -97,6 +98,472 @@ static bool defer_stack_allocation (tree, bool); > > static void record_alignment_for_reg_var (unsigned int); > > +extern rtx > +expand_shift (enum tree_code, machine_mode, rtx, poly_int64, rtx, int); > + > +/* For light SRA in expander about paramaters and returns. */ > +struct access : public base_access > +{ > + /* The rtx for the access: link to incoming/returning register(s). */ > + rtx rtx_val; > +}; > + > +typedef struct access *access_p; > + > +struct expand_sra : public default_analyzer > +{ > + expand_sra (); > + ~expand_sra (); > + > + /* Now use default APIs, no actions for > + pre_analyze_stmt, analyze_return. */ > + > + /* overwrite analyze_default_stmt. */ > + void analyze_default_stmt (gimple *); > + > + /* overwrite analyze phi,call,asm . 
*/ > + void analyze_phi (gphi *stmt) { analyze_default_stmt (stmt); }; > + void analyze_call (gcall *stmt) { analyze_default_stmt (stmt); }; > + void analyze_asm (gasm *stmt) { analyze_default_stmt (stmt); }; > + /* overwrite analyze_assign. */ > + void analyze_assign (gassign *); > + > + /* Compute the scalar rtx(s) for all access of BASE from a parrallel REGS. > */ > + bool scalarize_accesses (tree base, rtx regs); > + /* Return the scalarized rtx for EXPR. */ > + rtx get_scalarized_rtx (tree expr); > + > +private: > + void prepare_expander_sra (void); > + > + /* Return true if VAR is a candidate for SRA. */ > + bool add_sra_candidate (tree var); > + > + /* Collect the parameter and returns with type which is suitable for > + scalarization. */ > + bool collect_sra_candidates (void); > + > + /* Return true if EXPR has interesting access to the sra candidates, > + and created access, return false otherwise. */ > + access_p build_access (tree expr, bool write); > + > + /* Check if the accesses of BASE are scalarizbale. > + Now support the parms only with reading or returns only with writing. > */ > + bool valid_scalariable_accesses (vec<access_p> *access_vec, bool is_parm); > + > + /* Compute the scalar rtx for one access ACC from a parrallel REGS. */ > + bool scalarize_access (access_p acc, rtx regs); > + > + /* Callback of walk_stmt_load_store_addr_ops, used to remove > + unscalarizable accesses. */ > + static bool visit_base (gimple *, tree op, tree, void *data); > + > + /* Expr (tree) -> Scalarized value (rtx) map. */ > + hash_map<tree, rtx> *expr_rtx_vec; > + > + /* Base (tree) -> Vector (vec<access_p> *) map. 
*/ > + hash_map<tree, auto_vec<access_p> > *base_access_vec; > +}; > + > +access_p > +expand_sra::build_access (tree expr, bool write) > +{ > + enum tree_code code = TREE_CODE (expr); > + if (code != VAR_DECL && code != PARM_DECL && code != COMPONENT_REF > + && code != ARRAY_REF && code != ARRAY_RANGE_REF) > + return NULL; > + > + HOST_WIDE_INT offset, size; > + bool reverse; > + tree base = get_ref_base_and_extent_hwi (expr, &offset, &size, &reverse); > + if (!base || !DECL_P (base)) > + return NULL; > + if (storage_order_barrier_p (expr) || TREE_THIS_VOLATILE (expr)) > + { > + base_access_vec->remove (base); > + return NULL; > + } > + > + vec<access_p> *access_vec = base_access_vec->get (base); > + if (!access_vec) > + return NULL; > + > + /* TODO: support reverse. */ > + if (reverse || size <= 0 || offset + size > tree_to_shwi (DECL_SIZE > (base))) > + { > + base_access_vec->remove (base); > + return NULL; > + } > + > + struct access *access = XNEWVEC (struct access, 1); > + > + memset (access, 0, sizeof (struct access)); > + access->offset = offset; > + access->size = size; > + access->expr = expr; > + access->write = write; > + access->rtx_val = NULL_RTX; > + > + access_vec->safe_push (access); > + > + return access; > +} > + > +bool > +expand_sra::visit_base (gimple *, tree op, tree, void *data) > +{ > + op = get_base_address (op); > + if (op && DECL_P (op)) > + { > + expand_sra *p = (expand_sra *) data; > + p->base_access_vec->remove (op); > + } > + return false; > +} > + > +void > +expand_sra::analyze_default_stmt (gimple *stmt) > +{ > + if (base_access_vec && !base_access_vec->is_empty ()) > + walk_stmt_load_store_addr_ops (stmt, this, visit_base, visit_base, > + visit_base); > +} > + > +void > +expand_sra::analyze_assign (gassign *stmt) > +{ > + if (!base_access_vec || base_access_vec->is_empty ()) > + return; > + > + if (gimple_assign_single_p (stmt) && !gimple_clobber_p (stmt)) > + { > + tree rhs = gimple_assign_rhs1 (stmt); > + tree lhs = 
gimple_assign_lhs (stmt); > + bool res_r = build_access (rhs, false); > + bool res_l = build_access (lhs, true); > + if (res_l && TREE_CODE (rhs) == CONSTRUCTOR) > + base_access_vec->remove (get_base_address (lhs)); > + > + if (res_l || res_r) > + return; > + } > + > + analyze_default_stmt (stmt); > +} > + > +/* Return true if VAR is a candidate for SRA. */ > + > +bool > +expand_sra::add_sra_candidate (tree var) > +{ > + tree type = TREE_TYPE (var); > + > + if (!AGGREGATE_TYPE_P (type) || !tree_fits_shwi_p (TYPE_SIZE (type)) > + || tree_to_shwi (TYPE_SIZE (type)) == 0 || TREE_THIS_VOLATILE (var) > + || is_va_list_type (type)) > + return false; > + gcc_assert (COMPLETE_TYPE_P (type)); > + > + base_access_vec->get_or_insert (var); > + > + return true; > +} > + > +bool > +expand_sra::collect_sra_candidates (void) > +{ > + bool ret = false; > + > + /* Collect parameters. */ > + for (tree parm = DECL_ARGUMENTS (current_function_decl); parm; > + parm = DECL_CHAIN (parm)) > + ret |= add_sra_candidate (parm); > + > + /* Collect VARs on returns. */ > + if (DECL_RESULT (current_function_decl)) > + { > + edge_iterator ei; > + edge e; > + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > + if (greturn *r = safe_dyn_cast<greturn *> (*gsi_last_bb (e->src))) > + { > + tree val = gimple_return_retval (r); > + /* To sclaraized the return, the return value should be only > + writen, except this return stmt. > + Then using 'true(write)' to create the access. 
*/ > + if (val && VAR_P (val)) > + ret |= add_sra_candidate (val) && build_access (val, true); > + } > + } > + > + return ret; > +} > + > +bool > +expand_sra::valid_scalariable_accesses (vec<access_p> *access_vec, bool > is_parm) > +{ > + if (access_vec->is_empty ()) > + return false; > + > + for (unsigned int j = 0; j < access_vec->length (); j++) > + { > + struct access *access = (*access_vec)[j]; > + if (is_parm && access->write) > + return false; > + > + if (!is_parm && !access->write) > + return false; > + } > + > + return true; > +} > + > +void > +expand_sra::prepare_expander_sra () > +{ > + if (optimize <= 0) > + return; > + > + base_access_vec = new hash_map<tree, auto_vec<access_p> >; > + expr_rtx_vec = new hash_map<tree, rtx>; > + > + collect_sra_candidates (); > +} > + > +expand_sra::expand_sra () : expr_rtx_vec (NULL), base_access_vec (NULL) > +{ > + prepare_expander_sra (); > +} > + > +expand_sra::~expand_sra () > +{ > + if (optimize <= 0) > + return; > + delete expr_rtx_vec; > + expr_rtx_vec = NULL; > + delete base_access_vec; > + base_access_vec = NULL; > +} > + > +rtx > +expand_sra::get_scalarized_rtx (tree expr) > +{ > + if (!expr_rtx_vec) > + return NULL_RTX; > + rtx *val = expr_rtx_vec->get (expr); > + return val ? *val : NULL_RTX; > +} > + > +/* Get the register at INDEX from a parallel REGS. */ > + > +static rtx > +extract_one_reg (rtx regs, int index) > +{ > + rtx orig_reg = XEXP (XVECEXP (regs, 0, index), 0); > + if (!HARD_REGISTER_P (orig_reg)) > + return orig_reg; > + > + /* Reading from param hard reg need to be moved to a temp. */ > + rtx reg = gen_reg_rtx (GET_MODE (orig_reg)); > + emit_move_insn (reg, orig_reg); > + return reg; > +} > + > +/* Get IMODE part from REG at OFF_BITS. 
*/ > + > +static rtx > +extract_sub_reg (rtx orig_reg, int off_bits, machine_mode mode) > +{ > + scalar_int_mode imode; > + if (!int_mode_for_mode (mode).exists (&imode)) > + return NULL_RTX; > + > + machine_mode orig_mode = GET_MODE (orig_reg); > + gcc_assert (GET_MODE_CLASS (orig_mode) == MODE_INT); > + > + poly_uint64 lowpart_off = subreg_lowpart_offset (imode, orig_mode); > + int lowpart_off_bits = lowpart_off.to_constant () * BITS_PER_UNIT; > + int shift_bits; > + if (lowpart_off_bits >= off_bits) > + shift_bits = lowpart_off_bits - off_bits; > + else > + shift_bits = off_bits - lowpart_off_bits; > + > + rtx reg = orig_reg; > + if (shift_bits > 0) > + reg = expand_shift (RSHIFT_EXPR, orig_mode, reg, shift_bits, NULL, 1); > + > + rtx subreg = gen_lowpart (imode, reg); > + rtx result = gen_reg_rtx (imode); > + emit_move_insn (result, subreg); > + > + if (mode != imode) > + result = gen_lowpart (mode, result); > + > + return result; > +} > + > +bool > +expand_sra::scalarize_access (access_p acc, rtx regs) > +{ > + machine_mode expr_mode = TYPE_MODE (TREE_TYPE (acc->expr)); > + > + /* mode of mult registers. */ > + if (expr_mode != BLKmode > + && known_gt (acc->size, GET_MODE_BITSIZE (word_mode))) > + return false; > + > + /* Compute the position of the access in the whole parallel rtx. */ > + int start_index = -1; > + int end_index = -1; > + HOST_WIDE_INT left_bits = 0; > + HOST_WIDE_INT right_bits = 0; > + int cur_index = XEXP (XVECEXP (regs, 0, 0), 0) ? 
0 : 1; > + for (; cur_index < XVECLEN (regs, 0); cur_index++) > + { > + rtx slot = XVECEXP (regs, 0, cur_index); > + HOST_WIDE_INT off = UINTVAL (XEXP (slot, 1)) * BITS_PER_UNIT; > + machine_mode mode = GET_MODE (XEXP (slot, 0)); > + HOST_WIDE_INT size = GET_MODE_BITSIZE (mode).to_constant (); > + if (off <= acc->offset && off + size > acc->offset) > + { > + start_index = cur_index; > + left_bits = acc->offset - off; > + } > + if (off + size >= acc->offset + acc->size) > + { > + end_index = cur_index; > + right_bits = off + size - (acc->offset + acc->size); > + break; > + } > + } > + /* Invalid access possition: padding or outof bound. */ > + if (start_index < 0 || end_index < 0) > + return false; > + > + /* Need multi-registers in a parallel for the access. */ > + if (expr_mode == BLKmode || end_index > start_index) > + { > + if (left_bits || right_bits) > + return false; > + > + int num_words = end_index - start_index + 1; > + rtx *tmps = XALLOCAVEC (rtx, num_words); > + > + int pos = 0; > + HOST_WIDE_INT start; > + start = UINTVAL (XEXP (XVECEXP (regs, 0, start_index), 1)); > + /* Extract whole registers. */ > + for (; pos < num_words; pos++) > + { > + int index = start_index + pos; > + rtx reg = extract_one_reg (regs, index); > + machine_mode mode = GET_MODE (reg); > + HOST_WIDE_INT off; > + off = UINTVAL (XEXP (XVECEXP (regs, 0, index), 1)) - start; > + tmps[pos] = gen_rtx_EXPR_LIST (mode, reg, GEN_INT (off)); > + } > + > + rtx reg = gen_rtx_PARALLEL (expr_mode, gen_rtvec_v (pos, tmps)); > + acc->rtx_val = reg; > + return true; > + } > + > + /* Just need one reg for the access. */ > + if (end_index == start_index && left_bits == 0 && right_bits == 0) > + { > + rtx reg = extract_one_reg (regs, start_index); > + if (GET_MODE (reg) != expr_mode) > + reg = gen_lowpart (expr_mode, reg); > + > + acc->rtx_val = reg; > + return true; > + } > + > + /* Need to extract part reg for the access. 
*/ > + if (!acc->write && end_index == start_index > + && (acc->size % BITS_PER_UNIT) == 0) > + { > + rtx orig_reg = XEXP (XVECEXP (regs, 0, start_index), 0); > + acc->rtx_val = extract_sub_reg (orig_reg, left_bits, expr_mode); > + if (acc->rtx_val) > + return true; > + } > + > + return false; > +} > + > +bool > +expand_sra::scalarize_accesses (tree base, rtx regs) > +{ > + if (!base_access_vec) > + return false; > + vec<access_p> *access_vec = base_access_vec->get (base); > + if (!access_vec) > + return false; > + bool is_parm = TREE_CODE (base) == PARM_DECL; > + if (!valid_scalariable_accesses (access_vec, is_parm)) > + return false; > + > + /* Go through each access, compute corresponding rtx(regs or subregs) > + for the expression. */ > + int n = access_vec->length (); > + int cur_access_index = 0; > + for (; cur_access_index < n; cur_access_index++) > + if (!scalarize_access ((*access_vec)[cur_access_index], regs)) > + break; > + > + /* Bind/map expr(tree) to sclarized rtx if all access scalarized. */ > + if (cur_access_index == n) > + for (int j = 0; j < n; j++) > + { > + access_p access = (*access_vec)[j]; > + expr_rtx_vec->put (access->expr, access->rtx_val); > + } > + > + return true; > +} > + > +static expand_sra *current_sra = NULL; > + > +/* Check If there is an sra access for the expr. > + Return the correspond scalar sym for the access. */ > + > +rtx > +get_scalar_rtx_for_aggregate_expr (tree expr) > +{ > + return current_sra ? current_sra->get_scalarized_rtx (expr) : NULL_RTX; > +} > + > +/* Compute/Set RTX registers for those accesses on BASE. 
*/ > + > +void > +set_scalar_rtx_for_aggregate_access (tree base, rtx regs) > +{ > + if (!current_sra) > + return; > + current_sra->scalarize_accesses (base, regs); > +} > + > +void > +set_scalar_rtx_for_returns () > +{ > + if (!current_sra) > + return; > + > + tree res = DECL_RESULT (current_function_decl); > + gcc_assert (res); > + edge_iterator ei; > + edge e; > + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > + if (greturn *r = safe_dyn_cast<greturn *> (*gsi_last_bb (e->src))) > + { > + tree val = gimple_return_retval (r); > + if (val && VAR_P (val)) > + current_sra->scalarize_accesses (val, DECL_RTL (res)); > + } > +} > + > /* Return an expression tree corresponding to the RHS of GIMPLE > statement STMT. */ > > @@ -3778,7 +4245,8 @@ expand_return (tree retval) > > /* If we are returning the RESULT_DECL, then the value has already > been stored into it, so we don't have to do anything special. */ > - if (TREE_CODE (retval_rhs) == RESULT_DECL) > + if (TREE_CODE (retval_rhs) == RESULT_DECL > + || get_scalar_rtx_for_aggregate_expr (retval_rhs)) > expand_value_return (result_rtl); > > /* If the result is an aggregate that is being returned in one (or more) > @@ -4422,6 +4890,9 @@ expand_debug_expr (tree exp) > int unsignedp = TYPE_UNSIGNED (TREE_TYPE (exp)); > addr_space_t as; > scalar_int_mode op0_mode, op1_mode, addr_mode; > + rtx x = get_scalar_rtx_for_aggregate_expr (exp); > + if (x) > + return NULL_RTX;/* optimized out. */ > > switch (TREE_CODE_CLASS (TREE_CODE (exp))) > { > @@ -6624,6 +7095,9 @@ pass_expand::execute (function *fun) > auto_bitmap forced_stack_vars; > discover_nonconstant_array_refs (forced_stack_vars); > > + current_sra = new expand_sra; > + scan_function (cfun, *current_sra); > + > /* Make sure all values used by the optimization passes have sane > defaults. 
*/ > reg_renumber = 0; > @@ -7052,6 +7526,8 @@ pass_expand::execute (function *fun) > loop_optimizer_finalize (); > } > > + delete current_sra; > + current_sra = NULL; > timevar_pop (TV_POST_EXPAND); > > return 0; > diff --git a/gcc/expr.cc b/gcc/expr.cc > index > 174f8acb269ab5450fc799516471d5a2bd9b9efa..53b48aba790d4dd8ade326a2b33a0c7ec3fffc47 > 100644 > --- a/gcc/expr.cc > +++ b/gcc/expr.cc > @@ -100,6 +100,7 @@ static void do_tablejump (rtx, machine_mode, rtx, rtx, > rtx, > static rtx const_vector_from_tree (tree); > static tree tree_expr_size (const_tree); > static void convert_mode_scalar (rtx, rtx, int); > +rtx get_scalar_rtx_for_aggregate_expr (tree); > > > /* This is run to set up which modes can be used > @@ -5618,11 +5619,12 @@ expand_assignment (tree to, tree from, bool > nontemporal) > Assignment of an array element at a constant index, and assignment of > an array element in an unaligned packed structure field, has the same > problem. Same for (partially) storing into a non-memory object. */ > - if (handled_component_p (to) > - || (TREE_CODE (to) == MEM_REF > - && (REF_REVERSE_STORAGE_ORDER (to) > - || mem_ref_refers_to_non_mem_p (to))) > - || TREE_CODE (TREE_TYPE (to)) == ARRAY_TYPE) > + if (!get_scalar_rtx_for_aggregate_expr (to) > + && (handled_component_p (to) > + || (TREE_CODE (to) == MEM_REF > + && (REF_REVERSE_STORAGE_ORDER (to) > + || mem_ref_refers_to_non_mem_p (to))) > + || TREE_CODE (TREE_TYPE (to)) == ARRAY_TYPE)) > { > machine_mode mode1; > poly_int64 bitsize, bitpos; > @@ -9006,6 +9008,9 @@ expand_expr_real (tree exp, rtx target, machine_mode > tmode, > ret = CONST0_RTX (tmode); > return ret ? 
ret : const0_rtx; > } > + rtx x = get_scalar_rtx_for_aggregate_expr (exp); > + if (x) > + return x; > > ret = expand_expr_real_1 (exp, target, tmode, modifier, alt_rtl, > inner_reference_p); > diff --git a/gcc/function.cc b/gcc/function.cc > index > dd2c1136e0725f55673f28e0eeaf4c91ad18e93f..7fe927bd36beac11466ca9fca12800892b57f0be > 100644 > --- a/gcc/function.cc > +++ b/gcc/function.cc > @@ -2740,6 +2740,9 @@ assign_parm_find_stack_rtl (tree parm, struct > assign_parm_data_one *data) > data->stack_parm = stack_parm; > } > > +extern void set_scalar_rtx_for_aggregate_access (tree, rtx); > +extern void set_scalar_rtx_for_returns (); > + > /* A subroutine of assign_parms. Adjust DATA->ENTRY_RTL such that it's > always valid and contiguous. */ > > @@ -3115,8 +3118,24 @@ assign_parm_setup_block (struct assign_parm_data_all > *all, > emit_move_insn (mem, entry_parm); > } > else > - move_block_from_reg (REGNO (entry_parm), mem, > - size_stored / UNITS_PER_WORD); > + { > + int regno = REGNO (entry_parm); > + int nregs = size_stored / UNITS_PER_WORD; > + move_block_from_reg (regno, mem, nregs); > + > + rtx *tmps = XALLOCAVEC (rtx, nregs); > + machine_mode mode = word_mode; > + HOST_WIDE_INT word_size = GET_MODE_SIZE (mode).to_constant (); > + for (int i = 0; i < nregs; i++) > + { > + rtx reg = gen_rtx_REG (mode, regno + i); > + rtx off = GEN_INT (word_size * i); > + tmps[i] = gen_rtx_EXPR_LIST (VOIDmode, reg, off); > + } > + > + rtx regs = gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (nregs, tmps)); > + set_scalar_rtx_for_aggregate_access (parm, regs); > + } > } > else if (data->stack_parm == 0 && !TYPE_EMPTY_P (data->arg.type)) > { > @@ -3716,6 +3735,10 @@ assign_parms (tree fndecl) > else > set_decl_incoming_rtl (parm, data.entry_parm, false); > > + rtx incoming = DECL_INCOMING_RTL (parm); > + if (GET_CODE (incoming) == PARALLEL) > + set_scalar_rtx_for_aggregate_access (parm, incoming); > + > assign_parm_adjust_stack_rtl (&data); > > if (assign_parm_setup_block_p (&data)) > @@ 
-5136,6 +5159,7 @@ expand_function_start (tree subr) > { > gcc_assert (GET_CODE (hard_reg) == PARALLEL); > set_parm_rtl (res, gen_group_rtx (hard_reg)); > + set_scalar_rtx_for_returns (); > } > } > > diff --git a/gcc/tree-sra.h b/gcc/tree-sra.h > index > f20266c46226f7840299a768cb575f6f92b54207..7af87bccf1b43badbc3f8a4c51a87c84d5020b9e > 100644 > --- a/gcc/tree-sra.h > +++ b/gcc/tree-sra.h > @@ -19,7 +19,85 @@ You should have received a copy of the GNU General Public > License > along with GCC; see the file COPYING3. If not see > <http://www.gnu.org/licenses/>. */ > > -bool type_internals_preclude_sra_p (tree type, const char **msg); > +struct base_access > +{ > + /* Values returned by get_ref_base_and_extent, indicates the > + OFFSET, SIZE and BASE of the access. */ > + HOST_WIDE_INT offset; > + HOST_WIDE_INT size; > + tree base; > + > + /* The context expression of this access. */ > + tree expr; > + > + /* Indicates this is a write access. */ > + bool write : 1; > + > + /* Indicates if this access is made in reverse storage order. */ > + bool reverse : 1; > +}; > + > +/* Default template for sra_scan_function. */ > + > +struct default_analyzer > +{ > + /* Template analyze functions. */ > + void analyze_phi (gphi *){}; > + void pre_analyze_stmt (gimple *){}; > + void analyze_return (greturn *){}; > + void analyze_assign (gassign *){}; > + void analyze_call (gcall *){}; > + void analyze_asm (gasm *){}; > + void analyze_default_stmt (gimple *){}; > +}; > + > +/* Scan function and look for interesting expressions. 
*/ > + > +template <typename analyzer> > +void > +scan_function (struct function *fun, analyzer &a) > +{ > + basic_block bb; > + FOR_EACH_BB_FN (bb, fun) > + { > + for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > + a.analyze_phi (gsi.phi ()); > + > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + a.pre_analyze_stmt (stmt); > + > + switch (gimple_code (stmt)) > + { > + case GIMPLE_RETURN: > + a.analyze_return (as_a<greturn *> (stmt)); > + break; > + > + case GIMPLE_ASSIGN: > + a.analyze_assign (as_a<gassign *> (stmt)); > + break; > + > + case GIMPLE_CALL: > + a.analyze_call (as_a<gcall *> (stmt)); > + break; > + > + case GIMPLE_ASM: > + a.analyze_asm (as_a<gasm *> (stmt)); > + break; > + > + default: > + a.analyze_default_stmt (stmt); > + break; > + } > + } > + } > +} > + > +bool > +type_internals_preclude_sra_p (tree type, const char **msg); > > /* Return true iff TYPE is stdarg va_list type (which early SRA and IPA-SRA > should leave alone). */ > diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C > b/gcc/testsuite/g++.target/powerpc/pr102024.C > index > 769585052b507ad971868795f861106230c976e3..c8995cae707bb6e2e849275b823d2ba14d24a966 > 100644 > --- a/gcc/testsuite/g++.target/powerpc/pr102024.C > +++ b/gcc/testsuite/g++.target/powerpc/pr102024.C > @@ -5,7 +5,7 @@ > // Test that a zero-width bit field in an otherwise homogeneous aggregate > // generates a psabi warning and passes arguments in GPRs. 
> > -// { dg-final { scan-assembler-times {\mstd\M} 4 } } > +// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } } > > struct a_thing > { > diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c > b/gcc/testsuite/gcc.target/powerpc/pr108073.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..7dd1a4a326a181e0f35c9418af20a9bebabdfe4b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c > @@ -0,0 +1,29 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -save-temps" } */ > + > +typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF; > +typedef struct SF {float a[4]; int i1; int i2; } SF; > + > +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 > && has_arch_pwr8 } } } } */ > +/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && > has_arch_pwr8 } } } } */ > +/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && > has_arch_pwr8 } } } } */ > +short __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return > a.s2+a.s3;return 0;} > +int __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return > a.i2+a.i1;return 0;} > +double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == > 2)return arg.a[3];else return 0.0;} > +float __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == > 2)return arg.a[2]; return 0;} > +float __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == > 2)return arg.a[1];return 0;} > + > +DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4}; > +SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2}; > + > +int main() > +{ > + if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == > 4.0 > + && foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0)) > + __builtin_abort (); > + if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0 > + && foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0)) > + __builtin_abort (); > + return 0; > +} > + > diff --git 
a/gcc/testsuite/gcc.target/powerpc/pr65421-1.c > b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..4e1f87f7939cbf1423772023ee392fc5200b6708 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c > @@ -0,0 +1,6 @@ > +/* PR target/65421 */ > +/* { dg-options "-O2" } */ > + > +typedef struct LARGE {double a[4]; int arr[32];} LARGE; > +LARGE foo (LARGE a){return a;} > +/* { dg-final { scan-assembler-times {\mmemcpy\M} 1 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421-2.c > b/gcc/testsuite/gcc.target/powerpc/pr65421-2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..8a8e1a0e9962317ba2c0942af8891b3c51f4d3a4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr65421-2.c > @@ -0,0 +1,32 @@ > +/* PR target/65421 */ > +/* { dg-options "-O2" } */ > +/* { dg-require-effective-target powerpc_elfv2 } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > + > +typedef struct FLOATS > +{ > + double a[3]; > +} FLOATS; > + > +/* 3 lfd after returns also optimized */ > +/* FLOATS ret_arg_pt (FLOATS *a){return *a;} */ > + > +/* 3 stfd */ > +void st_arg (FLOATS a, FLOATS *p) {*p = a;} > +/* { dg-final { scan-assembler-times {\mstfd\M} 3 } } */ > + > +/* blr */ > +FLOATS ret_arg (FLOATS a) {return a;} > + > +typedef struct MIX > +{ > + double a[2]; > + long l; > +} MIX; > + > +/* std 3 param regs to return slot */ > +MIX ret_arg1 (MIX a) {return a;} > +/* { dg-final { scan-assembler-times {\mstd\M} 3 } } */ > + > +/* count insns */ > +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 9 } } */