Re: [AArch64][3/4] Don't generate redundant checks when there is no composite arg
On Fri, May 06, 2016 at 04:00:40PM +0100, Jiong Wang wrote: > 2016-05-06 Jiong Wang > > gcc/ > * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid > duplicated check code. > > gcc/testsuite/ > * gcc.target/aarch64/va_arg_4.c: New testcase. I wonder whether this is safe for the int128_t/uint128_t data types and overaligned data types? My concern is that something with alignment of 16-bytes may still need the check to skip the final hard register slot on the stack. I'm not sure here, so I'll need some more time to think this through, or for Richard or Marcus to review this and help me understand why there is nothing to worry about. Thanks, James > From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001 > From: "Jiong.Wang" > Date: Fri, 6 May 2016 14:37:37 +0100 > Subject: [PATCH 3/4] 3 > > --- > gcc/config/aarch64/aarch64.c| 94 > - > gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++ > 2 files changed, 87 insertions(+), 30 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index b1a0287..06904d5 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -9587,6 +9587,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, > gimple_seq *pre_p, >bool indirect_p; >bool is_ha;/* is HFA or HVA. */ >bool dw_align; /* double-word align. 
*/ > + bool composite_type_p; >machine_mode ag_mode = VOIDmode; >int nregs; >machine_mode mode; > @@ -9594,13 +9595,14 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, > gimple_seq *pre_p, >tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; >tree stack, f_top, f_off, off, arg, roundup, on_stack; >HOST_WIDE_INT size, rsize, adjust, align; > - tree t, u, cond1, cond2; > + tree t, t1, u, cond1, cond2; > >indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); >if (indirect_p) > type = build_pointer_type (type); > >mode = TYPE_MODE (type); > + composite_type_p = aarch64_composite_type_p (type, mode); > >f_stack = TYPE_FIELDS (va_list_type_node); >f_grtop = DECL_CHAIN (f_stack); > @@ -9671,35 +9673,38 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, > gimple_seq *pre_p, > build_int_cst (TREE_TYPE (off), 0)); >cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE); > > - if (dw_align) > + if (composite_type_p) > { > - /* Emit: offs = (offs + 15) & -16. */ > - t = build2 (PLUS_EXPR, TREE_TYPE (off), off, > - build_int_cst (TREE_TYPE (off), 15)); > - t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, > - build_int_cst (TREE_TYPE (off), -16)); > - roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); > -} > - else > -roundup = NULL; > + if (dw_align) > + { > + /* Emit: offs = (offs + 15) & -16. 
*/ > + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, > + build_int_cst (TREE_TYPE (off), 15)); > + t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, > + build_int_cst (TREE_TYPE (off), -16)); > + roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); > + } > + else > + roundup = NULL; > > - /* Update ap.__[g|v]r_offs */ > - t = build2 (PLUS_EXPR, TREE_TYPE (off), off, > - build_int_cst (TREE_TYPE (off), rsize)); > - t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); > + /* Update ap.__[g|v]r_offs */ > + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, > + build_int_cst (TREE_TYPE (off), rsize)); > + t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); > > - /* String up. */ > - if (roundup) > -t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); > + /* String up. */ > + if (roundup) > + t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); > > - /* [cond2] if (ap.__[g|v]r_offs > 0) */ > - u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), > - build_int_cst (TREE_TYPE (f_off), 0)); > - cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); > + /* [cond2] if (ap.__[g|v]r_offs > 0) */ > + u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), > + build_int_cst (TREE_TYPE (f_off), 0)); > + cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); > > - /* String up: make sure the assignment happens before the use. */ > - t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2); > - COND_EXPR_ELSE (cond1) = t; > + /* String up: make sure the assignment happens before the use. */ > + t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2); > + COND_EXPR_ELSE (cond1) = t; > +} > >/* Prepare the trees handling the argument that is passed on the stack; > the to
[AArch64][3/4] Don't generate redundant checks when there is no composite arg
The AArch64 va_arg gimplify hook is generating redundant instructions. The current va_arg fetch logic is: 1 if (va_arg offset shows the arg is saved at reg_save_area) 2   if ((va_arg_offset + va_arg_type_size) <= 0) 3     fetch va_arg from reg_save_area. 4   else 5     fetch va_arg from incoming_stack. 6 else 7   fetch va_arg from incoming_stack. The logic hunk "fetch va_arg from incoming_stack" will be generated *twice*, thus causing redundancy. There is a particular further "if" check at line 2 because for a composite argument, we don't support argument splitting, so it's either passed entirely from the reg_save_area, or entirely from the incoming_stack area. Thus, we need the further check at line 2 to decide whether the space left at reg_save_area is enough; if not, then it's passed from incoming_stack. However, this complex logic is only necessary for composite types, not for others. This patch thus *restricts that redundancy to composite types only*, while for basic types like "int", "float" etc, we can simplify it into: if (va_arg_offset < 0) fetch va_arg from reg_save_area. else fetch va_arg from incoming_stack. And this simplified version is actually the most common case. For example, this patch reduced the instruction count from about 130 to 100 for the included testcase. ok for trunk? 2016-05-06 Jiong Wang gcc/ * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid duplicated check code. gcc/testsuite/ * gcc.target/aarch64/va_arg_4.c: New testcase. 
>From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001 From: "Jiong.Wang" Date: Fri, 6 May 2016 14:37:37 +0100 Subject: [PATCH 3/4] 3 --- gcc/config/aarch64/aarch64.c| 94 - gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++ 2 files changed, 87 insertions(+), 30 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b1a0287..06904d5 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -9587,6 +9587,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, bool indirect_p; bool is_ha; /* is HFA or HVA. */ bool dw_align; /* double-word align. */ + bool composite_type_p; machine_mode ag_mode = VOIDmode; int nregs; machine_mode mode; @@ -9594,13 +9595,14 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; tree stack, f_top, f_off, off, arg, roundup, on_stack; HOST_WIDE_INT size, rsize, adjust, align; - tree t, u, cond1, cond2; + tree t, t1, u, cond1, cond2; indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); if (indirect_p) type = build_pointer_type (type); mode = TYPE_MODE (type); + composite_type_p = aarch64_composite_type_p (type, mode); f_stack = TYPE_FIELDS (va_list_type_node); f_grtop = DECL_CHAIN (f_stack); @@ -9671,35 +9673,38 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, build_int_cst (TREE_TYPE (off), 0)); cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE); - if (dw_align) + if (composite_type_p) { - /* Emit: offs = (offs + 15) & -16. */ - t = build2 (PLUS_EXPR, TREE_TYPE (off), off, - build_int_cst (TREE_TYPE (off), 15)); - t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, - build_int_cst (TREE_TYPE (off), -16)); - roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); -} - else -roundup = NULL; + if (dw_align) + { + /* Emit: offs = (offs + 15) & -16. 
*/ + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), 15)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, + build_int_cst (TREE_TYPE (off), -16)); + roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); + } + else + roundup = NULL; - /* Update ap.__[g|v]r_offs */ - t = build2 (PLUS_EXPR, TREE_TYPE (off), off, - build_int_cst (TREE_TYPE (off), rsize)); - t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); + /* Update ap.__[g|v]r_offs */ + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), rsize)); + t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); - /* String up. */ - if (roundup) -t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); + /* String up. */ + if (roundup) + t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); - /* [cond2] if (ap.__[g|v]r_offs > 0) */ - u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), - build_int_cst (TREE_TYPE (f_off), 0)); - cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); + /* [cond2] if (ap.__[g|v]r_offs > 0) */ + u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), + build_int_cst (TREE_TYPE (f_off), 0));