Based on patch [3/4], we can further optimize the vaarg gimplification logic, this time not for redundant checks, but for redundant basic blocks. Thus we can simplify the control flow graph and eventually generate fewer branch instructions.
The current gimplification logic requires three basic blocks: // check if we already stepped into stack area if (vaarg_offset >= 0) { // we still in register area, but composite type will not // be passed partly in registers and partly on stack, make // sure the left register area is not left empty by composite // type. if it is, then skip them, and fetch from stack. if (vaarg_offset + arg_size > 0) fetch from stack else fetch from register } else fetch from register We can further optimize the logic into the following to reduce the number of basic blocks to two: if (vaarg_offset < 0 || (vaarg_offset + arg_size > 0)) fetch from stack else fetch from register OK for trunk? 2016-05-06 Alan Lawrence <alan.lawre...@arm.com> Jiong Wang <jiong.w...@arm.com> gcc/ * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Use TRUTH_ORIF_EXPR. gcc/testsuite/ * gcc.target/aarch64/va_arg_5.c: New test.
>From d742eaa3469f28e4207034f3fe4ebd4d54b3dd42 Mon Sep 17 00:00:00 2001 From: "Jiong.Wang" <jiong.w...@arm.com> Date: Fri, 6 May 2016 14:38:00 +0100 Subject: [PATCH 4/4] 4 --- gcc/config/aarch64/aarch64.c | 53 +++++++++++++++++++++-------- gcc/testsuite/gcc.target/aarch64/va_arg_5.c | 20 +++++++++++ 2 files changed, 58 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_5.c diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 06904d5..bd4a9fe 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -9577,7 +9577,32 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } -/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ +/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. + The VA_ARG gimplify logic was: + + // check if we already stepped into stack area + if (vaarg_offset >= 0) + { + // we still in register area, but composite type will not + // be passed partly in registers and partly on stack, make + // sure the left register area is not left empty by composite + // type. if it is, then skip them, and fetch from stack. + if (vaarg_offset + arg_size > 0) + fetch from stack + else + fetch from register + } + else + fetch from register + + we can further optimize the logic into the following to reduce BB. + + if (vaarg_offset < 0 || (vaarg_offset + arg_size > 0)) + fetch from stack + else + fetch from register + + the tree node TRUTH_ORIF_EXPR can express the condition we want. 
*/ static tree aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, @@ -9595,7 +9620,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; tree stack, f_top, f_off, off, arg, roundup, on_stack; HOST_WIDE_INT size, rsize, adjust, align; - tree t, t1, u, cond1, cond2; + tree t, t1, u, cond1, pred1, pred2; indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); if (indirect_p) @@ -9669,9 +9694,8 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, off = get_initialized_tmp_var (f_off, pre_p, NULL); /* Emit code to branch if off >= 0. */ - t = build2 (GE_EXPR, boolean_type_node, off, - build_int_cst (TREE_TYPE (off), 0)); - cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE); + pred1 = build2 (GE_EXPR, boolean_type_node, off, + build_int_cst (TREE_TYPE (off), 0)); if (composite_type_p) { @@ -9696,16 +9720,16 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, if (roundup) t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); - /* [cond2] if (ap.__[g|v]r_offs > 0) */ - u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), - build_int_cst (TREE_TYPE (f_off), 0)); - cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); + /* [pred2] if (ap.__[g|v]r_offs > 0) */ + pred2 = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), + build_int_cst (TREE_TYPE (f_off), 0)); + pred2 = build2 (COMPOUND_EXPR, TREE_TYPE (pred2), t, pred2); - /* String up: make sure the assignment happens before the use. */ - t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2); - COND_EXPR_ELSE (cond1) = t; + pred1 = build2 (TRUTH_ORIF_EXPR, boolean_type_node, pred1, pred2); } + cond1 = build3 (COND_EXPR, ptr_type_node, pred1, NULL_TREE, NULL_TREE); + /* Prepare the trees handling the argument that is passed on the stack; the top level node will store in ON_STACK. 
*/ arg = get_initialized_tmp_var (stack, pre_p, NULL); @@ -9746,8 +9770,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, if (composite_type_p) { - COND_EXPR_THEN (cond1) = unshare_expr (on_stack); - COND_EXPR_THEN (cond2) = unshare_expr (on_stack); + COND_EXPR_THEN (cond1) = on_stack; t = off; } @@ -9854,7 +9877,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, } if (composite_type_p) - COND_EXPR_ELSE (cond2) = t; + COND_EXPR_ELSE (cond1) = t; else { t1 = build2 (PLUS_EXPR, TREE_TYPE (off), roundup, diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_5.c b/gcc/testsuite/gcc.target/aarch64/va_arg_5.c new file mode 100644 index 0000000..0d6daef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/va_arg_5.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fdump-tree-lower_vaarg" } */ + +typedef struct A { + float a; +} T; + +T +foo (char *fmt, ...) +{ + T a; + __builtin_va_list ap; + + __builtin_va_start (ap, fmt); + a = __builtin_va_arg (ap, T); + __builtin_va_end (ap); + + /* { dg-final { scan-tree-dump-times "ap.__stack =" 1 "lower_vaarg"} } */ + return a; +} -- 1.9.1