Re: [AArch64][3/4] Don't generate redundant checks when there is no composite arg

2016-05-31 Thread James Greenhalgh
On Fri, May 06, 2016 at 04:00:40PM +0100, Jiong Wang wrote:
> 2016-05-06  Jiong Wang  
> 
> gcc/
>   * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid
>   duplicated check code.
> 
> gcc/testsuite/
>   * gcc.target/aarch64/va_arg_4.c: New testcase.

I wonder whether this is safe for the int128_t/uint128_t data types and
overaligned data types? My concern is that something with alignment of
16-bytes may still need the check to skip the final hard register slot
on the stack.

I'm not sure here, so I'll need some more time to think this through, or
for Richard or Marcus to review this and help me understand why there
is nothing to worry about.

Thanks,
James

> From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001
> From: "Jiong.Wang" 
> Date: Fri, 6 May 2016 14:37:37 +0100
> Subject: [PATCH 3/4] 3
> 
> ---
>  gcc/config/aarch64/aarch64.c| 94 
> -
>  gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++
>  2 files changed, 87 insertions(+), 30 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b1a0287..06904d5 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9587,6 +9587,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, 
> gimple_seq *pre_p,
>bool indirect_p;
>bool is_ha;/* is HFA or HVA.  */
>bool dw_align; /* double-word align.  */
> +  bool composite_type_p;
>machine_mode ag_mode = VOIDmode;
>int nregs;
>machine_mode mode;
> @@ -9594,13 +9595,14 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, 
> gimple_seq *pre_p,
>tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
>tree stack, f_top, f_off, off, arg, roundup, on_stack;
>HOST_WIDE_INT size, rsize, adjust, align;
> -  tree t, u, cond1, cond2;
> +  tree t, t1, u, cond1, cond2;
>  
>indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
>if (indirect_p)
>  type = build_pointer_type (type);
>  
>mode = TYPE_MODE (type);
> +  composite_type_p = aarch64_composite_type_p (type, mode);
>  
>f_stack = TYPE_FIELDS (va_list_type_node);
>f_grtop = DECL_CHAIN (f_stack);
> @@ -9671,35 +9673,38 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, 
> gimple_seq *pre_p,
> build_int_cst (TREE_TYPE (off), 0));
>cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
>  
> -  if (dw_align)
> +  if (composite_type_p)
>  {
> -  /* Emit: offs = (offs + 15) & -16.  */
> -  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> -   build_int_cst (TREE_TYPE (off), 15));
> -  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> -   build_int_cst (TREE_TYPE (off), -16));
> -  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> -}
> -  else
> -roundup = NULL;
> +  if (dw_align)
> + {
> +   /* Emit: offs = (offs + 15) & -16.  */
> +   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +   build_int_cst (TREE_TYPE (off), 15));
> +   t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> +   build_int_cst (TREE_TYPE (off), -16));
> +   roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> + }
> +  else
> + roundup = NULL;
>  
> -  /* Update ap.__[g|v]r_offs  */
> -  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> -   build_int_cst (TREE_TYPE (off), rsize));
> -  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
> +  /* Update ap.__[g|v]r_offs  */
> +  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +   build_int_cst (TREE_TYPE (off), rsize));
> +  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
>  
> -  /* String up.  */
> -  if (roundup)
> -t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
> +  /* String up.  */
> +  if (roundup)
> + t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
>  
> -  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
> -  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
> -   build_int_cst (TREE_TYPE (f_off), 0));
> -  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
> +  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
> +  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
> +   build_int_cst (TREE_TYPE (f_off), 0));
> +  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
>  
> -  /* String up: make sure the assignment happens before the use.  */
> -  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
> -  COND_EXPR_ELSE (cond1) = t;
> +  /* String up: make sure the assignment happens before the use.  */
> +  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
> +  COND_EXPR_ELSE (cond1) = t;
> +}
>  
>/* Prepare the trees handling the argument 

[AArch64][3/4] Don't generate redundant checks when there is no composite arg

2016-05-06 Thread Jiong Wang

AArch64 va_arg gimplify hook is generating redundant instructions.

The current va_arg fetch logic is:

1  if (va_arg offset shows the arg is saved at reg_save_area)
2 if ((va_arg_offset + va_arg_type_size) <= 0)
3fetch va_arg from reg_save_area.
4 else
5fetch va_arg from incoming_stack.
6  else
7fetch va_arg from incoming_stack.

The logic hunk "fetch va_arg from incoming_stack" will be generated
*twice*, thus causing redundancy.

There is a particular further "if" check at line 2 because for composite
arguments, we don't support argument splitting, so such an argument is
either passed entirely from reg_save_area, or entirely from the
incoming_stack area.

Thus, we need the further check at line 2 to decide whether the space left
in reg_save_area is enough; if not, the argument is passed from incoming_stack.

However, this complex logic is only necessary for composite types, not
for other types.

This patch therefore *generates that redundancy only for composite types*,
while for basic types like "int", "float" etc., we can simplify the logic
into:

  if (va_arg_offset < 0)
fetch va_arg from reg_save_area.
  else
fetch va_arg from incoming_stack.

And this simplified version covers the most common case.

For example, this patch reduced the instruction count from about 130 to
100 for the included testcase.

ok for trunk?

2016-05-06  Jiong Wang  

gcc/
  * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid
  duplicated check code.

gcc/testsuite/
  * gcc.target/aarch64/va_arg_4.c: New testcase.
>From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" 
Date: Fri, 6 May 2016 14:37:37 +0100
Subject: [PATCH 3/4] 3

---
 gcc/config/aarch64/aarch64.c| 94 -
 gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++
 2 files changed, 87 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b1a0287..06904d5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9587,6 +9587,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   bool indirect_p;
   bool is_ha;		/* is HFA or HVA.  */
   bool dw_align;	/* double-word align.  */
+  bool composite_type_p;
   machine_mode ag_mode = VOIDmode;
   int nregs;
   machine_mode mode;
@@ -9594,13 +9595,14 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
   tree stack, f_top, f_off, off, arg, roundup, on_stack;
   HOST_WIDE_INT size, rsize, adjust, align;
-  tree t, u, cond1, cond2;
+  tree t, t1, u, cond1, cond2;
 
   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
   if (indirect_p)
 type = build_pointer_type (type);
 
   mode = TYPE_MODE (type);
+  composite_type_p = aarch64_composite_type_p (type, mode);
 
   f_stack = TYPE_FIELDS (va_list_type_node);
   f_grtop = DECL_CHAIN (f_stack);
@@ -9671,35 +9673,38 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 	  build_int_cst (TREE_TYPE (off), 0));
   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
 
-  if (dw_align)
+  if (composite_type_p)
 {
-  /* Emit: offs = (offs + 15) & -16.  */
-  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
-		  build_int_cst (TREE_TYPE (off), 15));
-  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
-		  build_int_cst (TREE_TYPE (off), -16));
-  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
-}
-  else
-roundup = NULL;
+  if (dw_align)
+	{
+	  /* Emit: offs = (offs + 15) & -16.  */
+	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		  build_int_cst (TREE_TYPE (off), 15));
+	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
+		  build_int_cst (TREE_TYPE (off), -16));
+	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
+	}
+  else
+	roundup = NULL;
 
-  /* Update ap.__[g|v]r_offs  */
-  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
-	  build_int_cst (TREE_TYPE (off), rsize));
-  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
+  /* Update ap.__[g|v]r_offs  */
+  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		  build_int_cst (TREE_TYPE (off), rsize));
+  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
 
-  /* String up.  */
-  if (roundup)
-t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
+  /* String up.  */
+  if (roundup)
+	t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
 
-  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
-  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
-	  build_int_cst (TREE_TYPE (f_off), 0));
-  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
+  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
+  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
+