Re: [PATCH] Improve SCEV for array element

2012-03-12 Thread Richard Guenther
On Thu, Mar 8, 2012 at 8:13 AM, Jiangning Liu jiangning@arm.com wrote:


 -Original Message-
 From: Richard Guenther [mailto:richard.guent...@gmail.com]
 Sent: Tuesday, March 06, 2012 9:12 PM
 To: Jiangning Liu
 Cc: gcc-patches@gcc.gnu.org
 Subject: Re: [PATCH] Improve SCEV for array element

 On Fri, Jan 20, 2012 at 10:06 AM, Jiangning Liu jiangning@arm.com
 wrote:
  It's definitely not ok at this stage but at most for next stage1.
 
  OK. I may wait until next stage1.
 
  This is a very narrow pattern-match.  It doesn't allow for a[i].x
 for
  example,
  even if a[i] is a one-element structure.  I think the canonical way
 of
  handling
  ADDR_EXPR is to use sth like
 
  base = get_inner_reference (TREE_OPERAND (rhs1, 0), ...,
 offset,  ...);
  base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
          chrec1 = analyze_scalar_evolution (loop, base);
          chrec2 = analyze_scalar_evolution (loop, offset);
          chrec1 = chrec_convert (type, chrec1, at_stmt);
          chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
          res = chrec_fold_plus (type, chrec1, chrec2);
 
  where you probably need to handle scev_not_known when analyzing
 offset
  (which might be NULL).  You also need to add bitpos to the base
 address
  (in bytes, of course).  Note that the MEM_REF case would naturally
  work
  with this as well.
 
  OK. New patch is like below, and bootstrapped on x86-32.

 You want instead of

 +      if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF
 +          || TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
 +          || TREE_CODE (TREE_OPERAND (rhs1, 0)) == COMPONENT_REF)
 +        {

 if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
     || handled_component_p (TREE_OPERAND (rhs1, 0)))
   {

 +             base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
 +             chrec1 = analyze_scalar_evolution (loop, base);

 can you please add a wrapper

 tree
 analyze_scalar_evolution_for_address_of (struct loop *loop, tree var)
 {
   return analyze_scalar_evolution (loop, build_fold_addr_expr (var));
 }

 and call that instead of building the ADDR_EXPR there?  We want
 to avoid building that tree node, but even such a simple wrapper would
 be prefered.

 +         if (bitpos)

 if (bitpos != 0)

 +             chrec3 = build_int_cst (integer_type_node,
 +                                     bitpos / BITS_PER_UNIT);

 please use size_int (bitpos / BITS_PER_UNIT) instead.  Using
 integer_type_node is definitely wrong.

 Ok with that changes.


 Richard,

 Modified as all you suggested, and new code is like below. Bootstrapped on
 x86-32. OK for trunk now?

Ok.

Thanks,
Richard.

 Thanks,
 -Jiangning

 ChangeLog:

 2012-03-08  Jiangning Liu  jiangning@arm.com

        * tree-scalar-evolution (interpret_rhs_expr): generate chrec for
        array reference and component reference.
        (analyze_scalar_evolution_for_address_of): New.

 ChangeLog for testsuite:

 2012-03-08  Jiangning Liu  jiangning@arm.com

        * gcc.dg/tree-ssa/scev-3.c: New.
        * gcc.dg/tree-ssa/scev-4.c: New.

 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 new file mode 100644
 index 000..28d5c93
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 @@ -0,0 +1,18 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +int *a_p;
 +int a[1000];
 +
 +f(int k)
 +{
 +       int i;
 +
 +       for (i=k; i1000; i+=k) {
 +               a_p = a[i];
 +               *a_p = 100;
 +        }
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 new file mode 100644
 index 000..6c1e530
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 @@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +typedef struct {
 +       int x;
 +       int y;
 +} S;
 +
 +int *a_p;
 +S a[1000];
 +
 +f(int k)
 +{
 +       int i;
 +
 +       for (i=k; i1000; i+=k) {
 +               a_p = a[i].y;
 +               *a_p = 100;
 +        }
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
 index 2077c8d..c719984
 --- a/gcc/tree-scalar-evolution.c
 +++ b/gcc/tree-scalar-evolution.c
 @@ -266,6 +266,8 @@ along with GCC; see the file COPYING3.  If not see
  #include params.h

  static tree analyze_scalar_evolution_1 (struct loop *, tree, tree);
 +static tree analyze_scalar_evolution_for_address_of (struct loop *loop,
 +                                                    tree var);

  /* The cached information about an SSA name VAR, claiming that below
    basic block INSTANTIATED_BELOW, the value of VAR can be expressed
 @@ -1712,16 +1714,59 @@ interpret_rhs_expr (struct loop

RE: [PATCH] Improve SCEV for array element

2012-03-07 Thread Jiangning Liu


 -Original Message-
 From: Richard Guenther [mailto:richard.guent...@gmail.com]
 Sent: Tuesday, March 06, 2012 9:12 PM
 To: Jiangning Liu
 Cc: gcc-patches@gcc.gnu.org
 Subject: Re: [PATCH] Improve SCEV for array element
 
 On Fri, Jan 20, 2012 at 10:06 AM, Jiangning Liu jiangning@arm.com
 wrote:
  It's definitely not ok at this stage but at most for next stage1.
 
  OK. I may wait until next stage1.
 
  This is a very narrow pattern-match.  It doesn't allow for a[i].x
 for
  example,
  even if a[i] is a one-element structure.  I think the canonical way
 of
  handling
  ADDR_EXPR is to use sth like
 
  base = get_inner_reference (TREE_OPERAND (rhs1, 0), ...,
 offset,  ...);
  base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
          chrec1 = analyze_scalar_evolution (loop, base);
          chrec2 = analyze_scalar_evolution (loop, offset);
          chrec1 = chrec_convert (type, chrec1, at_stmt);
          chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
          res = chrec_fold_plus (type, chrec1, chrec2);
 
  where you probably need to handle scev_not_known when analyzing
 offset
  (which might be NULL).  You also need to add bitpos to the base
 address
  (in bytes, of course).  Note that the MEM_REF case would naturally
  work
  with this as well.
 
  OK. New patch is like below, and bootstrapped on x86-32.
 
 You want instead of
 
 +  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF
 +  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
 +  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == COMPONENT_REF)
 +{
 
 if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
 || handled_component_p (TREE_OPERAND (rhs1, 0)))
   {
 
 + base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
 + chrec1 = analyze_scalar_evolution (loop, base);
 
 can you please add a wrapper
 
 tree
 analyze_scalar_evolution_for_address_of (struct loop *loop, tree var)
 {
   return analyze_scalar_evolution (loop, build_fold_addr_expr (var));
 }
 
 and call that instead of building the ADDR_EXPR there?  We want
 to avoid building that tree node, but even such a simple wrapper would
 be prefered.
 
 + if (bitpos)
 
 if (bitpos != 0)
 
 + chrec3 = build_int_cst (integer_type_node,
 + bitpos / BITS_PER_UNIT);
 
 please use size_int (bitpos / BITS_PER_UNIT) instead.  Using
 integer_type_node is definitely wrong.
 
 Ok with that changes.
 

Richard,

Modified as all you suggested, and new code is like below. Bootstrapped on
x86-32. OK for trunk now?

Thanks,
-Jiangning

ChangeLog:

2012-03-08  Jiangning Liu  jiangning@arm.com

* tree-scalar-evolution (interpret_rhs_expr): generate chrec for
array reference and component reference.
(analyze_scalar_evolution_for_address_of): New.

ChangeLog for testsuite:

2012-03-08  Jiangning Liu  jiangning@arm.com

* gcc.dg/tree-ssa/scev-3.c: New.
* gcc.dg/tree-ssa/scev-4.c: New.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
new file mode 100644
index 000..28d5c93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+int *a_p;
+int a[1000];
+
+f(int k)
+{
+   int i;
+
+   for (i=k; i1000; i+=k) {
+   a_p = a[i];
+   *a_p = 100;
+}
+}
+
+/* { dg-final { scan-tree-dump-times a 1 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
new file mode 100644
index 000..6c1e530
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+typedef struct {
+   int x;
+   int y;
+} S;
+
+int *a_p;
+S a[1000];
+
+f(int k)
+{
+   int i;
+
+   for (i=k; i1000; i+=k) {
+   a_p = a[i].y;
+   *a_p = 100;
+}
+}
+
+/* { dg-final { scan-tree-dump-times a 1 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 2077c8d..c719984
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -266,6 +266,8 @@ along with GCC; see the file COPYING3.  If not see
 #include params.h
 
 static tree analyze_scalar_evolution_1 (struct loop *, tree, tree);
+static tree analyze_scalar_evolution_for_address_of (struct loop *loop,
+tree var);
 
 /* The cached information about an SSA name VAR, claiming that below
basic block INSTANTIATED_BELOW, the value of VAR can be expressed
@@ -1712,16 +1714,59 @@ interpret_rhs_expr (struct loop *loop, gimple
at_stmt,
   switch (code)
 {
 case ADDR_EXPR:
-  /* Handle MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR

Re: [PATCH] Improve SCEV for array element

2012-03-06 Thread Richard Guenther
On Fri, Jan 20, 2012 at 10:06 AM, Jiangning Liu jiangning@arm.com wrote:
 It's definitely not ok at this stage but at most for next stage1.

 OK. I may wait until next stage1.

 This is a very narrow pattern-match.  It doesn't allow for a[i].x for
 example,
 even if a[i] is a one-element structure.  I think the canonical way of
 handling
 ADDR_EXPR is to use sth like

 base = get_inner_reference (TREE_OPERAND (rhs1, 0), ..., offset,  ...);
 base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
         chrec1 = analyze_scalar_evolution (loop, base);
         chrec2 = analyze_scalar_evolution (loop, offset);
         chrec1 = chrec_convert (type, chrec1, at_stmt);
         chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
         res = chrec_fold_plus (type, chrec1, chrec2);

 where you probably need to handle scev_not_known when analyzing offset
 (which might be NULL).  You also need to add bitpos to the base address
 (in bytes, of course).  Note that the MEM_REF case would naturally
 work
 with this as well.

 OK. New patch is like below, and bootstrapped on x86-32.

You want instead of

+  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF
+  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
+  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == COMPONENT_REF)
+{

if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
|| handled_component_p (TREE_OPERAND (rhs1, 0)))
  {

+ base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
+ chrec1 = analyze_scalar_evolution (loop, base);

can you please add a wrapper

tree
analyze_scalar_evolution_for_address_of (struct loop *loop, tree var)
{
  return analyze_scalar_evolution (loop, build_fold_addr_expr (var));
}

and call that instead of building the ADDR_EXPR there?  We want
to avoid building that tree node, but even such a simple wrapper would
be prefered.

+ if (bitpos)

if (bitpos != 0)

+ chrec3 = build_int_cst (integer_type_node,
+ bitpos / BITS_PER_UNIT);

please use size_int (bitpos / BITS_PER_UNIT) instead.  Using
integer_type_node is definitely wrong.

Ok with that changes.

Thanks,
Richard.


 ChangeLog:

 2012-01-20  Jiangning Liu  jiangning@arm.com

        * tree-scalar-evolution (interpret_rhs_expr): generate chrec for
        array reference and component reference.


 ChangeLog for testsuite:

 2012-01-20  Jiangning Liu  jiangning@arm.com

        * gcc.dg/tree-ssa/scev-3.c: New.
        * gcc.dg/tree-ssa/scev-4.c: New.

 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 new file mode 100644
 index 000..28d5c93
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 @@ -0,0 +1,18 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +int *a_p;
 +int a[1000];
 +
 +f(int k)
 +{
 +       int i;
 +
 +       for (i=k; i1000; i+=k) {
 +               a_p = a[i];
 +               *a_p = 100;
 +        }
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 new file mode 100644
 index 000..6c1e530
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 @@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +typedef struct {
 +       int x;
 +       int y;
 +} S;
 +
 +int *a_p;
 +S a[1000];
 +
 +f(int k)
 +{
 +       int i;
 +
 +       for (i=k; i1000; i+=k) {
 +               a_p = a[i].y;
 +               *a_p = 100;
 +        }
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
 index 2077c8d..4e06b75
 --- a/gcc/tree-scalar-evolution.c
 +++ b/gcc/tree-scalar-evolution.c
 @@ -1712,16 +1712,61 @@ interpret_rhs_expr (struct loop *loop, gimple
 at_stmt,
   switch (code)
     {
     case ADDR_EXPR:
 -      /* Handle MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.
 */
 -      if (TREE_CODE (TREE_OPERAND (rhs1, 0)) != MEM_REF)
 -       {
 -         res = chrec_dont_know;
 -         break;
 -       }
 +      if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF
 +          || TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
 +          || TREE_CODE (TREE_OPERAND (rhs1, 0)) == COMPONENT_REF)
 +        {
 +         enum machine_mode mode;
 +         HOST_WIDE_INT bitsize, bitpos;
 +         int unsignedp;
 +         int volatilep = 0;
 +         tree base, offset;
 +         tree chrec3;
 +
 +         base = get_inner_reference (TREE_OPERAND (rhs1, 0),
 +                                     bitsize, bitpos, offset,
 +                                     mode, unsignedp, volatilep, false);
 +
 +         if (TREE_CODE (base) == MEM_REF)
 +           {
 +             rhs2 = TREE_OPERAND (base, 1);
 +             rhs1 = 

RE: [PATCH] Improve SCEV for array element

2012-02-13 Thread Jiangning Liu


 -Original Message-
 From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
 ow...@gcc.gnu.org] On Behalf Of Jiangning Liu
 Sent: Friday, January 20, 2012 5:07 PM
 To: 'Richard Guenther'
 Cc: gcc-patches@gcc.gnu.org
 Subject: RE: [PATCH] Improve SCEV for array element
 
  It's definitely not ok at this stage but at most for next stage1.
 
 OK. I may wait until next stage1.
 
  This is a very narrow pattern-match.  It doesn't allow for a[i].x
 for
  example, even if a[i] is a one-element structure.  I think the
  canonical way of handling ADDR_EXPR is to use sth like
 
  base = get_inner_reference (TREE_OPERAND (rhs1, 0), ..., offset,
  ...); base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
  chrec1 = analyze_scalar_evolution (loop, base);
  chrec2 = analyze_scalar_evolution (loop, offset);
  chrec1 = chrec_convert (type, chrec1, at_stmt);
  chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
  res = chrec_fold_plus (type, chrec1, chrec2);
 
  where you probably need to handle scev_not_known when analyzing
 offset
  (which might be NULL).  You also need to add bitpos to the base
  address (in bytes, of course).  Note that the MEM_REF case would
  naturally work with this as well.
 
 OK. New patch is like below, and bootstrapped on x86-32.
 
 ChangeLog:
 
 2012-01-20  Jiangning Liu  jiangning@arm.com
 
 * tree-scalar-evolution (interpret_rhs_expr): generate chrec
 for
 array reference and component reference.
 
 
 ChangeLog for testsuite:
 
 2012-01-20  Jiangning Liu  jiangning@arm.com
 
 * gcc.dg/tree-ssa/scev-3.c: New.
 * gcc.dg/tree-ssa/scev-4.c: New.
 

Richard,

PING... Is this patch OK after branch 4.7 is created and trunk is open
again?

Thanks,
-Jiangning

 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 new file mode 100644
 index 000..28d5c93
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 @@ -0,0 +1,18 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +int *a_p;
 +int a[1000];
 +
 +f(int k)
 +{
 + int i;
 +
 + for (i=k; i1000; i+=k) {
 + a_p = a[i];
 + *a_p = 100;
 +}
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 new file mode 100644
 index 000..6c1e530
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 @@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +typedef struct {
 + int x;
 + int y;
 +} S;
 +
 +int *a_p;
 +S a[1000];
 +
 +f(int k)
 +{
 + int i;
 +
 + for (i=k; i1000; i+=k) {
 + a_p = a[i].y;
 + *a_p = 100;
 +}
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
 index 2077c8d..4e06b75
 --- a/gcc/tree-scalar-evolution.c
 +++ b/gcc/tree-scalar-evolution.c
 @@ -1712,16 +1712,61 @@ interpret_rhs_expr (struct loop *loop, gimple
 at_stmt,
switch (code)
  {
  case ADDR_EXPR:
 -  /* Handle MEM[ptr + CST] which is equivalent to
 POINTER_PLUS_EXPR.
 */
 -  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) != MEM_REF)
 - {
 -   res = chrec_dont_know;
 -   break;
 - }
 +  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF
 +  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
 +  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == COMPONENT_REF)
 +{
 +   enum machine_mode mode;
 +   HOST_WIDE_INT bitsize, bitpos;
 +   int unsignedp;
 +   int volatilep = 0;
 +   tree base, offset;
 +   tree chrec3;
 +
 +   base = get_inner_reference (TREE_OPERAND (rhs1, 0),
 +   bitsize, bitpos, offset,
 +   mode, unsignedp, volatilep, false);
 +
 +   if (TREE_CODE (base) == MEM_REF)
 + {
 +   rhs2 = TREE_OPERAND (base, 1);
 +   rhs1 = TREE_OPERAND (base, 0);
 +
 +   chrec1 = analyze_scalar_evolution (loop, rhs1);
 +   chrec2 = analyze_scalar_evolution (loop, rhs2);
 +   chrec1 = chrec_convert (type, chrec1, at_stmt);
 +   chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
 +   res = chrec_fold_plus (type, chrec1, chrec2);
 + }
 +   else
 + {
 +   base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
 +   chrec1 = analyze_scalar_evolution (loop, base);
 +   chrec1 = chrec_convert (type, chrec1, at_stmt);
 +   res = chrec1;
 + }
 
 -  rhs2 = TREE_OPERAND (TREE_OPERAND (rhs1, 0), 1);
 -  rhs1 = TREE_OPERAND (TREE_OPERAND (rhs1, 0), 0);
 -  /* Fall through.  */
 +   if (offset != NULL_TREE

Re: [PATCH] Improve SCEV for array element

2012-02-13 Thread Richard Guenther
On Mon, Feb 13, 2012 at 10:54 AM, Jiangning Liu jiangning@arm.com wrote:


 -Original Message-
 From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
 ow...@gcc.gnu.org] On Behalf Of Jiangning Liu
 Sent: Friday, January 20, 2012 5:07 PM
 To: 'Richard Guenther'
 Cc: gcc-patches@gcc.gnu.org
 Subject: RE: [PATCH] Improve SCEV for array element

  It's definitely not ok at this stage but at most for next stage1.

 OK. I may wait until next stage1.

  This is a very narrow pattern-match.  It doesn't allow for a[i].x
 for
  example, even if a[i] is a one-element structure.  I think the
  canonical way of handling ADDR_EXPR is to use sth like
 
  base = get_inner_reference (TREE_OPERAND (rhs1, 0), ..., offset,
  ...); base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
          chrec1 = analyze_scalar_evolution (loop, base);
          chrec2 = analyze_scalar_evolution (loop, offset);
          chrec1 = chrec_convert (type, chrec1, at_stmt);
          chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
          res = chrec_fold_plus (type, chrec1, chrec2);
 
  where you probably need to handle scev_not_known when analyzing
 offset
  (which might be NULL).  You also need to add bitpos to the base
  address (in bytes, of course).  Note that the MEM_REF case would
  naturally work with this as well.

 OK. New patch is like below, and bootstrapped on x86-32.

 ChangeLog:

 2012-01-20  Jiangning Liu  jiangning@arm.com

         * tree-scalar-evolution (interpret_rhs_expr): generate chrec
 for
         array reference and component reference.


 ChangeLog for testsuite:

 2012-01-20  Jiangning Liu  jiangning@arm.com

         * gcc.dg/tree-ssa/scev-3.c: New.
         * gcc.dg/tree-ssa/scev-4.c: New.


 Richard,

 PING... Is this patch OK after branch 4.7 is created and trunk is open
 again?

It's on my (rather large) list of things to review for 4.8.  Be patient ...

Richard.

 Thanks,
 -Jiangning

 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 new file mode 100644
 index 000..28d5c93
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 @@ -0,0 +1,18 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +int *a_p;
 +int a[1000];
 +
 +f(int k)
 +{
 +     int i;
 +
 +     for (i=k; i1000; i+=k) {
 +             a_p = a[i];
 +             *a_p = 100;
 +        }
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 new file mode 100644
 index 000..6c1e530
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 @@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +typedef struct {
 +     int x;
 +     int y;
 +} S;
 +
 +int *a_p;
 +S a[1000];
 +
 +f(int k)
 +{
 +     int i;
 +
 +     for (i=k; i1000; i+=k) {
 +             a_p = a[i].y;
 +             *a_p = 100;
 +        }
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
 index 2077c8d..4e06b75
 --- a/gcc/tree-scalar-evolution.c
 +++ b/gcc/tree-scalar-evolution.c
 @@ -1712,16 +1712,61 @@ interpret_rhs_expr (struct loop *loop, gimple
 at_stmt,
    switch (code)
      {
      case ADDR_EXPR:
 -      /* Handle MEM[ptr + CST] which is equivalent to
 POINTER_PLUS_EXPR.
 */
 -      if (TREE_CODE (TREE_OPERAND (rhs1, 0)) != MEM_REF)
 -     {
 -       res = chrec_dont_know;
 -       break;
 -     }
 +      if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF
 +          || TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
 +          || TREE_CODE (TREE_OPERAND (rhs1, 0)) == COMPONENT_REF)
 +        {
 +       enum machine_mode mode;
 +       HOST_WIDE_INT bitsize, bitpos;
 +       int unsignedp;
 +       int volatilep = 0;
 +       tree base, offset;
 +       tree chrec3;
 +
 +       base = get_inner_reference (TREE_OPERAND (rhs1, 0),
 +                                   bitsize, bitpos, offset,
 +                                   mode, unsignedp, volatilep, false);
 +
 +       if (TREE_CODE (base) == MEM_REF)
 +         {
 +           rhs2 = TREE_OPERAND (base, 1);
 +           rhs1 = TREE_OPERAND (base, 0);
 +
 +           chrec1 = analyze_scalar_evolution (loop, rhs1);
 +           chrec2 = analyze_scalar_evolution (loop, rhs2);
 +           chrec1 = chrec_convert (type, chrec1, at_stmt);
 +           chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
 +           res = chrec_fold_plus (type, chrec1, chrec2);
 +         }
 +       else
 +         {
 +           base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
 +           chrec1 = analyze_scalar_evolution (loop, base);
 +           chrec1 = chrec_convert (type, chrec1, at_stmt);
 +           res = chrec1;
 +         }

 -      rhs2 = TREE_OPERAND

RE: [PATCH] Improve SCEV for array element

2012-01-20 Thread Jiangning Liu
 It's definitely not ok at this stage but at most for next stage1.

OK. I may wait until next stage1.

 This is a very narrow pattern-match.  It doesn't allow for a[i].x for
 example,
 even if a[i] is a one-element structure.  I think the canonical way of
 handling
 ADDR_EXPR is to use sth like
 
 base = get_inner_reference (TREE_OPERAND (rhs1, 0), ..., offset,  ...);
 base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
 chrec1 = analyze_scalar_evolution (loop, base);
 chrec2 = analyze_scalar_evolution (loop, offset);
 chrec1 = chrec_convert (type, chrec1, at_stmt);
 chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
 res = chrec_fold_plus (type, chrec1, chrec2);
 
 where you probably need to handle scev_not_known when analyzing offset
 (which might be NULL).  You also need to add bitpos to the base address
 (in bytes, of course).  Note that the MEM_REF case would naturally
 work
 with this as well.

OK. New patch is like below, and bootstrapped on x86-32.

ChangeLog:

2012-01-20  Jiangning Liu  jiangning@arm.com

* tree-scalar-evolution (interpret_rhs_expr): generate chrec for
array reference and component reference.


ChangeLog for testsuite:

2012-01-20  Jiangning Liu  jiangning@arm.com

* gcc.dg/tree-ssa/scev-3.c: New.
* gcc.dg/tree-ssa/scev-4.c: New.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
new file mode 100644
index 000..28d5c93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+int *a_p;
+int a[1000];
+
+f(int k)
+{
+   int i;
+
+   for (i=k; i1000; i+=k) {
+   a_p = a[i];
+   *a_p = 100;
+}
+}
+
+/* { dg-final { scan-tree-dump-times a 1 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
new file mode 100644
index 000..6c1e530
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+typedef struct {
+   int x;
+   int y;
+} S;
+
+int *a_p;
+S a[1000];
+
+f(int k)
+{
+   int i;
+
+   for (i=k; i1000; i+=k) {
+   a_p = a[i].y;
+   *a_p = 100;
+}
+}
+
+/* { dg-final { scan-tree-dump-times a 1 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 2077c8d..4e06b75
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -1712,16 +1712,61 @@ interpret_rhs_expr (struct loop *loop, gimple
at_stmt,
   switch (code)
 {
 case ADDR_EXPR:
-  /* Handle MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.
*/
-  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) != MEM_REF)
-   {
- res = chrec_dont_know;
- break;
-   }
+  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF
+  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
+  || TREE_CODE (TREE_OPERAND (rhs1, 0)) == COMPONENT_REF)
+{
+ enum machine_mode mode;
+ HOST_WIDE_INT bitsize, bitpos;
+ int unsignedp;
+ int volatilep = 0;
+ tree base, offset;
+ tree chrec3;
+
+ base = get_inner_reference (TREE_OPERAND (rhs1, 0),
+ bitsize, bitpos, offset,
+ mode, unsignedp, volatilep, false);
+
+ if (TREE_CODE (base) == MEM_REF)
+   {
+ rhs2 = TREE_OPERAND (base, 1);
+ rhs1 = TREE_OPERAND (base, 0);
+
+ chrec1 = analyze_scalar_evolution (loop, rhs1);
+ chrec2 = analyze_scalar_evolution (loop, rhs2);
+ chrec1 = chrec_convert (type, chrec1, at_stmt);
+ chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
+ res = chrec_fold_plus (type, chrec1, chrec2);
+   }
+ else
+   {
+ base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
+ chrec1 = analyze_scalar_evolution (loop, base);
+ chrec1 = chrec_convert (type, chrec1, at_stmt);
+ res = chrec1;
+   }
 
-  rhs2 = TREE_OPERAND (TREE_OPERAND (rhs1, 0), 1);
-  rhs1 = TREE_OPERAND (TREE_OPERAND (rhs1, 0), 0);
-  /* Fall through.  */
+ if (offset != NULL_TREE)
+   {
+ chrec2 = analyze_scalar_evolution (loop, offset);
+ chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
+ res = chrec_fold_plus (type, res, chrec2);
+   }
+
+ if (bitpos)
+   {
+ gcc_assert ((bitpos % BITS_PER_UNIT) == 0);
+
+ chrec3 = build_int_cst (integer_type_node,
+ bitpos / BITS_PER_UNIT);
+ 

Re: [PATCH] Improve SCEV for array element

2012-01-18 Thread Richard Guenther
On Wed, Jan 18, 2012 at 8:41 AM, Jiangning Liu jiangning@arm.com wrote:
 This code change intends to improve scev for array element and reduce the
 common sub-expressions in loop, which may be introduced by multiple
 reference of expression like a[i]. With this optimization the register
 pressure can be reduced in loops.

 The problem is originally from a real benchmark, and the test case only
 tries to detect the GIMPLE level changes.

 Bootstraped on x86-32. OK for trunk?

It's definitely not ok at this stage but at most for next stage1.  But ...

 ChangeLog:

 2012-01-05  Jiangning Liu  jiangning@arm.com

        * tree-scalar-evolution (interpret_rhs_expr): generate chrec for
        array reference.

 ChangeLog for testsuite:

 2012-01-05  Jiangning Liu  jiangning@arm.com

        * gcc.dg/scev-1.c: New.

 diff --git a/gcc/testsuite/gcc.dg/scev-1.c b/gcc/testsuite/gcc.dg/scev-1.c
 new file mode 100644 index 000..28d5c93
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/scev-1.c
 @@ -0,0 +1,18 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +int *a_p;
 +int a[1000];
 +
 +f(int k)
 +{
 +       int i;
 +
 +       for (i=k; i1000; i+=k) {
 +               a_p = a[i];
 +               *a_p = 100;
 +        }
 +}
 +
 +/* { dg-final { scan-tree-dump-times a 1 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c index
 2077c8d..de89fc4
 --- a/gcc/tree-scalar-evolution.c
 +++ b/gcc/tree-scalar-evolution.c
 @@ -1712,6 +1712,42 @@ interpret_rhs_expr (struct loop *loop, gimple
 at_stmt,
   switch (code)
     {
     case ADDR_EXPR:
 +      if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF)

This is a very narrow pattern-match.  It doesn't allow for a[i].x for example,
even if a[i] is a one-element structure.  I think the canonical way of handling
ADDR_EXPR is to use sth like

base = get_inner_reference (TREE_OPERAND (rhs1, 0), ..., offset,  ...);
base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), base);
chrec1 = analyze_scalar_evolution (loop, base);
chrec2 = analyze_scalar_evolution (loop, offset);
chrec1 = chrec_convert (type, chrec1, at_stmt);
chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
res = chrec_fold_plus (type, chrec1, chrec2);

where you probably need to handle scev_not_known when analyzing offset
(which might be NULL).  You also need to add bitpos to the base address
(in bytes, of course).  Note that the MEM_REF case would naturally work
with this as well.

Richard.

 +        {
 +         tree array_ref;
 +         tree var_decl, base, offset;
 +         tree low_bound;
 +         tree unit_size;
 +         tree index;
 +
 +         array_ref = TREE_OPERAND (rhs1, 0);
 +         var_decl = TREE_OPERAND (array_ref, 0);
 +         index = TREE_OPERAND (array_ref, 1);
 +
 +         low_bound = array_ref_low_bound (array_ref);
 +         unit_size = array_ref_element_size (array_ref);
 +
 +         /* We assume all arrays have sizes that are a multiple of a byte.
 +            First subtract the lower bound, if any, in the type of the
 +            index, then convert to sizetype and multiply by the size of
 +            the array element.  */
 +         if (! integer_zerop (low_bound))
 +           index = fold_build2 (MINUS_EXPR, TREE_TYPE (index),
 +                                index, low_bound);
 +
 +         offset = size_binop (MULT_EXPR,
 +                              fold_convert (sizetype, index),
 +                              unit_size);
 +
 +         base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), var_decl);
 +         chrec1 = analyze_scalar_evolution (loop, base);
 +         chrec2 = analyze_scalar_evolution (loop, offset);
 +          chrec1 = chrec_convert (type, chrec1, at_stmt);
 +          chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
 +         res = chrec_fold_plus (type, chrec1, chrec2);
 +         break;
 +        }
 +
       /* Handle MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.
 */
       if (TREE_CODE (TREE_OPERAND (rhs1, 0)) != MEM_REF)
        {

 Thanks,
 -Jiangning


[PATCH] Improve SCEV for array element

2012-01-17 Thread Jiangning Liu
This code change intends to improve scev for array element and reduce the
common sub-expressions in loop, which may be introduced by multiple
reference of expression like a[i]. With this optimization the register
pressure can be reduced in loops. 

The problem is originally from a real benchmark, and the test case only
tries to detect the GIMPLE level changes.

Bootstraped on x86-32. OK for trunk?

ChangeLog:

2012-01-05  Jiangning Liu  jiangning@arm.com

* tree-scalar-evolution (interpret_rhs_expr): generate chrec for
array reference.

ChangeLog for testsuite:

2012-01-05  Jiangning Liu  jiangning@arm.com

* gcc.dg/scev-1.c: New.

diff --git a/gcc/testsuite/gcc.dg/scev-1.c b/gcc/testsuite/gcc.dg/scev-1.c
new file mode 100644 index 000..28d5c93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/scev-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+int *a_p;
+int a[1000];
+
+f(int k)
+{
+   int i;
+
+   for (i=k; i1000; i+=k) {
+   a_p = a[i];
+   *a_p = 100;
+}
+}
+
+/* { dg-final { scan-tree-dump-times a 1 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c index
2077c8d..de89fc4
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -1712,6 +1712,42 @@ interpret_rhs_expr (struct loop *loop, gimple
at_stmt,
   switch (code)
 {
 case ADDR_EXPR:
+  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF)
+{
+ tree array_ref;
+ tree var_decl, base, offset;
+ tree low_bound;
+ tree unit_size;
+ tree index;
+
+ array_ref = TREE_OPERAND (rhs1, 0);
+ var_decl = TREE_OPERAND (array_ref, 0);
+ index = TREE_OPERAND (array_ref, 1);
+
+ low_bound = array_ref_low_bound (array_ref);
+ unit_size = array_ref_element_size (array_ref);
+
+ /* We assume all arrays have sizes that are a multiple of a byte.
+First subtract the lower bound, if any, in the type of the
+index, then convert to sizetype and multiply by the size of
+the array element.  */
+ if (! integer_zerop (low_bound))
+   index = fold_build2 (MINUS_EXPR, TREE_TYPE (index),
+index, low_bound);
+
+ offset = size_binop (MULT_EXPR,
+  fold_convert (sizetype, index),
+  unit_size);
+
+ base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), var_decl);
+ chrec1 = analyze_scalar_evolution (loop, base);
+ chrec2 = analyze_scalar_evolution (loop, offset);
+  chrec1 = chrec_convert (type, chrec1, at_stmt);
+  chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
+ res = chrec_fold_plus (type, chrec1, chrec2);
+ break;
+}
+
   /* Handle MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.
*/
   if (TREE_CODE (TREE_OPERAND (rhs1, 0)) != MEM_REF)
{

Thanks,
-Jiangning

scev.patch
Description: Binary data


[RFC][patch] improve scev for array element

2012-01-05 Thread Jiangning Liu
This code change intends to improve scev for array element and reduce the
common sub-expressions in loop, which may be introduced by multiple
reference of expression a[i]. In the end the register pressure may be
reduced in the loop. The test case is simplified from a real benchmark, and
only want to show the GIMPLE level changes.

Any suggestions?

diff --git a/gcc/testsuite/gcc.dg/scev-1.c b/gcc/testsuite/gcc.dg/scev-1.c
new file mode 100644
index 000..28d5c93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/scev-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+int *a_p;
+int a[1000];
+
+f(int k)
+{
+   int i;
+
+   for (i=k; i1000; i+=k) {
+   a_p = a[i];
+   *a_p = 100;
+}
+}
+
+/* { dg-final { scan-tree-dump-times a 1 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 2077c8d..de89fc4
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -1712,6 +1712,42 @@ interpret_rhs_expr (struct loop *loop, gimple
at_stmt,
   switch (code)
 {
 case ADDR_EXPR:
+  if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == ARRAY_REF)
+{
+ tree array_ref;
+ tree var_decl, base, offset;
+ tree low_bound;
+ tree unit_size;
+ tree index;
+
+ array_ref = TREE_OPERAND (rhs1, 0);
+ var_decl = TREE_OPERAND (array_ref, 0);
+ index = TREE_OPERAND (array_ref, 1);
+
+ low_bound = array_ref_low_bound (array_ref);
+ unit_size = array_ref_element_size (array_ref);
+
+ /* We assume all arrays have sizes that are a multiple of a byte.
+First subtract the lower bound, if any, in the type of the
+index, then convert to sizetype and multiply by the size of
+the array element.  */
+ if (! integer_zerop (low_bound))
+   index = fold_build2 (MINUS_EXPR, TREE_TYPE (index),
+index, low_bound);
+
+ offset = size_binop (MULT_EXPR,
+  fold_convert (sizetype, index),
+  unit_size);
+
+ base = build1 (ADDR_EXPR, TREE_TYPE (rhs1), var_decl);
+ chrec1 = analyze_scalar_evolution (loop, base);
+ chrec2 = analyze_scalar_evolution (loop, offset);
+  chrec1 = chrec_convert (type, chrec1, at_stmt);
+  chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
+ res = chrec_fold_plus (type, chrec1, chrec2);
+ break;
+}
+
   /* Handle MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.
*/
   if (TREE_CODE (TREE_OPERAND (rhs1, 0)) != MEM_REF)
{

Another bigger case may be like below, and register pressure could be
observed lower.

int a[512] ;
int b[512] ;
int *ax ;
int *bx ;
int *ay ;
int *by ;
int i ;
int j ;

int f(int k)
{
for( j = 0 ; j  k ; j++)
{
for( i = j ; i  512 ; i += k)
{
ax = a[i+k] ;
bx = b[i+k] ;
ay = a[i] ;
by = b[i] ;

*ax = 7 ;
*bx = 7 ;

a[i] = 8;
b[i] = 8;
}
}
}

Thanks,
-Jiangning