Re: [PATCH] Support folding min(poly,poly) to const

2023-09-07 Thread Lehua Ding

Hi Richard,

On 2023/9/8 14:37, Richard Sandiford wrote:
I think this should instead use poly_int_tree_p and wi::to_poly_widest. 
There's no need to handle int64 and uint64 separately. (And there's no 
need to handle just 64-bit types.)


Thanks for the correction. I used wi::to_poly_wide(instead of 
wi::to_poly_widest) incorrectly and found that the returned type did not 
implement some functions such as known_le.
Here is the V2 patch: 
https://gcc.gnu.org/pipermail/gcc-patches/2023-September/629659.html


--
Best,
Lehua



Re: [PATCH] Support folding min(poly,poly) to const

2023-09-07 Thread Richard Sandiford via Gcc-patches
Lehua Ding  writes:
> Hi,
>
> This patch adds support that tries to fold `MIN (poly, poly)` to
> a constant. Consider the following C Code:
>
> ```
> void foo2 (int* restrict a, int* restrict b, int n)
> {
> for (int i = 0; i < 3; i += 1)
>   a[i] += b[i];
> }
> ```
>
> Before this patch:
>
> ```
> void foo2 (int * restrict a, int * restrict b, int n)
> {
>   vector([4,4]) int vect__7.27;
>   vector([4,4]) int vect__6.26;
>   vector([4,4]) int vect__4.23;
>   unsigned long _32;
>
>[local count: 268435456]:
>   _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>;
>   vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0);
>   vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0);
>   vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
>   .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0, vect__7.27_9); [tail 
> call]
>   return;
>
> }
> ```
>
> After this patch:
>
> ```
> void foo2 (int * restrict a, int * restrict b, int n)
> {
>   vector([4,4]) int vect__7.27;
>   vector([4,4]) int vect__6.26;
>   vector([4,4]) int vect__4.23;
>
>[local count: 268435456]:
>   vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0);
>   vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0);
>   vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
>   .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0, vect__7.27_9); [tail call]
>   return;
>
> }
> ```
>
> For RISC-V RVV, one branch instruction can be reduced:
>
> Before this patch:
>
> ```
> foo2:
> csrra4,vlenb
> srlia4,a4,2
> li  a5,3
> bleua5,a4,.L5
> mv  a5,a4
> .L5:
> vsetvli zero,a5,e32,m1,ta,ma
> ...
> ```
>
> After this patch.
>
> ```
> foo2:
>   vsetivlizero,3,e32,m1,ta,ma
> ...
> ```
>
> Best,
> Lehua
>
> gcc/ChangeLog:
>
>   * fold-const.cc (can_min_p): New function.
>   (poly_int_binop): Try fold MIN_EXPR.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust.
>   * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust.
>   * gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test.
>
> ---
>  gcc/fold-const.cc | 33 +++
>  .../riscv/rvv/autovec/fold-min-poly.c | 24 ++
>  .../gcc.target/riscv/rvv/autovec/vls/div-1.c  |  2 +-
>  .../riscv/rvv/autovec/vls/shift-3.c   |  2 +-
>  4 files changed, 59 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 1da498a3152..f7f793cc326 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -1213,6 +1213,34 @@ wide_int_binop (wide_int &res,
>return true;
>  }
>  
> +/* Returns true if we know who is smaller or equal, ARG1 or ARG2., and set 
> the
> +   min value to RES.  */
> +bool
> +can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res)
> +{
> +  if (tree_fits_poly_int64_p (arg1) && tree_fits_poly_int64_p (arg2))
> +{
> +  if (known_le (tree_to_poly_int64 (arg1), tree_to_poly_int64 (arg2)))
> + res = wi::to_poly_wide (arg1);
> +  else if (known_le (tree_to_poly_int64 (arg2), tree_to_poly_int64 
> (arg1)))
> + res = wi::to_poly_wide (arg2);
> +  else
> + return false;
> +}
> +  else if (tree_fits_poly_uint64_p (arg1) && tree_fits_poly_uint64_p (arg2))
> +{
> +  if (known_le (tree_to_poly_uint64 (arg1), tree_to_poly_uint64 (arg2)))
> + res = wi::to_poly_wide (arg1);
> +  else if (known_le (tree_to_poly_int64 (arg2), tree_to_poly_int64 
> (arg1)))
> + res = wi::to_poly_wide (arg2);
> +  else
> + return false;
> +}
> +  else
> +return false;
> +  return true;
> +}

I think this should instead use poly_int_tree_p and wi::to_poly_widest.
There's no need to handle int64 and uint64 separately.  (And there's
no need to handle just 64-bit types.)

Thanks,
Richard

> +
>  /* Combine two poly int's ARG1 and ARG2 under operation CODE to
> produce a new constant in RES.  Return FALSE if we don't know how
> to evaluate CODE at compile-time.  */
> @@ -1261,6 +1289,11 @@ poly_int_binop (poly_wide_int &res, enum tree_code 
> code,
>   return false;
>break;
>  
> +case MIN_EXPR:
> +  if (!can_min_p (arg1, arg2, res))
> + return false;
> +  break;
> +
>  default:
>return false;
>  }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
> new file mode 100644
> index 000..de4c472c76e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param 
> riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 
> -fno-vect-cost-model" } */
> +
> +void foo1 (int* restrict a, int* restrict b, int n)
> +{
> +for (int

Re: [PATCH] Support folding min(poly,poly) to const

2023-09-07 Thread Lehua Ding

Hi Andrew,

On 2023/9/8 13:28, Andrew Pinski wrote:

On Thu, Sep 7, 2023 at 10:25 PM Lehua Ding  wrote:


Hi,

This patch adds support that tries to fold `MIN (poly, poly)` to
a constant. Consider the following C Code:


Does it make sense to handle max also?



At the moment I can't construct a C program that can generates MIN_EXPR, 
so I don't add it, although I implement both min and max locally.


--
Best,
Lehua



Re: [PATCH] Support folding min(poly,poly) to const

2023-09-07 Thread Andrew Pinski via Gcc-patches
On Thu, Sep 7, 2023 at 10:25 PM Lehua Ding  wrote:
>
> Hi,
>
> This patch adds support that tries to fold `MIN (poly, poly)` to
> a constant. Consider the following C Code:

Does it make sense to handle max also?

Thanks,
Andrew


>
> ```
> void foo2 (int* restrict a, int* restrict b, int n)
> {
> for (int i = 0; i < 3; i += 1)
>   a[i] += b[i];
> }
> ```
>
> Before this patch:
>
> ```
> void foo2 (int * restrict a, int * restrict b, int n)
> {
>   vector([4,4]) int vect__7.27;
>   vector([4,4]) int vect__6.26;
>   vector([4,4]) int vect__4.23;
>   unsigned long _32;
>
>[local count: 268435456]:
>   _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>;
>   vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0);
>   vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0);
>   vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
>   .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0, vect__7.27_9); [tail 
> call]
>   return;
>
> }
> ```
>
> After this patch:
>
> ```
> void foo2 (int * restrict a, int * restrict b, int n)
> {
>   vector([4,4]) int vect__7.27;
>   vector([4,4]) int vect__6.26;
>   vector([4,4]) int vect__4.23;
>
>[local count: 268435456]:
>   vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0);
>   vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0);
>   vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
>   .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0, vect__7.27_9); [tail call]
>   return;
>
> }
> ```
>
> For RISC-V RVV, one branch instruction can be reduced:
>
> Before this patch:
>
> ```
> foo2:
> csrra4,vlenb
> srlia4,a4,2
> li  a5,3
> bleua5,a4,.L5
> mv  a5,a4
> .L5:
> vsetvli zero,a5,e32,m1,ta,ma
> ...
> ```
>
> After this patch.
>
> ```
> foo2:
> vsetivlizero,3,e32,m1,ta,ma
> ...
> ```
>
> Best,
> Lehua
>
> gcc/ChangeLog:
>
> * fold-const.cc (can_min_p): New function.
> (poly_int_binop): Try fold MIN_EXPR.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust.
> * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust.
> * gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test.
>
> ---
>  gcc/fold-const.cc | 33 +++
>  .../riscv/rvv/autovec/fold-min-poly.c | 24 ++
>  .../gcc.target/riscv/rvv/autovec/vls/div-1.c  |  2 +-
>  .../riscv/rvv/autovec/vls/shift-3.c   |  2 +-
>  4 files changed, 59 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 1da498a3152..f7f793cc326 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -1213,6 +1213,34 @@ wide_int_binop (wide_int &res,
>return true;
>  }
>
> +/* Returns true if we know who is smaller or equal, ARG1 or ARG2., and set 
> the
> +   min value to RES.  */
> +bool
> +can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res)
> +{
> +  if (tree_fits_poly_int64_p (arg1) && tree_fits_poly_int64_p (arg2))
> +{
> +  if (known_le (tree_to_poly_int64 (arg1), tree_to_poly_int64 (arg2)))
> +   res = wi::to_poly_wide (arg1);
> +  else if (known_le (tree_to_poly_int64 (arg2), tree_to_poly_int64 
> (arg1)))
> +   res = wi::to_poly_wide (arg2);
> +  else
> +   return false;
> +}
> +  else if (tree_fits_poly_uint64_p (arg1) && tree_fits_poly_uint64_p (arg2))
> +{
> +  if (known_le (tree_to_poly_uint64 (arg1), tree_to_poly_uint64 (arg2)))
> +   res = wi::to_poly_wide (arg1);
> +  else if (known_le (tree_to_poly_int64 (arg2), tree_to_poly_int64 
> (arg1)))
> +   res = wi::to_poly_wide (arg2);
> +  else
> +   return false;
> +}
> +  else
> +return false;
> +  return true;
> +}
> +
>  /* Combine two poly int's ARG1 and ARG2 under operation CODE to
> produce a new constant in RES.  Return FALSE if we don't know how
> to evaluate CODE at compile-time.  */
> @@ -1261,6 +1289,11 @@ poly_int_binop (poly_wide_int &res, enum tree_code 
> code,
> return false;
>break;
>
> +case MIN_EXPR:
> +  if (!can_min_p (arg1, arg2, res))
> +   return false;
> +  break;
> +
>  default:
>return false;
>  }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
> new file mode 100644
> index 000..de4c472c76e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param 
> riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 
> -fno-vect-cost-model" } */
> +
> +void foo1 (int* restrict a, int* restrict b, int n)
> +{
> +for (int i = 0; i < 4; i += 1)
> +  a[i] += b[i];
> +}
> +
> +void foo2 (int* restrict 

[PATCH] Support folding min(poly,poly) to const

2023-09-07 Thread Lehua Ding
Hi,

This patch adds support that tries to fold `MIN (poly, poly)` to
a constant. Consider the following C Code:

```
void foo2 (int* restrict a, int* restrict b, int n)
{
for (int i = 0; i < 3; i += 1)
  a[i] += b[i];
}
```

Before this patch:

```
void foo2 (int * restrict a, int * restrict b, int n)
{
  vector([4,4]) int vect__7.27;
  vector([4,4]) int vect__6.26;
  vector([4,4]) int vect__4.23;
  unsigned long _32;

   [local count: 268435456]:
  _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>;
  vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0);
  vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0);
  vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
  .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0, vect__7.27_9); [tail call]
  return;

}
```

After this patch:

```
void foo2 (int * restrict a, int * restrict b, int n)
{
  vector([4,4]) int vect__7.27;
  vector([4,4]) int vect__6.26;
  vector([4,4]) int vect__4.23;

   [local count: 268435456]:
  vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0);
  vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0);
  vect__7.27_9 = vect__6.26_15 + vect__4.23_20;
  .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0, vect__7.27_9); [tail call]
  return;

}
```

For RISC-V RVV, one branch instruction can be reduced:

Before this patch:

```
foo2:
csrra4,vlenb
srlia4,a4,2
li  a5,3
bleua5,a4,.L5
mv  a5,a4
.L5:
vsetvli zero,a5,e32,m1,ta,ma
...
```

After this patch.

```
foo2:
vsetivlizero,3,e32,m1,ta,ma
...
```

Best,
Lehua

gcc/ChangeLog:

* fold-const.cc (can_min_p): New function.
(poly_int_binop): Try fold MIN_EXPR.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust.
* gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust.
* gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test.

---
 gcc/fold-const.cc | 33 +++
 .../riscv/rvv/autovec/fold-min-poly.c | 24 ++
 .../gcc.target/riscv/rvv/autovec/vls/div-1.c  |  2 +-
 .../riscv/rvv/autovec/vls/shift-3.c   |  2 +-
 4 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 1da498a3152..f7f793cc326 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1213,6 +1213,34 @@ wide_int_binop (wide_int &res,
   return true;
 }
 
+/* Returns true if we know who is smaller or equal, ARG1 or ARG2., and set the
+   min value to RES.  */
+bool
+can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res)
+{
+  if (tree_fits_poly_int64_p (arg1) && tree_fits_poly_int64_p (arg2))
+{
+  if (known_le (tree_to_poly_int64 (arg1), tree_to_poly_int64 (arg2)))
+   res = wi::to_poly_wide (arg1);
+  else if (known_le (tree_to_poly_int64 (arg2), tree_to_poly_int64 (arg1)))
+   res = wi::to_poly_wide (arg2);
+  else
+   return false;
+}
+  else if (tree_fits_poly_uint64_p (arg1) && tree_fits_poly_uint64_p (arg2))
+{
+  if (known_le (tree_to_poly_uint64 (arg1), tree_to_poly_uint64 (arg2)))
+   res = wi::to_poly_wide (arg1);
+  else if (known_le (tree_to_poly_int64 (arg2), tree_to_poly_int64 (arg1)))
+   res = wi::to_poly_wide (arg2);
+  else
+   return false;
+}
+  else
+return false;
+  return true;
+}
+
 /* Combine two poly int's ARG1 and ARG2 under operation CODE to
produce a new constant in RES.  Return FALSE if we don't know how
to evaluate CODE at compile-time.  */
@@ -1261,6 +1289,11 @@ poly_int_binop (poly_wide_int &res, enum tree_code code,
return false;
   break;
 
+case MIN_EXPR:
+  if (!can_min_p (arg1, arg2, res))
+   return false;
+  break;
+
 default:
   return false;
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
new file mode 100644
index 000..de4c472c76e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param 
riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 
-fno-vect-cost-model" } */
+
+void foo1 (int* restrict a, int* restrict b, int n)
+{
+for (int i = 0; i < 4; i += 1)
+  a[i] += b[i];
+}
+
+void foo2 (int* restrict a, int* restrict b, int n)
+{
+for (int i = 0; i < 3; i += 1)
+  a[i] += b[i];
+}
+
+void foo3 (int* restrict a, int* restrict b, int n)
+{
+for (int i = 0; i < 5; i += 1)
+  a[i] += b[i];
+}
+
+/* { dg-final { scan-assembler-not {\tcsrr\t} } } */
+/* { dg-final { scan-assembler {\tvsetivli\tzero,4,e32,m1,t[au],m[au]} } } */
+/* { dg-final { scan-assembler {\tvsetivli\tzero,3,e32,m1,t[au],m[au]} } } */
diff --git a/gcc