Re: [PATCH] vect: Tighten vect_determine_precisions_from_range [PR113281]

2024-01-29 Thread Richard Biener
On Sat, Jan 27, 2024 at 4:44 PM Richard Sandiford
 wrote:
>
> This was another PR caused by the way that
> vect_determine_precisions_from_range handle shifts.  We tried to
> narrow 32768 >> x to a 16-bit shift based on range information for
> the inputs and outputs, with vect_recog_over_widening_pattern
> (after PR110828) adjusting the shift amount.  But this doesn't
> work for the case where x is in [16, 31], since then 32-bit
> 32768 >> x is a well-defined zero, whereas no well-defined
> 16-bit 32768 >> y will produce 0.
>
> We could perhaps generate x < 16 ? 32768 >> x : 0 instead,
> but since vect_determine_precisions_from_range was never really
> supposed to rely on fix-ups, it seems better to fix that instead.
>
> The patch also makes the code more selective about which codes
> can be narrowed based on input and output ranges.  This showed
> that vect_truncatable_operation_p was missing cases for
> BIT_NOT_EXPR (equivalent to BIT_XOR_EXPR of -1) and NEGATE_EXPR
> (equivalent to BIT_NOT_EXPR followed by a PLUS_EXPR of 1).
>
> pr113281-1.c is the original testcase.  pr113281-[23].c failed
> before the patch due to overly optimistic narrowing.  pr113281-[45].c
> previously passed and are meant to protect against accidental
> optimisation regressions.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

OK.

Thanks,
Richard.

> Richard
>
>
> gcc/
> PR target/113281
> * tree-vect-patterns.cc (vect_recog_over_widening_pattern): Remove
> workaround for right shifts.
> (vect_truncatable_operation_p): Handle NEGATE_EXPR and BIT_NOT_EXPR.
> (vect_determine_precisions_from_range): Be more selective about
> which codes can be narrowed based on their input and output ranges.
> For shifts, require at least one more bit of precision than the
> maximum shift amount.
>
> gcc/testsuite/
> PR target/113281
> * gcc.dg/vect/pr113281-1.c: New test.
> * gcc.dg/vect/pr113281-2.c: Likewise.
> * gcc.dg/vect/pr113281-3.c: Likewise.
> * gcc.dg/vect/pr113281-4.c: Likewise.
> * gcc.dg/vect/pr113281-5.c: Likewise.
> ---
>  gcc/testsuite/gcc.dg/vect/pr113281-1.c |  17 +++
>  gcc/testsuite/gcc.dg/vect/pr113281-2.c |  50 +
>  gcc/testsuite/gcc.dg/vect/pr113281-3.c |  39 +++
>  gcc/testsuite/gcc.dg/vect/pr113281-4.c |  55 ++
>  gcc/testsuite/gcc.dg/vect/pr113281-5.c |  66 
>  gcc/tree-vect-patterns.cc  | 144 +
>  6 files changed, 305 insertions(+), 66 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-4.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-5.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-1.c 
> b/gcc/testsuite/gcc.dg/vect/pr113281-1.c
> new file mode 100644
> index 000..6df4231cb5f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr113281-1.c
> @@ -0,0 +1,17 @@
> +#include "tree-vect.h"
> +
> +unsigned char a;
> +
> +int main() {
> +  check_vect ();
> +
> +  short b = a = 0;
> +  for (; a != 19; a++)
> +if (a)
> +  b = 32872 >> a;
> +
> +  if (b == 0)
> +return 0;
> +  else
> +return 1;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-2.c 
> b/gcc/testsuite/gcc.dg/vect/pr113281-2.c
> new file mode 100644
> index 000..3a1170c28b6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr113281-2.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +
> +#define N 128
> +
> +short x[N];
> +short y[N];
> +
> +void
> +f1 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= y[i];
> +}
> +
> +void
> +f2 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= (y[i] < 32 ? y[i] : 32);
> +}
> +
> +void
> +f3 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= (y[i] < 31 ? y[i] : 31);
> +}
> +
> +void
> +f4 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= (y[i] & 31);
> +}
> +
> +void
> +f5 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= 0x8000 >> y[i];
> +}
> +
> +void
> +f6 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= 0x8000 >> (y[i] & 31);
> +}
> +
> +/* { dg-final { scan-tree-dump-not {can narrow[^\n]+>>} "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-3.c 
> b/gcc/testsuite/gcc.dg/vect/pr113281-3.c
> new file mode 100644
> index 000..5982dd2d16f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr113281-3.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +
> +#define N 128
> +
> +short x[N];
> +short y[N];
> +
> +void
> +f1 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= (y[i] < 30 ? y[i] : 30);
> +}
> +
> +void
> +f2 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= ((y[i] & 15) + 2);
> +}
> +
> +void
> +f3 (void)
> +{
> +  for (int i = 0; i < N; ++i)
> +x[i] >>= (y[i] < 16 ? 

[PATCH] vect: Tighten vect_determine_precisions_from_range [PR113281]

2024-01-27 Thread Richard Sandiford
This was another PR caused by the way that
vect_determine_precisions_from_range handle shifts.  We tried to
narrow 32768 >> x to a 16-bit shift based on range information for
the inputs and outputs, with vect_recog_over_widening_pattern
(after PR110828) adjusting the shift amount.  But this doesn't
work for the case where x is in [16, 31], since then 32-bit
32768 >> x is a well-defined zero, whereas no well-defined
16-bit 32768 >> y will produce 0.

We could perhaps generate x < 16 ? 32768 >> x : 0 instead,
but since vect_determine_precisions_from_range was never really
supposed to rely on fix-ups, it seems better to fix that instead.

The patch also makes the code more selective about which codes
can be narrowed based on input and output ranges.  This showed
that vect_truncatable_operation_p was missing cases for
BIT_NOT_EXPR (equivalent to BIT_XOR_EXPR of -1) and NEGATE_EXPR
(equivalent to BIT_NOT_EXPR followed by a PLUS_EXPR of 1).

pr113281-1.c is the original testcase.  pr113281-[23].c failed
before the patch due to overly optimistic narrowing.  pr113281-[45].c
previously passed and are meant to protect against accidental
optimisation regressions.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR target/113281
* tree-vect-patterns.cc (vect_recog_over_widening_pattern): Remove
workaround for right shifts.
(vect_truncatable_operation_p): Handle NEGATE_EXPR and BIT_NOT_EXPR.
(vect_determine_precisions_from_range): Be more selective about
which codes can be narrowed based on their input and output ranges.
For shifts, require at least one more bit of precision than the
maximum shift amount.

gcc/testsuite/
PR target/113281
* gcc.dg/vect/pr113281-1.c: New test.
* gcc.dg/vect/pr113281-2.c: Likewise.
* gcc.dg/vect/pr113281-3.c: Likewise.
* gcc.dg/vect/pr113281-4.c: Likewise.
* gcc.dg/vect/pr113281-5.c: Likewise.
---
 gcc/testsuite/gcc.dg/vect/pr113281-1.c |  17 +++
 gcc/testsuite/gcc.dg/vect/pr113281-2.c |  50 +
 gcc/testsuite/gcc.dg/vect/pr113281-3.c |  39 +++
 gcc/testsuite/gcc.dg/vect/pr113281-4.c |  55 ++
 gcc/testsuite/gcc.dg/vect/pr113281-5.c |  66 
 gcc/tree-vect-patterns.cc  | 144 +
 6 files changed, 305 insertions(+), 66 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-4.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr113281-5.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-1.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-1.c
new file mode 100644
index 000..6df4231cb5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-1.c
@@ -0,0 +1,17 @@
+#include "tree-vect.h"
+
+unsigned char a;
+
+int main() {
+  check_vect ();
+
+  short b = a = 0;
+  for (; a != 19; a++)
+if (a)
+  b = 32872 >> a;
+
+  if (b == 0)
+return 0;
+  else
+return 1;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-2.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-2.c
new file mode 100644
index 000..3a1170c28b6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-2.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+
+#define N 128
+
+short x[N];
+short y[N];
+
+void
+f1 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= y[i];
+}
+
+void
+f2 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= (y[i] < 32 ? y[i] : 32);
+}
+
+void
+f3 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= (y[i] < 31 ? y[i] : 31);
+}
+
+void
+f4 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= (y[i] & 31);
+}
+
+void
+f5 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= 0x8000 >> y[i];
+}
+
+void
+f6 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= 0x8000 >> (y[i] & 31);
+}
+
+/* { dg-final { scan-tree-dump-not {can narrow[^\n]+>>} "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-3.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-3.c
new file mode 100644
index 000..5982dd2d16f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-3.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+
+#define N 128
+
+short x[N];
+short y[N];
+
+void
+f1 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= (y[i] < 30 ? y[i] : 30);
+}
+
+void
+f2 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= ((y[i] & 15) + 2);
+}
+
+void
+f3 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] >>= (y[i] < 16 ? y[i] : 16);
+}
+
+void
+f4 (void)
+{
+  for (int i = 0; i < N; ++i)
+x[i] = 32768 >> ((y[i] & 15) + 3);
+}
+
+/* { dg-final { scan-tree-dump {can narrow to signed:31 without loss [^\n]+>>} 
"vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to signed:18 without loss [^\n]+>>} 
"vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to signed:17 without loss [^\n]+>>} 
"vect" } } */
+/* { dg-final { scan-t