On 01/08/18 18:35, James Greenhalgh wrote:
On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
On 31/07/18 22:48, James Greenhalgh wrote:
On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
Hi,

The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
(https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)

Bootstrapped and regtested on aarch64-none-linux-gnu and there are no 
regressions.

OK for trunk?

+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return -__a;
+}

Does this give the correct behaviour for the minimum value of int64_t? That
would be undefined behaviour in C, but well-defined under ACLE.

Thanks,
James


Hi. Thanks for the review.

For the minimum value of int64_t it behaves as the ACLE specifies:
"The negative of the minimum (signed) value is itself."

What should happen in this testcase? The spoiler is below, but try to work out
what should happen and what goes wrong with your implementation.

   int foo (int64_t x)
   {
     if (x < (int64_t) 0)
       return vnegd_s64(x) < (int64_t) 0;
     else
       return 0;
   }
   int bar (void)
   {
     return foo (INT64_MIN);
   }
Thanks,
James


-----

<spoiler!>




INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
vnegd_s64(INT64_MIN) is identity, so the return value should be
INT64_MIN < 0; i.e. True.

This isn't what the compiler thinks... The compiler makes use of the fact
that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
as a special case. The if statement gives you a range reduction to [-INF, -1],
negating that gives you a range [1, INF], and [1, INF] is never less than 0,
so the compiler folds the function to return false. We have a mismatch in
semantics.
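
The same fold can be reproduced without any intrinsic; a minimal
sketch (the function name is mine, not from the patch):

   #include <stdint.h>

   /* GCC may assume the signed negation never overflows, so on the
      taken branch x is treated as being in [INT64_MIN + 1, -1],
      -x lands in [1, INT64_MAX], and the comparison folds to 0.  */
   int negate_then_test (int64_t x)
   {
     return x < (int64_t) 0 ? -x < (int64_t) 0 : 0;
   }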

I see your point now. I have updated the vnegd_s64 intrinsic to convert to
unsigned before negating. This means that if the predicted range of x is
[INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
which reflect the issue you've pointed out. Note that I've changed the vabsd_s64
intrinsic in order to avoid moves between integer and vector registers.
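
A minimal sketch of why the cast suffices (relying on unsigned
wrap-around and on GCC's documented modulo behaviour when converting
an out-of-range value back to a signed type; the function name is
mine):

   #include <stdint.h>

   /* Unsigned negation is well-defined modular arithmetic:
      -(uint64_t) INT64_MIN == (uint64_t) INT64_MIN, and converting
      the result back to int64_t yields INT64_MIN again.  */
   int64_t negate_wrapping (int64_t x)
   {
     return - (uint64_t) x;
   }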

See the updated patch below. Ok for trunk?

---

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2d18400040f031dfcdaf60269ad484647804e1be..fc734e1aa9e93c171c0670164e5a3a54209905d3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11822,6 +11822,18 @@ vabsq_s64 (int64x2_t __a)
   return __builtin_aarch64_absv2di (__a);
 }
+/* Try to avoid moving between integer and vector registers.
+   For an explanation of why the cast to unsigned is needed, see
+   the vnegd_s64 intrinsic.  There is a testcase related to this
+   issue: gcc.target/aarch64/vabsd_s64.c.  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __a < 0 ? - (uint64_t) __a : __a;
+}
+
 /* vadd */
__extension__ extern __inline int64_t
@@ -22907,6 +22919,25 @@ vneg_s64 (int64x1_t __a)
   return -__a;
 }
+/* According to the ACLE, the negative of the minimum (signed)
+   value is itself.  This leads to a semantics mismatch, as this is
+   undefined behaviour in C.  The value range predictor is not
+   aware that the negation of a negative number can still be
+   negative, and it may try to fold the expression.  See the test
+   in gcc.target/aarch64/vnegd_s64.c for an example.
+
+   The cast below tricks the value range predictor into including
+   INT64_MIN in the range it computes.  So for x in the range
+   [INT64_MIN, y], the range prediction after vnegd_s64 (x) will
+   be ~[INT64_MIN + 1, y].  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return - (uint64_t) __a;
+}
+
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vnegq_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index ea29066e369b967d0781d31c8a5208bda9e4f685..d943989768dd8c9aa87d9dcb899e199029ef3f8b 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -627,6 +627,14 @@ test_vqabss_s32 (int32_t a)
   return vqabss_s32 (a);
 }
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
 /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
int8_t
diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf4e7ae4679d5b1896f35e3bf3135b0bd42befde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
@@ -0,0 +1,39 @@
+/* Test the vabsd_s64 intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
+           : "=w"(V1)                                           \
+           : "w"(V1)                                            \
+           : /* No clobbers */);
+
+#define RUN_TEST(test, answ)   \
+{                                      \
+  force_simd (test);                   \
+  force_simd (answ);                   \
+  int64_t res = vabsd_s64 (test);      \
+  force_simd (res);                    \
+  if (res != answ)                     \
+    abort ();                          \
+}
+
+int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
+int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
+
+int main (void)
+{
+  RUN_TEST (input[0], expected[0]);
+  RUN_TEST (input[1], expected[1]);
+  RUN_TEST (input[2], expected[2]);
+  RUN_TEST (input[3], expected[3]);
+  RUN_TEST (input[4], expected[4]);
+  RUN_TEST (input[5], expected[5]);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
new file mode 100644
index 0000000000000000000000000000000000000000..a0f88ee12c3ea0269041213899a68f6677d80d42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
@@ -0,0 +1,34 @@
+/* Check that the compiler does not optimise the vabsd_s64 call out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the absolute value of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -fno-inline -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+bar (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vabsd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+int
+main (void)
+{
+  int ans = 1;
+  int res_abs = bar (INT64_MIN);
+
+  if (res_abs != ans)
+    abort ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
index 911054053eaefb5a67b48578fac9e2ba428c3ab2..f708e97c34570eb75595915c040e5175562c2bea 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_s.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
@@ -75,6 +75,18 @@ extern void abort (void);
       }                                                              \
   }
+#define RUN_TEST_SCALAR(test_val, answ_val, a, b) \
+  {                                                   \
+    int64_t res;                                      \
+    INHIB_OPTIMIZATION;                               \
+    a = test_val;                                     \
+    b = answ_val;                                     \
+    force_simd (b);                                   \
+    force_simd (a);                                   \
+    res = vnegd_s64 (a);                              \
+    force_simd (res);                                 \
+  }
+
 int
 test_vneg_s8 ()
 {
@@ -179,6 +191,25 @@ test_vneg_s64 ()
 /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+int
+test_vnegd_s64 ()
+{
+  int64_t a, b;
+
+  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
+  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
+  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
+  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
+  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
+  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
+  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
+  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+
 int
 test_vnegq_s8 ()
 {
@@ -283,6 +314,9 @@ main (int argc, char **argv)
   if (test_vneg_s64 ())
     abort ();
+  if (test_vnegd_s64 ())
+    abort ();
+
   if (test_vnegq_s8 ())
     abort ();
diff --git a/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
new file mode 100644
index 0000000000000000000000000000000000000000..73d478ff49daf758e233958d134de8fb864090c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
@@ -0,0 +1,36 @@
+/* Check that the compiler does not optimise the negation out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the negative of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+foo (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vnegd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
+
+int
+main (void)
+{
+  int ans = 1;
+  int res = foo (INT64_MIN);
+
+  if (res != ans)
+    abort ();
+
+  return 0;
+}
+
