Re: [PATCH] Avoid UB in ia32intrin.h rotate patterns (PR target/82498)

2017-10-12 Thread Uros Bizjak
On Thu, Oct 12, 2017 at 9:39 PM, Jakub Jelinek  wrote:
> Hi!
>
> The ia32intrin.h rotate intrinsics require the second argument to be
> in between 1 and 31 (or 63), otherwise they invoke UB.  But, we can do much
> better while generating the same instruction when optimizing, so the
> following patch uses the patterns we pattern recognize well and where
> the second argument can be any value.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-10-12  Jakub Jelinek  
>
> PR target/82498
> * config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow
> any values of __C while still being pattern recognizable as a simple
> rotate instruction.
>
> * gcc.dg/ubsan/pr82498.c: New test.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/ia32intrin.h.jj 2017-01-01 12:45:42.0 +0100
> +++ gcc/config/i386/ia32intrin.h2017-10-12 09:55:24.235602737 +0200
> @@ -147,7 +147,8 @@ extern __inline unsigned int
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  __rold (unsigned int __X, int __C)
>  {
> -  return (__X << __C) | (__X >> (32 - __C));
> +  __C &= 31;
> +  return (__X << __C) | (__X >> (-__C & 31));
>  }
>
>  /* 8bit ror */
> @@ -171,7 +172,8 @@ extern __inline unsigned int
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  __rord (unsigned int __X, int __C)
>  {
> -  return (__X >> __C) | (__X << (32 - __C));
> +  __C &= 31;
> +  return (__X >> __C) | (__X << (-__C & 31));
>  }
>
>  /* Pause */
> @@ -239,7 +241,8 @@ extern __inline unsigned long long
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  __rolq (unsigned long long __X, int __C)
>  {
> -  return (__X << __C) | (__X >> (64 - __C));
> +  __C &= 63;
> +  return (__X << __C) | (__X >> (-__C & 63));
>  }
>
>  /* 64bit ror */
> @@ -247,7 +250,8 @@ extern __inline unsigned long long
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  __rorq (unsigned long long __X, int __C)
>  {
> -  return (__X >> __C) | (__X << (64 - __C));
> +  __C &= 63;
> +  return (__X >> __C) | (__X << (-__C & 63));
>  }
>
>  /* Read flags register */
> --- gcc/testsuite/gcc.dg/ubsan/pr82498.c.jj 2017-10-12 09:40:36.025438511 
> +0200
> +++ gcc/testsuite/gcc.dg/ubsan/pr82498.c2017-10-12 10:06:06.636790077 
> +0200
> @@ -0,0 +1,159 @@
> +/* PR target/82498 */
> +/* { dg-do run { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */
> +
> +#include 
> +
> +volatile unsigned int a;
> +volatile unsigned long long b;
> +volatile int c;
> +
> +int
> +main ()
> +{
> +  a = 0x12345678U;
> +  a = __rold (a, 0);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  a = __rold (a, 32);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  a = __rold (a, -32);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  a = __rold (a, 37);
> +  if (a != 0x468acf02U)
> +__builtin_abort ();
> +  a = __rold (a, -5);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  a = __rord (a, 0);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  a = __rord (a, 32);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  a = __rord (a, -32);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  a = __rord (a, -37);
> +  if (a != 0x468acf02U)
> +__builtin_abort ();
> +  a = __rord (a, 5);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = 0;
> +  a = __rold (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = 32;
> +  a = __rold (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = -32;
> +  a = __rold (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = 37;
> +  a = __rold (a, c);
> +  if (a != 0x468acf02U)
> +__builtin_abort ();
> +  c = -5;
> +  a = __rold (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = 0;
> +  a = __rord (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = 32;
> +  a = __rord (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = -32;
> +  a = __rord (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +  c = -37;
> +  a = __rord (a, c);
> +  if (a != 0x468acf02U)
> +__builtin_abort ();
> +  c = 5;
> +  a = __rord (a, c);
> +  if (a != 0x12345678U)
> +__builtin_abort ();
> +#ifdef __x86_64__
> +  b = 0x123456789abcdef1ULL;
> +  b = __rolq (b, 0);
> +  if (b != 0x123456789abcdef1ULL)
> +__builtin_abort ();
> +  b = __rolq (b, 64);
> +  if (b != 0x123456789abcdef1ULL)
> +__builtin_abort ();
> +  b = __rolq (b, -64);
> +  if (b != 0x123456789abcdef1ULL)
> +__builtin_abort ();
> +  b = __rolq (b, 69);
> +  if (b != 0x468acf13579bde22ULL)
> +__builtin_abort ();
> +  b = __rolq (b, -5);
> +  if (b != 0x123456789abcdef1ULL)
> +__builtin_abort ();
> +  b = __rorq (b, 0);
> +  if (b != 0x123456789abcdef1ULL)
> +__builtin_abort ();
> +  b = __rorq (b, 64);

[PATCH] Avoid UB in ia32intrin.h rotate patterns (PR target/82498)

2017-10-12 Thread Jakub Jelinek
Hi!

The ia32intrin.h rotate intrinsics require the second argument to be
in between 1 and 31 (or 63), otherwise they invoke UB.  But, we can do much
better while generating the same instruction when optimizing, so the
following patch uses the patterns we pattern recognize well and where
the second argument can be any value.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-10-12  Jakub Jelinek  

PR target/82498
* config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow
any values of __C while still being pattern recognizable as a simple
rotate instruction.

* gcc.dg/ubsan/pr82498.c: New test.

--- gcc/config/i386/ia32intrin.h.jj 2017-01-01 12:45:42.0 +0100
+++ gcc/config/i386/ia32intrin.h2017-10-12 09:55:24.235602737 +0200
@@ -147,7 +147,8 @@ extern __inline unsigned int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rold (unsigned int __X, int __C)
 {
-  return (__X << __C) | (__X >> (32 - __C));
+  __C &= 31;
+  return (__X << __C) | (__X >> (-__C & 31));
 }
 
 /* 8bit ror */
@@ -171,7 +172,8 @@ extern __inline unsigned int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rord (unsigned int __X, int __C)
 {
-  return (__X >> __C) | (__X << (32 - __C));
+  __C &= 31;
+  return (__X >> __C) | (__X << (-__C & 31));
 }
 
 /* Pause */
@@ -239,7 +241,8 @@ extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rolq (unsigned long long __X, int __C)
 {
-  return (__X << __C) | (__X >> (64 - __C));
+  __C &= 63;
+  return (__X << __C) | (__X >> (-__C & 63));
 }
 
 /* 64bit ror */
@@ -247,7 +250,8 @@ extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __rorq (unsigned long long __X, int __C)
 {
-  return (__X >> __C) | (__X << (64 - __C));
+  __C &= 63;
+  return (__X >> __C) | (__X << (-__C & 63));
 }
 
 /* Read flags register */
--- gcc/testsuite/gcc.dg/ubsan/pr82498.c.jj 2017-10-12 09:40:36.025438511 
+0200
+++ gcc/testsuite/gcc.dg/ubsan/pr82498.c2017-10-12 10:06:06.636790077 
+0200
@@ -0,0 +1,159 @@
+/* PR target/82498 */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */
+
+#include 
+
+volatile unsigned int a;
+volatile unsigned long long b;
+volatile int c;
+
+int
+main ()
+{
+  a = 0x12345678U;
+  a = __rold (a, 0);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  a = __rold (a, 32);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  a = __rold (a, -32);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  a = __rold (a, 37);
+  if (a != 0x468acf02U)
+__builtin_abort ();
+  a = __rold (a, -5);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  a = __rord (a, 0);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  a = __rord (a, 32);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  a = __rord (a, -32);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  a = __rord (a, -37);
+  if (a != 0x468acf02U)
+__builtin_abort ();
+  a = __rord (a, 5);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = 0;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = 32;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = -32;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = 37;
+  a = __rold (a, c);
+  if (a != 0x468acf02U)
+__builtin_abort ();
+  c = -5;
+  a = __rold (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = 0;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = 32;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = -32;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+  c = -37;
+  a = __rord (a, c);
+  if (a != 0x468acf02U)
+__builtin_abort ();
+  c = 5;
+  a = __rord (a, c);
+  if (a != 0x12345678U)
+__builtin_abort ();
+#ifdef __x86_64__
+  b = 0x123456789abcdef1ULL;
+  b = __rolq (b, 0);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  b = __rolq (b, 64);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  b = __rolq (b, -64);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  b = __rolq (b, 69);
+  if (b != 0x468acf13579bde22ULL)
+__builtin_abort ();
+  b = __rolq (b, -5);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  b = __rorq (b, 0);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  b = __rorq (b, 64);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  b = __rorq (b, -64);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  b = __rorq (b, -69);
+  if (b != 0x468acf13579bde22ULL)
+__builtin_abort ();
+  b = __rorq (b, 5);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+  c = 0;
+  b = __rolq (b, c);
+  if (b != 0x123456789abcdef1ULL)
+__builtin_abort ();
+