Re: [PATCH 11/13 ver 3] rs6000, extend vec_xxpermdi built-in for __int128 args

2024-06-13 Thread Carl Love
Kewen:

On 6/3/24 22:58, Kewen.Lin wrote:
> Hi,
> 
> on 2024/5/30 00:10, Carl Love wrote:
>>  This was patch 10 from the previous series.  The patch was updated to 
>> address feedback comments.
>>
>> Carl 
>> ---
>>
>> rs6000, extend vec_xxpermdi built-in for __int128 args
>>
>> Add new signed and unsigned overloaded instances for vec_xxpermdi:
>>
>>__int128 vec_xxpermdi (__int128, __int128, const int);
>>__uint128 vec_xxpermdi (__uint128, __uint128, const int);
>>
>> Update the documentation to include a reference to the new built-in
>> instances.
>>
>> Add test cases for the new overloaded instances.
>>
>> gcc/ChangeLog:
>>  * config/rs6000/rs6000-overload.def (vec_xxpermdi): Add new
>>  overloaded built-in instances.
>>  * doc/extend.texi:  Add documentation for new overloaded built-in
>>  instances.
>>
>> gcc/testsuite/ChangeLog:
>>  * gcc.target/powerpc/vec_perm-runnable-i128.c: New test file.
>> ---
>>  gcc/config/rs6000/rs6000-overload.def |   4 +
>>  gcc/doc/extend.texi   |   2 +
>>  .../powerpc/vec_perm-runnable-i128.c  | 229 ++
>>  3 files changed, 235 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
>>
>> diff --git a/gcc/config/rs6000/rs6000-overload.def 
>> b/gcc/config/rs6000/rs6000-overload.def
>> index a210c5ad10d..45000f161e4 100644
>> --- a/gcc/config/rs6000/rs6000-overload.def
>> +++ b/gcc/config/rs6000/rs6000-overload.def
>> @@ -4932,6 +4932,10 @@
>>  XXPERMDI_4SF  XXPERMDI_VF
>>vd __builtin_vsx_xxpermdi (vd, vd, const int);
>>  XXPERMDI_2DF  XXPERMDI_VD
>> +  vsq __builtin_vsx_xxpermdi (vsq, vsq, const int);
>> +XXPERMDI_1TI  XXPERMDI_1TI
>> +  vuq __builtin_vsx_xxpermdi (vuq, vuq, const int);
>> +XXPERMDI_1TI  XXPERMDI_1TUI
> 
> Nits:
>   - Move them before "vf __builtin_vsx_xxpermdi (vf, vf, const int);" so
> they are close to instances for other integral types.
>   - Following the existing naming convention, the _{SQ,UQ} suffixes are better.
> 
> vsq __builtin_vsx_xxpermdi (vsq, vsq, const int);
>XXPERMDI_1TI  XXPERMDI_1SQ
> vuq __builtin_vsx_xxpermdi (vuq, vuq, const int);
>XXPERMDI_1TI  XXPERMDI_1UQ
> 

OK, moved the definitions up and changed the names.

>>  
>>  [VEC_XXSLDWI, vec_xxsldwi, __builtin_vsx_xxsldwi]
>>vsc __builtin_vsx_xxsldwi (vsc, vsc, const int);
>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
>> index 0756230b19e..edfef1bdab7 100644
>> --- a/gcc/doc/extend.texi
>> +++ b/gcc/doc/extend.texi
>> @@ -22555,6 +22555,8 @@ void vec_vsx_st (vector bool char, int, signed char 
>> *);
>>  vector double vec_xxpermdi (vector double, vector double, const int);
>>  vector float vec_xxpermdi (vector float, vector float, const int);
>>  vector long long vec_xxpermdi (vector long long, vector long long, const 
>> int);
> 
>> +vector __int128 vec_xxpermdi (vector __int128, vector __int128, const int);
>> +vector __uint128 vec_xxpermdi (vector __uint128, vector __uint128, const 
>> int);
> 
> Nit: These two lines break the long long and unsigned long long lines, can
> you move them one line upward?  Also using the explicit "signed" and
> "unsigned" would be better than "__{u,}int128".
> 

Yup, I didn't get them in the right place.  Fixed.

>>  vector unsigned long long vec_xxpermdi (vector unsigned long long,
>>  vector unsigned long long, const 
>> int);
>>  vector int vec_xxpermdi (vector int, vector int, const int);
>> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c 
>> b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
>> new file mode 100644
>> index 000..2d5dce09404
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
>> @@ -0,0 +1,229 @@
>> +/* { dg-do run } */
>> +/* { dg-require-effective-target vmx_hw } */
>> +/* { dg-options "-save-temps" } */
> 
> Nit: dg-options line isn't needed as it doesn't check assembly.

Removed the save-temps.

> 
> BR,
> Kewen
> 
>> +
>> +#include <altivec.h>
>> +
>> +#define DEBUG 0
>> +
>> +#if DEBUG
>> +#include <stdio.h>
>> +void print_i128 (unsigned __int128 val)
>> +{
>> +  printf(" 0x%016llx%016llx",
>> + (unsigned long long)(val >> 64),
>> + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF));
>> +}
>> +#endif
>> +
>> +extern void abort (void);
>> +
>> +union convert_union {
>> +  vector signed __int128    s128;
>> +  vector unsigned __int128  u128;
>> +  char  val[16];
>> +} convert;
>> +
>> +int check_u128_result(vector unsigned __int128 vresult_u128,
>> +  vector unsigned __int128 expected_vresult_u128)
>> +{
>> +  /* Use a for loop to check each byte manually so the test case will
>> + run with ISA 2.06.
>> +
>> + Return 1 if they match, 0 otherwise.  */
>> +
>> +  int i;
>> +
>> +  union convert_union result;
>> +  union convert_union 

Re: [PATCH 11/13 ver 3] rs6000, extend vec_xxpermdi built-in for __int128 args

2024-06-03 Thread Kewen.Lin
Hi,

on 2024/5/30 00:10, Carl Love wrote:
>  This was patch 10 from the previous series.  The patch was updated to 
> address feedback comments.
> 
> Carl 
> ---
> 
> rs6000, extend vec_xxpermdi built-in for __int128 args
> 
> Add new signed and unsigned overloaded instances for vec_xxpermdi:
> 
>__int128 vec_xxpermdi (__int128, __int128, const int);
>__uint128 vec_xxpermdi (__uint128, __uint128, const int);
> 
> Update the documentation to include a reference to the new built-in
> instances.
> 
> Add test cases for the new overloaded instances.
> 
> gcc/ChangeLog:
>   * config/rs6000/rs6000-overload.def (vec_xxpermdi): Add new
>   overloaded built-in instances.
>   * doc/extend.texi:  Add documentation for new overloaded built-in
>   instances.
> 
> gcc/testsuite/ChangeLog:
>   * gcc.target/powerpc/vec_perm-runnable-i128.c: New test file.
> ---
>  gcc/config/rs6000/rs6000-overload.def |   4 +
>  gcc/doc/extend.texi   |   2 +
>  .../powerpc/vec_perm-runnable-i128.c  | 229 ++
>  3 files changed, 235 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
> 
> diff --git a/gcc/config/rs6000/rs6000-overload.def 
> b/gcc/config/rs6000/rs6000-overload.def
> index a210c5ad10d..45000f161e4 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -4932,6 +4932,10 @@
>  XXPERMDI_4SF  XXPERMDI_VF
>vd __builtin_vsx_xxpermdi (vd, vd, const int);
>  XXPERMDI_2DF  XXPERMDI_VD
> +  vsq __builtin_vsx_xxpermdi (vsq, vsq, const int);
> +XXPERMDI_1TI  XXPERMDI_1TI
> +  vuq __builtin_vsx_xxpermdi (vuq, vuq, const int);
> +XXPERMDI_1TI  XXPERMDI_1TUI

Nits:
  - Move them before "vf __builtin_vsx_xxpermdi (vf, vf, const int);" so
they are close to instances for other integral types.
  - Following the existing naming convention, the _{SQ,UQ} suffixes are better.

vsq __builtin_vsx_xxpermdi (vsq, vsq, const int);
   XXPERMDI_1TI  XXPERMDI_1SQ
vuq __builtin_vsx_xxpermdi (vuq, vuq, const int);
   XXPERMDI_1TI  XXPERMDI_1UQ

>  
>  [VEC_XXSLDWI, vec_xxsldwi, __builtin_vsx_xxsldwi]
>vsc __builtin_vsx_xxsldwi (vsc, vsc, const int);
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 0756230b19e..edfef1bdab7 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -22555,6 +22555,8 @@ void vec_vsx_st (vector bool char, int, signed char 
> *);
>  vector double vec_xxpermdi (vector double, vector double, const int);
>  vector float vec_xxpermdi (vector float, vector float, const int);
>  vector long long vec_xxpermdi (vector long long, vector long long, const 
> int);

> +vector __int128 vec_xxpermdi (vector __int128, vector __int128, const int);
> +vector __uint128 vec_xxpermdi (vector __uint128, vector __uint128, const int);

Nit: These two lines break the long long and unsigned long long lines, can you
move them one line upward?  Also using the explicit "signed" and "unsigned"
would be better than "__{u,}int128".

>  vector unsigned long long vec_xxpermdi (vector unsigned long long,
>  vector unsigned long long, const 
> int);
>  vector int vec_xxpermdi (vector int, vector int, const int);
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c 
> b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
> new file mode 100644
> index 000..2d5dce09404
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
> @@ -0,0 +1,229 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target vmx_hw } */
> +/* { dg-options "-save-temps" } */

Nit: dg-options line isn't needed as it doesn't check assembly.

BR,
Kewen

> +
> +#include <altivec.h>
> +
> +#define DEBUG 0
> +
> +#if DEBUG
> +#include <stdio.h>
> +void print_i128 (unsigned __int128 val)
> +{
> +  printf(" 0x%016llx%016llx",
> + (unsigned long long)(val >> 64),
> + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF));
> +}
> +#endif
> +
> +extern void abort (void);
> +
> +union convert_union {
> +  vector signed __int128    s128;
> +  vector unsigned __int128  u128;
> +  char  val[16];
> +} convert;
> +
> +int check_u128_result(vector unsigned __int128 vresult_u128,
> +   vector unsigned __int128 expected_vresult_u128)
> +{
> +  /* Use a for loop to check each byte manually so the test case will
> + run with ISA 2.06.
> +
> + Return 1 if they match, 0 otherwise.  */
> +
> +  int i;
> +
> +  union convert_union result;
> +  union convert_union expected;
> +
> +  result.u128 = vresult_u128;
> +  expected.u128 = expected_vresult_u128;
> +
> +  /* Check if each byte of the result and expected match. */
> +  for (i = 0; i < 16; i++)
> +{
> +  if (result.val[i] != expected.val[i])
> + return 0;
> +}
> +  return 1;
> +}
> +
> +int check_s128_result(vector sig

Re: [PATCH 11/13 ver 3] rs6000, extend vec_xxpermdi built-in for __int128 args

2024-05-29 Thread Carl Love
 This was patch 10 from the previous series.  The patch was updated to address 
feedback comments.

Carl 
---

rs6000, extend vec_xxpermdi built-in for __int128 args

Add new signed and unsigned overloaded instances for vec_xxpermdi:

   __int128 vec_xxpermdi (__int128, __int128, const int);
   __uint128 vec_xxpermdi (__uint128, __uint128, const int);

Update the documentation to include a reference to the new built-in
instances.

Add test cases for the new overloaded instances.

gcc/ChangeLog:
* config/rs6000/rs6000-overload.def (vec_xxpermdi): Add new
overloaded built-in instances.
* doc/extend.texi:  Add documentation for new overloaded built-in
instances.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/vec_perm-runnable-i128.c: New test file.
---
 gcc/config/rs6000/rs6000-overload.def |   4 +
 gcc/doc/extend.texi   |   2 +
 .../powerpc/vec_perm-runnable-i128.c  | 229 ++
 3 files changed, 235 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c

diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index a210c5ad10d..45000f161e4 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4932,6 +4932,10 @@
 XXPERMDI_4SF  XXPERMDI_VF
   vd __builtin_vsx_xxpermdi (vd, vd, const int);
 XXPERMDI_2DF  XXPERMDI_VD
+  vsq __builtin_vsx_xxpermdi (vsq, vsq, const int);
+XXPERMDI_1TI  XXPERMDI_1TI
+  vuq __builtin_vsx_xxpermdi (vuq, vuq, const int);
+XXPERMDI_1TI  XXPERMDI_1TUI
 
 [VEC_XXSLDWI, vec_xxsldwi, __builtin_vsx_xxsldwi]
   vsc __builtin_vsx_xxsldwi (vsc, vsc, const int);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 0756230b19e..edfef1bdab7 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -22555,6 +22555,8 @@ void vec_vsx_st (vector bool char, int, signed char *);
 vector double vec_xxpermdi (vector double, vector double, const int);
 vector float vec_xxpermdi (vector float, vector float, const int);
 vector long long vec_xxpermdi (vector long long, vector long long, const int);
+vector __int128 vec_xxpermdi (vector __int128, vector __int128, const int);
+vector __uint128 vec_xxpermdi (vector __uint128, vector __uint128, const int);
 vector unsigned long long vec_xxpermdi (vector unsigned long long,
 vector unsigned long long, const int);
 vector int vec_xxpermdi (vector int, vector int, const int);
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c 
b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
new file mode 100644
index 000..2d5dce09404
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_perm-runnable-i128.c
@@ -0,0 +1,229 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-save-temps" } */
+
+#include <altivec.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+void print_i128 (unsigned __int128 val)
+{
+  printf(" 0x%016llx%016llx",
+ (unsigned long long)(val >> 64),
+ (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF));
+}
+#endif
+
+extern void abort (void);
+
+union convert_union {
+  vector signed __int128    s128;
+  vector unsigned __int128  u128;
+  char  val[16];
+} convert;
+
+int check_u128_result(vector unsigned __int128 vresult_u128,
+ vector unsigned __int128 expected_vresult_u128)
+{
+  /* Use a for loop to check each byte manually so the test case will
+ run with ISA 2.06.
+
+ Return 1 if they match, 0 otherwise.  */
+
+  int i;
+
+  union convert_union result;
+  union convert_union expected;
+
+  result.u128 = vresult_u128;
+  expected.u128 = expected_vresult_u128;
+
+  /* Check if each byte of the result and expected match. */
+  for (i = 0; i < 16; i++)
+{
+  if (result.val[i] != expected.val[i])
+   return 0;
+}
+  return 1;
+}
+
+int check_s128_result(vector signed __int128 vresult_s128,
+ vector signed __int128 expected_vresult_s128)
+{
+  /* Convert the arguments to unsigned, then check equality.  */
+  union convert_union result;
+  union convert_union expected;
+
+  result.s128 = vresult_s128;
+  expected.s128 = expected_vresult_s128;
+
+  return check_u128_result (result.u128, expected.u128);
+}
+
+
+int
+main (int argc, char *argv [])
+{
+  int i;
+  
+  vector signed __int128 src_va_s128;
+  vector signed __int128 src_vb_s128;
+  vector signed __int128 vresult_s128;
+  vector signed __int128 expected_vresult_s128;
+
+  vector unsigned __int128 src_va_u128;
+  vector unsigned __int128 src_vb_u128;
+  vector unsigned __int128 src_vc_u128;
+  vector unsigned __int128 vresult_u128;
+  vector unsigned __int128 expected_vresult_u128;
+
+  src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0};
+  src_va_s128 = src_va_s128 << 64; 
+  src_va_s128