Hi Carl,

on 2024/6/14 03:40, Carl Love wrote:
> 
> GCC maintainers:
> 
> The patch has been updated per the comments from version 3.  Please let me 
> know if the patch is acceptable for mainline.
> 
>                      Carl 
> 
> -----------------------------------------------------------------
> 
> rs6000, add overloaded vec_sel with int128 arguments
> 
> Extend the vec_sel built-in to take three signed/unsigned/bool int128
> arguments and return a signed/unsigned/bool int128 result.
> 
> Extending the vec_sel built-in makes the existing built-ins
> __builtin_vsx_xxsel_1ti and __builtin_vsx_xxsel_1ti_uns obsolete.  The
> patch removes these built-ins.
> 
> The patch adds documentation and test cases for the new overloaded
> vec_sel built-ins.
> 
> gcc/ChangeLog:
>       * config/rs6000/rs6000-builtins.def (__builtin_vsx_xxsel_1ti,
>       __builtin_vsx_xxsel_1ti_uns): Remove built-in definitions.
>       * config/rs6000/rs6000-overload.def (vec_sel): Add new
>       overloaded      definitions.

Nit: there is an unexpected tab between "overloaded" and "definitions"; it
should be a space.  It would also be better to mention which types of
overloaded functions are added, e.g. "for vector signed, unsigned and bool
int128 types."

>       * doc/extend.texi: Add documentation for new vec_sel instances.

Likewise.

> 
> gcc/testsuite/ChangeLog:
>       * gcc.target/powerpc/builtins-10-runnable.c: New runnable test
>       file.
>       * gcc.target/powerpc/builtins-10.c: New compile only test file.
> ---
>  gcc/config/rs6000/rs6000-builtins.def         |   6 -
>  gcc/config/rs6000/rs6000-overload.def         |  12 +
>  gcc/doc/extend.texi                           |  20 ++
>  .../gcc.target/powerpc/builtins-10-runnable.c | 220 ++++++++++++++++++
>  .../gcc.target/powerpc/builtins-10.c          |  63 +++++
>  5 files changed, 315 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-10-runnable.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-10.c
> 
> diff --git a/gcc/config/rs6000/rs6000-builtins.def 
> b/gcc/config/rs6000/rs6000-builtins.def
> index b90b3f34167..c969cd0f3f6 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -1907,12 +1907,6 @@
>    const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc);
>      XXSEL_16QI_UNS vector_select_v16qi_uns {}
>  
> -  const vsq __builtin_vsx_xxsel_1ti (vsq, vsq, vsq);
> -    XXSEL_1TI vector_select_v1ti {}
> -
> -  const vsq __builtin_vsx_xxsel_1ti_uns (vsq, vsq, vsq);
> -    XXSEL_1TI_UNS vector_select_v1ti_uns {}
> -
>    const vd __builtin_vsx_xxsel_2df (vd, vd, vd);
>      XXSEL_2DF vector_select_v2df {}
>  
> diff --git a/gcc/config/rs6000/rs6000-overload.def 
> b/gcc/config/rs6000/rs6000-overload.def
> index 4d857bb1af3..6cec1ad4f1a 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -3274,6 +3274,18 @@
>      VSEL_2DF  VSEL_2DF_B
>    vd __builtin_vec_sel (vd, vd, vull);
>      VSEL_2DF  VSEL_2DF_U
> +  vsq __builtin_vec_sel (vsq, vsq, vbq);
> +    VSEL_1TI  VSEL_1TI_B
> +  vsq __builtin_vec_sel (vsq, vsq, vuq);
> +    VSEL_1TI  VSEL_1TI_U
> +  vuq __builtin_vec_sel (vuq, vuq, vbq);
> +    VSEL_1TI_UNS  VSEL_1TI_UB
> +  vuq __builtin_vec_sel (vuq, vuq, vuq);
> +    VSEL_1TI_UNS  VSEL_1TI_UU
> +  vbq __builtin_vec_sel (vbq, vbq, vbq);
> +    VSEL_1TI_UNS  VSEL_1TI_BB
> +  vbq __builtin_vec_sel (vbq, vbq, vuq);
> +    VSEL_1TI_UNS  VSEL_1TI_BU

Nit: Put these new lines after the line "VSEL_2DI_UNS  VSEL_2DI_BU"
and before "vf __builtin_vec_sel (vf, vf, vbi);", so that all the
integral element types are placed together.

>  ; The following variants are deprecated.
>    vsll __builtin_vec_sel (vsll, vsll, vsll);
>      VSEL_2DI_B  VSEL_2DI_S
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index b1620274285..d7d8d149a43 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -21420,6 +21420,26 @@ Additional built-in functions are available for the 
> 64-bit PowerPC
>  family of processors, for efficient use of 128-bit floating point
>  (@code{__float128}) values.
>  
> +Vector select
> +
> +@smallexample
> +vector signed __int128 vec_sel (vector signed __int128,
> +               vector signed __int128, vector bool __int128);
> +vector signed __int128 vec_sel (vector signed __int128,
> +               vector signed __int128, vector unsigned __int128);
> +vector unsigned __int128 vec_sel (vector unsigned __int128,
> +               vector unsigned __int128, vector bool __int128);
> +vector unsigned __int128 vec_sel (vector unsigned __int128,
> +               vector unsigned __int128, vector unsigned __int128);
> +vector bool __int128 vec_sel (vector bool __int128,
> +               vector bool __int128, vector bool __int128);
> +vector bool __int128 vec_sel (vector bool __int128,
> +               vector bool __int128, vector unsigned __int128);
> +@end smallexample
> +
> +The instance is an extension of the existing overloaded built-in 
> @code{vec_sel}
> +that is documented in the PVIPR.
> +
>  @node Basic PowerPC Built-in Functions Available on ISA 2.06
>  @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.06
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-10-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/builtins-10-runnable.c
> new file mode 100644
> index 00000000000..b7b4a95ea0e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-10-runnable.c
> @@ -0,0 +1,220 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target vmx_hw } */
> +/* { dg-options "-maltivec -O2 " } */
> +
> +#include <altivec.h>
> +
> +#define DEBUG 0
> +
> +#if DEBUG
> +#include <stdio.h>
> +void print_i128 (unsigned __int128 val)
> +{
> +  printf(" 0x%016llx%016llx",
> +         (unsigned long long)(val >> 64),
> +         (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF));
> +}
> +#endif
> +
> +extern void abort (void);
> +
> +union convert_union {
> +  vector signed __int128    s128;
> +  vector unsigned __int128  u128;
> +  vector bool __int128  b128;
> +  char  val[16];
> +} convert;
> +
> +int check_u128_result(vector unsigned __int128 vresult_u128,
> +                   vector unsigned __int128 expected_vresult_u128)
> +{
> +  /* Use a for loop to check each byte manually so the test case will run
> +     with ISA 2.06.
> +
> +     Return 1 if they match, 0 otherwise.  */
> +
> +  int i;
> +
> +  union convert_union result;
> +  union convert_union expected;
> +
> +  result.u128 = vresult_u128;
> +  expected.u128 = expected_vresult_u128;
> +
> +  /* Check if each byte of the result and expected match. */
> +  for (i = 0; i < 16; i++)
> +    {
> +      if (result.val[i] != expected.val[i])
> +     return 0;
> +    }
> +  return 1;
> +}
> +
> +int check_s128_result(vector signed __int128 vresult_s128,
> +                   vector signed __int128 expected_vresult_s128)
> +{
> +  /* Convert the arguments to unsigned, then check equality.  */
> +  union convert_union result;
> +  union convert_union expected;
> +
> +  result.s128 = vresult_s128;
> +  expected.s128 = expected_vresult_s128;
> +
> +  return check_u128_result (result.u128, expected.u128);
> +}
> +
> +int check_b128_result(vector bool __int128 vresult_b128,
> +                   vector bool __int128 expected_vresult_b128)
> +{
> +  /* Convert the arguments to unsigned, then check equality.  */
> +  union convert_union result;
> +  union convert_union expected;
> +
> +  result.b128 = vresult_b128;
> +  expected.b128 = expected_vresult_b128;
> +
> +  return check_u128_result (result.u128, expected.u128);
> +}
> +
> +
> +int
> +main (int argc, char *argv [])
> +{
> +  int i;
> +  
> +  vector signed __int128 src_va_s128;
> +  vector signed __int128 src_vb_s128;
> +  vector signed __int128 src_vc_s128;
> +  vector signed __int128 vresult_s128;
> +  vector signed __int128 expected_vresult_s128;
> +
> +  vector unsigned __int128 src_va_u128;
> +  vector unsigned __int128 src_vb_u128;
> +  vector unsigned __int128 src_vc_u128;
> +  vector unsigned __int128 vresult_u128;
> +  vector unsigned __int128 expected_vresult_u128;
> +
> +  vector bool __int128 src_va_b128;
> +  vector bool __int128 src_vb_b128;
> +  vector bool __int128 src_vc_b128;
> +  vector bool __int128 vresult_b128;
> +  vector bool __int128 expected_vresult_b128;
> +
> +  src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0};
> +  src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210};
> +  src_vc_b128 = (vector bool   __int128) {0x3333333333333333};
> +  src_vc_u128 = (vector unsigned __int128) {0xBBBBBBBBBBBBBBBB};
> +
> +  /* Signed arguments.  */
> +  expected_vresult_s128 = (vector signed __int128) {0x32147658ba9cfed0};
> +  vresult_s128 = vec_sel (src_va_s128, src_vb_s128, src_vc_b128);
> +
> +  if (!check_s128_result (vresult_s128, expected_vresult_s128))
> +#if DEBUG
> +    {
> +      printf ("ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_b128) result 
> does not match expected output.\n");
> +      printf ("  Result:          ");
> +      print_i128 ((unsigned __int128) vresult_s128);
> +      printf ("\n  Expected result: ");
> +      print_i128 ((unsigned __int128) expected_vresult_s128);
> +      printf ("\n");
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +  expected_vresult_s128 = (vector signed __int128) {0xba9cfed832147650};
> +  vresult_s128 = vec_sel (src_va_s128, src_vb_s128, src_vc_u128);
> +
> +  if (!check_s128_result (vresult_s128, expected_vresult_s128))
> +#if DEBUG
> +    {
> +      printf ("ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_u128) result 
> does not match expected output.\n");
> +      printf ("  Result:          ");
> +      print_i128 ((unsigned __int128) vresult_s128);
> +      printf ("\n  Expected result: ");
> +      print_i128 ((unsigned __int128) expected_vresult_s128);
> +      printf ("\n");
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +  src_va_u128 = (vector unsigned __int128) {0x13579ACE02468BDF};
> +  src_va_b128 = (vector bool __int128) {0xFFFFFFFF00000000};
> +  src_vb_u128 = (vector unsigned __int128) {0xA987654FEDCB3210};
> +  src_vb_b128 = (vector bool __int128) {0xFFFF0000FFFF0000};
> +  src_vc_u128 = (vector unsigned __int128) {0x5555555555555555};
> +
> +  /* Unigned arguments.  */

Nit: s/Unigned/Unsigned/

> +  expected_vresult_u128 = (vector unsigned __int128) {0x2147a9cf2147badc};
> +  vresult_u128 = vec_sel (src_va_u128, src_vb_u128, src_vc_b128);
> +
> +  if (!check_u128_result (vresult_u128, expected_vresult_u128))
> +#if DEBUG
> +    {
> +      printf ("ERROR, vec_sel (src_va_u128, src_vb_u128, src_vc_b128) result 
> does not match expected output.\n");
> +      printf ("  Result:          ");
> +      print_i128 ((unsigned __int128) vresult_u128);
> +      printf ("\n  Expected result: ");
> +      print_i128 ((unsigned __int128) expected_vresult_u128);
> +      printf ("\n");
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +  expected_vresult_u128 = (vector unsigned __int128) {0x307cfcf47439a9a};
> +  vresult_u128 = vec_sel (src_va_u128, src_vb_u128, src_vc_u128);
> +
> +  if (!check_u128_result (vresult_u128, expected_vresult_u128))
> +#if DEBUG
> +    {
> +      printf ("ERROR, vec_sel (src_va_u128, src_vb_u128, src_vc_u128) result 
> does not match expected output.\n");
> +      printf ("  Result:          ");
> +      print_i128 ((unsigned __int128) vresult_u128);
> +      printf ("\n  Expected result: ");
> +      print_i128 ((unsigned __int128) expected_vresult_u128);
> +      printf ("\n");
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +  /* Boolean arguments.  */
> +  expected_vresult_b128 = (vector bool __int128) {0xffffcccc33330000};
> +  vresult_b128 = vec_sel (src_va_b128, src_vb_b128, src_vc_b128);
> +
> +  if (!check_b128_result (vresult_b128, expected_vresult_b128))
> +#if DEBUG
> +    {
> +      printf ("ERROR, vec_sel (src_va_b128, src_vb_b128, src_vc_b128) result 
> does not match expected output.\n");
> +      printf ("  Result:          ");
> +      print_i128 ((unsigned __int128) vresult_b128);
> +      printf ("\n  Expected result: ");
> +      print_i128 ((unsigned __int128) expected_vresult_b128);
> +      printf ("\n");
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +  expected_vresult_b128 = (vector bool __int128) {0xffffaaaa55550000};
> +  vresult_b128 = vec_sel (src_va_b128, src_vb_b128, src_vc_u128);
> +
> +  if (!check_b128_result (vresult_b128, expected_vresult_b128))
> +#if DEBUG
> +    {
> +      printf ("ERROR, vec_sel (src_va_b128, src_vb_b128, src_vc_u128) result 
> does not match expected output.\n");
> +      printf ("  Result:          ");
> +      print_i128 ((unsigned __int128) vresult_b128);
> +      printf ("\n  Expected result: ");
> +      print_i128 ((unsigned __int128) expected_vresult_b128);
> +      printf ("\n");
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +    return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-10.c 
> b/gcc/testsuite/gcc.target/powerpc/builtins-10.c
> new file mode 100644
> index 00000000000..eddc4c93b32
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-10.c
> @@ -0,0 +1,63 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vmx_hw } */

s/vmx_hw/powerpc_altivec/ -- this is a compile-only test, so it shouldn't
use an _hw effective target.

> +/* { dg-options "-save-temps" } */

s/-save-temps/-O2 -maltivec/

Also move the dg-options line before the powerpc_altivec line (as the
powerpc_altivec evaluation now considers current_compiler_flags).

OK for trunk with all the above fixed.  Thanks!

BR,
Kewen

> +/* { dg-final { scan-assembler-times "xxsel" 6 } } */
> +
> +#include <altivec.h>
> +
> +/* Signed args */
> +vector signed __int128
> +test_vec_sel_ssb (vector signed __int128 src_va_s128,
> +               vector signed __int128 src_vb_s128,
> +               vector bool __int128 src_vc_b128)
> +{
> +  return vec_sel (src_va_s128, src_vb_s128, src_vc_b128);
> +}
> +
> +vector signed __int128
> +test_vec_sel_ssu (vector signed __int128 src_va_s128,
> +               vector signed __int128 src_vb_s128,
> +               vector unsigned __int128 src_vc_u128)
> +{
> +  return vec_sel (src_va_s128, src_vb_s128, src_vc_u128);
> +}
> +
> +/* Unsigned args */
> +vector unsigned __int128
> +test_vec_sel_uub (vector unsigned __int128 src_va_u128,
> +               vector unsigned __int128 src_vb_u128,
> +               vector bool __int128 src_vc_b128)
> +{
> +  return vec_sel (src_va_u128, src_vb_u128, src_vc_b128);
> +}
> +
> +vector unsigned __int128
> +test_vec_sel_uuu (vector unsigned __int128 src_va_u128,
> +               vector unsigned __int128 src_vb_u128,
> +               vector unsigned __int128 src_vc_u128)
> +{
> +  return vec_sel (src_va_u128, src_vb_u128, src_vc_u128);
> +}
> +
> +/* Boolean args */
> +vector bool __int128
> +test_vec_sel_bbb (vector bool __int128 src_va_b128,
> +               vector bool __int128 src_vb_b128,
> +               vector bool __int128 src_vc_b128)
> +{
> +  return vec_sel (src_va_b128, src_vb_b128, src_vc_b128);
> +}
> +
> +vector bool __int128
> +test_vec_sel_bbu (vector bool __int128 src_va_b128,
> +               vector bool __int128 src_vb_b128,
> +               vector unsigned __int128 src_vc_u128)
> +{
> +  return vec_sel (src_va_b128, src_vb_b128, src_vc_u128);
> +}
> +
> +/* Expected results:
> +   vec_sel              xxsel    */
> +
> +/* { dg-final { scan-assembler-times "xxsel" 6 } } */
> +

Reply via email to