LGTM

On Wed, Sep 13, 2023 at 12:25 AM Lehua Ding <lehua.d...@rivai.ai> wrote:
>
> This patch adds combine patterns to combine vfsgnj.vv + vcond_mask
> into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced in
> the midend; we will send another patch to address this issue.
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec-opt.md (*copysign<mode>_neg): Move.
>         (*cond_copysign<mode>): New combine pattern.
>         * config/riscv/riscv-v.cc (needs_fp_rounding): Extend.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.
>
> ---
>  gcc/config/riscv/autovec-opt.md               | 68 +++++++++----
>  gcc/config/riscv/riscv-v.cc                   |  4 +-
>  .../rvv/autovec/cond/cond_copysign-run.c      | 99 +++++++++++++++++++
>  .../rvv/autovec/cond/cond_copysign-rv32gcv.c  | 12 +++
>  .../rvv/autovec/cond/cond_copysign-rv64gcv.c  | 12 +++
>  .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++
>  .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++
>  7 files changed, 349 insertions(+), 20 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>
> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
> index 58e80044f1e..f759525f96b 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -609,6 +609,10 @@
>     (set_attr "mode" "<V_DOUBLE_TRUNC>")
>     (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
>
> +;; 
> =============================================================================
> +;; Combine op + vmerge to cond_op
> +;; 
> =============================================================================
> +
>  ;; Combine <op> and vcond_mask generated by midend into cond_len_<op>
>  ;; Currently supported operations:
>  ;;   abs(FP)
> @@ -651,25 +655,6 @@
>    DONE;
>  })
>
> -;; Combine vlmax neg and UNSPEC_VCOPYSIGN
> -(define_insn_and_split "*copysign<mode>_neg"
> -  [(set (match_operand:VF 0 "register_operand")
> -        (neg:VF
> -          (unspec:VF [
> -            (match_operand:VF 1 "register_operand")
> -            (match_operand:VF 2 "register_operand")
> -          ] UNSPEC_VCOPYSIGN)))]
> -  "TARGET_VECTOR && can_create_pseudo_p ()"
> -  "#"
> -  "&& 1"
> -  [(const_int 0)]
> -{
> -  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
> -                                  riscv_vector::BINARY_OP, operands);
> -  DONE;
> -}
> -[(set_attr "type" "vector")])
> -
>  ;; Combine sign_extend/zero_extend(vf2) and vcond_mask
>  (define_insn_and_split "*cond_<optab><v_double_trunc><mode>"
>    [(set (match_operand:VWEXTI 0 "register_operand")
> @@ -918,6 +903,27 @@
>  }
>  [(set_attr "type" "vector")])
>
> +;; Combine vfsgnj.vv + vcond_mask
> +(define_insn_and_split "*cond_copysign<mode>"
> +   [(set (match_operand:VF 0 "register_operand")
> +    (if_then_else:VF
> +      (match_operand:<VM> 1 "register_operand")
> +      (unspec:VF
> +       [(match_operand:VF 2 "register_operand")
> +        (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
> +      (match_operand:VF 4 "register_operand")))]
> +   "TARGET_VECTOR && can_create_pseudo_p ()"
> +   "#"
> +   "&& 1"
> +   [(const_int 0)]
> +{
> +  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode);
> +  rtx ops[] = {operands[0], operands[1], operands[2], operands[3], 
> operands[4],
> +               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
> +  riscv_vector::expand_cond_len_binop (icode, ops);
> +   DONE;
> +})
> +
>  ;; 
> =============================================================================
>  ;; Combine extend + binop to widen_binop
>  ;; 
> =============================================================================
> @@ -1119,3 +1125,27 @@
>    DONE;
>  }
>  [(set_attr "type" "vfwmul")])
> +
> +
> +;; 
> =============================================================================
> +;; Misc combine patterns
> +;; 
> =============================================================================
> +
> +;; Combine vlmax neg and UNSPEC_VCOPYSIGN
> +(define_insn_and_split "*copysign<mode>_neg"
> +  [(set (match_operand:VF 0 "register_operand")
> +        (neg:VF
> +          (unspec:VF [
> +            (match_operand:VF 1 "register_operand")
> +            (match_operand:VF 2 "register_operand")
> +          ] UNSPEC_VCOPYSIGN)))]
> +  "TARGET_VECTOR && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
> +                                  riscv_vector::BINARY_OP, operands);
> +  DONE;
> +}
> +[(set_attr "type" "vector")])
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 4d95bd773a2..76e6094f45b 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
>          && icode != maybe_code_for_pred_extend (mode)
>          /* narrower-INT -> FP */
>          && icode != maybe_code_for_pred_widen (FLOAT, mode)
> -        && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode);
> +        && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
> +        /* vfsgnj */
> +        && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
>  }
>
>  /* Subroutine to expand COND_LEN_* patterns.  */
> diff --git 
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
> new file mode 100644
> index 00000000000..be37854c135
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
> @@ -0,0 +1,99 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
> --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +#include <assert.h>
> +
> +#define SZ 512
> +
> +#define EPS 1e-6
> +
> +#define INIT_PRED()                                                          
>   \
> +  int pred[SZ];                                                              
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      pred[i] = i % 3;                                                       
>   \
> +    }
> +
> +#define RUN(TYPE, VAL)                                                       
>   \
> +  TYPE a##TYPE[SZ];                                                          
>   \
> +  TYPE b##TYPE[SZ];                                                          
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      a##TYPE[i] = i;                                                        
>   \
> +      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                     
>   \
> +    }                                                                        
>   \
> +  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                     
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < 
> EPS);
> +
> +#define RUN2(TYPE, VAL)                                                      
>   \
> +  TYPE a2##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    a2##TYPE[i] = i;                                                         
>   \
> +  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                     
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
> +
> +#define RUN3(TYPE, VAL)                                                      
>   \
> +  TYPE a3##TYPE[SZ];                                                         
>   \
> +  TYPE b3##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      a3##TYPE[i] = (i & 1) ? -i : i;                                        
>   \
> +      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                    
>   \
> +    }                                                                        
>   \
> +  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                   
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
> +
> +#define RUN4(TYPE, VAL)                                                      
>   \
> +  TYPE a4##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    a4##TYPE[i] = -i;                                                        
>   \
> +  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                      
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
> +
> +#define RUN5(TYPE, VAL)                                                      
>   \
> +  TYPE a5##TYPE[SZ];                                                         
>   \
> +  TYPE b5##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      a5##TYPE[i] = i;                                                       
>   \
> +      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                    
>   \
> +    }                                                                        
>   \
> +  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                  
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i]                                                         
>   \
> +           || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN6(TYPE, VAL)                                                      
>   \
> +  TYPE a6##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    a6##TYPE[i] = i;                                                         
>   \
> +  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                    
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
> +
> +#define RUN_ALL()                                                            
>   \
> +  RUN (float, 5)                                                             
>   \
> +  RUN (double, 6)                                                            
>   \
> +  RUN2 (float, 11)                                                           
>   \
> +  RUN2 (double, 12)                                                          
>   \
> +  RUN3 (float, 16)                                                           
>   \
> +  RUN3 (double, 18)                                                          
>   \
> +  RUN4 (float, 17)                                                           
>   \
> +  RUN4 (double, 19)                                                          
>   \
> +  RUN5 (float, 123)                                                          
>   \
> +  RUN5 (double, 523)                                                         
>   \
> +  RUN6 (float, 777)                                                          
>   \
> +  RUN6 (double, 877)
> +
> +int
> +main ()
> +{
> +  INIT_PRED ()
> +  RUN_ALL ()
> +}
> diff --git 
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
> new file mode 100644
> index 00000000000..cef531b9700
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model 
> -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax 
> -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants 
> which
> +      expand cannot handle currently.
> +   2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently.  
> */
> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } 
> } } */
> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
> diff --git 
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
> new file mode 100644
> index 00000000000..cc2aa4de757
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model 
> -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax 
> -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants 
> which
> +      expand cannot handle currently.
> +   2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently.  
> */
> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } 
> } } */
> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
> diff --git 
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
> new file mode 100644
> index 00000000000..4191500fd83
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
> @@ -0,0 +1,81 @@
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE, SUFFIX)                                              
>   \
> +  __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst,          
>   \
> +                                               TYPE *restrict a,             
>  \
> +                                               TYPE *restrict b,             
>  \
> +                                               int *restrict pred, int n)    
>  \
> +  {                                                                          
>   \
> +    for (int i = 0; i < n; i++)                                              
>   \
> +      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];   
>   \
> +  }
> +
> +#define TEST_TYPE2(TYPE, SUFFIX)                                             
>   \
> +  __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst,         
>   \
> +                                                TYPE *restrict a, TYPE b,    
>  \
> +                                                int *restrict pred, int n)   
>  \
> +  {                                                                          
>   \
> +    for (int i = 0; i < n; i++)                                              
>   \
> +      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i];      
>   \
> +  }
> +
> +#define TEST_TYPE3(TYPE, SUFFIX)                                             
>   \
> +  __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst,           
>   \
> +                                              TYPE *restrict a,              
>  \
> +                                              TYPE *restrict b,              
>  \
> +                                              int *restrict pred, int n)     
>  \
> +  {                                                                          
>   \
> +    for (int i = 0; i < n; i++)                                              
>   \
> +      dst[i]                                                                 
>   \
> +       = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i];   
>  \
> +  }
> +
> +#define TEST_TYPE4(TYPE, SUFFIX)                                             
>   \
> +  __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst,          
>   \
> +                                               TYPE *restrict a, TYPE b,     
>  \
> +                                               int *restrict pred, int n)    
>  \
> +  {                                                                          
>   \
> +    for (int i = 0; i < n; i++)                                              
>   \
> +      dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : 
> dst[i];  \
> +  }
> +
> +#define TEST_TYPE5(TYPE, SUFFIX)                                             
>   \
> +  __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst,         
>   \
> +                                                TYPE *restrict a,            
>  \
> +                                                TYPE *restrict b,            
>  \
> +                                                int *restrict pred, int n)   
>  \
> +  {                                                                          
>   \
> +    for (int i = 0; i < n; i++)                                              
>   \
> +      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];  
>   \
> +  }
> +
> +#define TEST_TYPE6(TYPE, SUFFIX)                                             
>   \
> +  __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst,        
>   \
> +                                                 TYPE *restrict a, TYPE b,   
>  \
> +                                                 int *restrict pred, int n)  
>  \
> +  {                                                                          
>   \
> +    for (int i = 0; i < n; i++)                                              
>   \
> +      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i];     
>   \
> +  }
> +
> +#define TEST_ALL()                                                           
>   \
> +  TEST_TYPE (_Float16, f16)                                                  
>   \
> +  TEST_TYPE (float, f)                                                       
>   \
> +  TEST_TYPE (double, )                                                       
>   \
> +  TEST_TYPE2 (_Float16, f16)                                                 
>   \
> +  TEST_TYPE2 (float, f)                                                      
>   \
> +  TEST_TYPE2 (double, )                                                      
>   \
> +  TEST_TYPE3 (_Float16, f16)                                                 
>   \
> +  TEST_TYPE3 (float, f)                                                      
>   \
> +  TEST_TYPE3 (double, )                                                      
>   \
> +  TEST_TYPE4 (_Float16, f16)                                                 
>   \
> +  TEST_TYPE4 (float, f)                                                      
>   \
> +  TEST_TYPE4 (double, )                                                      
>   \
> +  TEST_TYPE5 (_Float16, f16)                                                 
>   \
> +  TEST_TYPE5 (float, f)                                                      
>   \
> +  TEST_TYPE5 (double, )                                                      
>   \
> +  TEST_TYPE6 (_Float16, f16)                                                 
>   \
> +  TEST_TYPE6 (float, f)                                                      
>   \
> +  TEST_TYPE6 (double, )
> +
> +TEST_ALL ()
> diff --git 
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
> new file mode 100644
> index 00000000000..6e337f9e74c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
> @@ -0,0 +1,93 @@
> +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model 
> --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +#include <assert.h>
> +
> +#define SZ 512
> +
> +#define EPS 1e-6
> +
> +#define INIT_PRED()                                                          
>   \
> +  int pred[SZ];                                                              
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      pred[i] = i % 3;                                                       
>   \
> +    }
> +
> +#define RUN(TYPE, VAL)                                                       
>   \
> +  TYPE a##TYPE[SZ];                                                          
>   \
> +  TYPE b##TYPE[SZ];                                                          
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      a##TYPE[i] = i;                                                        
>   \
> +      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                     
>   \
> +    }                                                                        
>   \
> +  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                     
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < 
> EPS);
> +
> +#define RUN2(TYPE, VAL)                                                      
>   \
> +  TYPE a2##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    a2##TYPE[i] = i;                                                         
>   \
> +  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                     
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
> +
> +#define RUN3(TYPE, VAL)                                                      
>   \
> +  TYPE a3##TYPE[SZ];                                                         
>   \
> +  TYPE b3##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      a3##TYPE[i] = (i & 1) ? -i : i;                                        
>   \
> +      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                    
>   \
> +    }                                                                        
>   \
> +  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                   
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
> +
> +#define RUN4(TYPE, VAL)                                                      
>   \
> +  TYPE a4##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    a4##TYPE[i] = -i;                                                        
>   \
> +  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                      
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
> +
> +#define RUN5(TYPE, VAL)                                                      
>   \
> +  TYPE a5##TYPE[SZ];                                                         
>   \
> +  TYPE b5##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    {                                                                        
>   \
> +      a5##TYPE[i] = i;                                                       
>   \
> +      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                    
>   \
> +    }                                                                        
>   \
> +  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                  
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i]                                                         
>   \
> +           || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN6(TYPE, VAL)                                                      
>   \
> +  TYPE a6##TYPE[SZ];                                                         
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    a6##TYPE[i] = i;                                                         
>   \
> +  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                    
>   \
> +  for (int i = 0; i < SZ; i++)                                               
>   \
> +    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
> +
> +#define RUN_ALL()                                                            
>   \
> +  RUN (_Float16, 5)                                                          
>   \
> +  RUN2 (_Float16, 11)                                                        
>   \
> +  RUN3 (_Float16, 16)                                                        
>   \
> +  RUN4 (_Float16, 17)                                                        
>   \
> +  RUN5 (_Float16, 123)                                                       
>   \
> +  RUN6 (_Float16, 777)
> +
> +int
> +main ()
> +{
> +  INIT_PRED ()
> +  RUN_ALL ()
> +}
> --
> 2.36.3
>

Reply via email to