RE: [PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math optimization

2023-07-13 Thread Li, Pan2 via Gcc-patches
Committed, thanks Richard.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Richard Biener via Gcc-patches
Sent: Thursday, July 13, 2023 6:51 PM
To: Ju-Zhe Zhong 
Cc: gcc-patches@gcc.gnu.org; richard.sandif...@arm.com
Subject: Re: [PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math 
optimization

On Thu, 13 Jul 2023, juzhe.zh...@rivai.ai wrote:

> From: Ju-Zhe Zhong 
> 
> Hi, Richard and Richi.
> 
> The previous patch added support for COND_LEN_* binary operations, but not for
> COND_LEN_* ternary operations.
> 
> This patch adds support for COND_LEN_* ternary operations. Consider the
> following case:
> 
> #define TEST_TYPE(TYPE)                                                       \
>   __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst,           \
>                                               TYPE *__restrict a,             \
>                                               TYPE *__restrict b,             \
>                                               TYPE *__restrict c, int n)      \
>   {                                                                           \
>     for (int i = 0; i < n; i++)                                               \
>       dst[i] += a[i] * b[i];                                                  \
>   }
> 
> #define TEST_ALL() TEST_TYPE (double)
> 
> TEST_ALL ()
> 
> Before this patch:
> ...
> COND_LEN_MUL
> COND_LEN_ADD
> 
> After this patch:
> ...
> COND_LEN_FMA

OK.

Thanks,
Richard.

> gcc/ChangeLog:
> 
> * genmatch.cc (commutative_op): Add COND_LEN_*
> * internal-fn.cc (first_commutative_argument): Ditto.
> (CASE): Ditto.
> (get_unconditional_internal_fn): Ditto.
> (can_interpret_as_conditional_op_p): Ditto.
> (internal_fn_len_index): Ditto.
> * internal-fn.h (can_interpret_as_conditional_op_p): Ditto.
> * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Ditto.
> (convert_mult_to_fma): Ditto.
> (math_opts_dom_walker::after_dom_children): Ditto.
> 
> ---
>  gcc/genmatch.cc   | 13 ++
>  gcc/internal-fn.cc| 87 ++-
>  gcc/internal-fn.h |  2 +-
>  gcc/tree-ssa-math-opts.cc | 80 +--
>  4 files changed, 159 insertions(+), 23 deletions(-)
> 
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index 5fceeec9780..2302f2a7ff0 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -559,6 +559,19 @@ commutative_op (id_base *id)
>case CFN_COND_FMS:
>case CFN_COND_FNMA:
>case CFN_COND_FNMS:
> +  case CFN_COND_LEN_ADD:
> +  case CFN_COND_LEN_MUL:
> +  case CFN_COND_LEN_MIN:
> +  case CFN_COND_LEN_MAX:
> +  case CFN_COND_LEN_FMIN:
> +  case CFN_COND_LEN_FMAX:
> +  case CFN_COND_LEN_AND:
> +  case CFN_COND_LEN_IOR:
> +  case CFN_COND_LEN_XOR:
> +  case CFN_COND_LEN_FMA:
> +  case CFN_COND_LEN_FMS:
> +  case CFN_COND_LEN_FNMA:
> +  case CFN_COND_LEN_FNMS:
>   return 1;
>  
>default:
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index c11123a1173..e698f0bffc7 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4191,6 +4191,19 @@ first_commutative_argument (internal_fn fn)
>  case IFN_COND_FMS:
>  case IFN_COND_FNMA:
>  case IFN_COND_FNMS:
> +case IFN_COND_LEN_ADD:
> +case IFN_COND_LEN_MUL:
> +case IFN_COND_LEN_MIN:
> +case IFN_COND_LEN_MAX:
> +case IFN_COND_LEN_FMIN:
> +case IFN_COND_LEN_FMAX:
> +case IFN_COND_LEN_AND:
> +case IFN_COND_LEN_IOR:
> +case IFN_COND_LEN_XOR:
> +case IFN_COND_LEN_FMA:
> +case IFN_COND_LEN_FMS:
> +case IFN_COND_LEN_FNMA:
> +case IFN_COND_LEN_FNMS:
>return 1;
>  
>  default:
> @@ -4330,11 +4343,14 @@ conditional_internal_fn_code (internal_fn ifn)
>  {
>switch (ifn)
>  {
> -#define CASE(CODE, IFN) case IFN_COND_##IFN: return CODE;
> -  FOR_EACH_CODE_MAPPING(CASE)
> +#define CASE(CODE, IFN)  
>   \
> +  case IFN_COND_##IFN:   
>   \
> +  case IFN_COND_LEN_##IFN:   
>   \
> +return CODE;
> +  FOR_EACH_CODE_MAPPING (CASE)
>  #undef CASE
> -default:
> -  return ERROR_MARK;
> +  default:
> + return ERROR_MARK;
>  }
>  }
>  
> @@ -4433,6 +4449,18 @@ get_unconditional_internal_fn (internal_fn ifn)
> operating elementwise if the operands are vectors.  This includes
> the case of an all-true COND, so that the operation alw

Re: [PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math optimization

2023-07-13 Thread Richard Biener via Gcc-patches
On Thu, 13 Jul 2023, juzhe.zh...@rivai.ai wrote:

> From: Ju-Zhe Zhong 
> 
> Hi, Richard and Richi.
> 
> The previous patch added support for COND_LEN_* binary operations, but not for
> COND_LEN_* ternary operations.
> 
> This patch adds support for COND_LEN_* ternary operations. Consider the
> following case:
> 
> #define TEST_TYPE(TYPE)                                                       \
>   __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst,           \
>                                               TYPE *__restrict a,             \
>                                               TYPE *__restrict b,             \
>                                               TYPE *__restrict c, int n)      \
>   {                                                                           \
>     for (int i = 0; i < n; i++)                                               \
>       dst[i] += a[i] * b[i];                                                  \
>   }
> 
> #define TEST_ALL() TEST_TYPE (double)
> 
> TEST_ALL ()
> 
> Before this patch:
> ...
> COND_LEN_MUL
> COND_LEN_ADD
> 
> After this patch:
> ...
> COND_LEN_FMA

OK.

Thanks,
Richard.

> gcc/ChangeLog:
> 
> * genmatch.cc (commutative_op): Add COND_LEN_*
> * internal-fn.cc (first_commutative_argument): Ditto.
> (CASE): Ditto.
> (get_unconditional_internal_fn): Ditto.
> (can_interpret_as_conditional_op_p): Ditto.
> (internal_fn_len_index): Ditto.
> * internal-fn.h (can_interpret_as_conditional_op_p): Ditto.
> * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Ditto.
> (convert_mult_to_fma): Ditto.
> (math_opts_dom_walker::after_dom_children): Ditto.
> 
> ---
>  gcc/genmatch.cc   | 13 ++
>  gcc/internal-fn.cc| 87 ++-
>  gcc/internal-fn.h |  2 +-
>  gcc/tree-ssa-math-opts.cc | 80 +--
>  4 files changed, 159 insertions(+), 23 deletions(-)
> 
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index 5fceeec9780..2302f2a7ff0 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -559,6 +559,19 @@ commutative_op (id_base *id)
>case CFN_COND_FMS:
>case CFN_COND_FNMA:
>case CFN_COND_FNMS:
> +  case CFN_COND_LEN_ADD:
> +  case CFN_COND_LEN_MUL:
> +  case CFN_COND_LEN_MIN:
> +  case CFN_COND_LEN_MAX:
> +  case CFN_COND_LEN_FMIN:
> +  case CFN_COND_LEN_FMAX:
> +  case CFN_COND_LEN_AND:
> +  case CFN_COND_LEN_IOR:
> +  case CFN_COND_LEN_XOR:
> +  case CFN_COND_LEN_FMA:
> +  case CFN_COND_LEN_FMS:
> +  case CFN_COND_LEN_FNMA:
> +  case CFN_COND_LEN_FNMS:
>   return 1;
>  
>default:
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index c11123a1173..e698f0bffc7 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4191,6 +4191,19 @@ first_commutative_argument (internal_fn fn)
>  case IFN_COND_FMS:
>  case IFN_COND_FNMA:
>  case IFN_COND_FNMS:
> +case IFN_COND_LEN_ADD:
> +case IFN_COND_LEN_MUL:
> +case IFN_COND_LEN_MIN:
> +case IFN_COND_LEN_MAX:
> +case IFN_COND_LEN_FMIN:
> +case IFN_COND_LEN_FMAX:
> +case IFN_COND_LEN_AND:
> +case IFN_COND_LEN_IOR:
> +case IFN_COND_LEN_XOR:
> +case IFN_COND_LEN_FMA:
> +case IFN_COND_LEN_FMS:
> +case IFN_COND_LEN_FNMA:
> +case IFN_COND_LEN_FNMS:
>return 1;
>  
>  default:
> @@ -4330,11 +4343,14 @@ conditional_internal_fn_code (internal_fn ifn)
>  {
>switch (ifn)
>  {
> -#define CASE(CODE, IFN) case IFN_COND_##IFN: return CODE;
> -  FOR_EACH_CODE_MAPPING(CASE)
> +#define CASE(CODE, IFN)  
>   \
> +  case IFN_COND_##IFN:   
>   \
> +  case IFN_COND_LEN_##IFN:   
>   \
> +return CODE;
> +  FOR_EACH_CODE_MAPPING (CASE)
>  #undef CASE
> -default:
> -  return ERROR_MARK;
> +  default:
> + return ERROR_MARK;
>  }
>  }
>  
> @@ -4433,6 +4449,18 @@ get_unconditional_internal_fn (internal_fn ifn)
> operating elementwise if the operands are vectors.  This includes
> the case of an all-true COND, so that the operation always happens.
>  
> +   There is an alternative approach to interpret the STMT when the operands
> +   are vectors which is the operation predicated by both conditional mask
> +   and loop control length, the equivalent C code:
> +
> + for (int i = 0; i < NUNITS; i++)
> +  {
> + if (i < LEN + BIAS && COND[i])
> +   LHS[i] = A[i] CODE B[i];
> + else
> +   LHS[i] = ELSE[i];
> +  }
> +
> When returning true, set:
>  
> - *COND_OUT to the condition COND, or to NULL_TREE if the condition
> @@ -4440,13 +4468,18 @@ get_unconditional_internal_fn (internal_fn ifn)
> - *CODE_OUT to the tree code
> - OPS[I] to operand I of *CODE_OUT
> - *ELSE_OUT to the fallback value ELSE, or to N

[PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math optimization

2023-07-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

Hi, Richard and Richi.

The previous patch added support for COND_LEN_* binary operations, but not for
COND_LEN_* ternary operations.

This patch adds support for COND_LEN_* ternary operations. Consider the
following case:

#define TEST_TYPE(TYPE)                                                       \
  __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst,           \
                                              TYPE *__restrict a,             \
                                              TYPE *__restrict b,             \
                                              TYPE *__restrict c, int n)      \
  {                                                                           \
    for (int i = 0; i < n; i++)                                               \
      dst[i] += a[i] * b[i];                                                  \
  }

#define TEST_ALL() TEST_TYPE (double)

TEST_ALL ()

Before this patch:
...
COND_LEN_MUL
COND_LEN_ADD

After this patch:
...
COND_LEN_FMA

gcc/ChangeLog:

* genmatch.cc (commutative_op): Add COND_LEN_*
* internal-fn.cc (first_commutative_argument): Ditto.
(CASE): Ditto.
(get_unconditional_internal_fn): Ditto.
(can_interpret_as_conditional_op_p): Ditto.
(internal_fn_len_index): Ditto.
* internal-fn.h (can_interpret_as_conditional_op_p): Ditto.
* tree-ssa-math-opts.cc (convert_mult_to_fma_1): Ditto.
(convert_mult_to_fma): Ditto.
(math_opts_dom_walker::after_dom_children): Ditto.

---
 gcc/genmatch.cc   | 13 ++
 gcc/internal-fn.cc| 87 ++-
 gcc/internal-fn.h |  2 +-
 gcc/tree-ssa-math-opts.cc | 80 +--
 4 files changed, 159 insertions(+), 23 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index 5fceeec9780..2302f2a7ff0 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -559,6 +559,19 @@ commutative_op (id_base *id)
   case CFN_COND_FMS:
   case CFN_COND_FNMA:
   case CFN_COND_FNMS:
+  case CFN_COND_LEN_ADD:
+  case CFN_COND_LEN_MUL:
+  case CFN_COND_LEN_MIN:
+  case CFN_COND_LEN_MAX:
+  case CFN_COND_LEN_FMIN:
+  case CFN_COND_LEN_FMAX:
+  case CFN_COND_LEN_AND:
+  case CFN_COND_LEN_IOR:
+  case CFN_COND_LEN_XOR:
+  case CFN_COND_LEN_FMA:
+  case CFN_COND_LEN_FMS:
+  case CFN_COND_LEN_FNMA:
+  case CFN_COND_LEN_FNMS:
return 1;
 
   default:
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c11123a1173..e698f0bffc7 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4191,6 +4191,19 @@ first_commutative_argument (internal_fn fn)
 case IFN_COND_FMS:
 case IFN_COND_FNMA:
 case IFN_COND_FNMS:
+case IFN_COND_LEN_ADD:
+case IFN_COND_LEN_MUL:
+case IFN_COND_LEN_MIN:
+case IFN_COND_LEN_MAX:
+case IFN_COND_LEN_FMIN:
+case IFN_COND_LEN_FMAX:
+case IFN_COND_LEN_AND:
+case IFN_COND_LEN_IOR:
+case IFN_COND_LEN_XOR:
+case IFN_COND_LEN_FMA:
+case IFN_COND_LEN_FMS:
+case IFN_COND_LEN_FNMA:
+case IFN_COND_LEN_FNMS:
   return 1;
 
 default:
@@ -4330,11 +4343,14 @@ conditional_internal_fn_code (internal_fn ifn)
 {
   switch (ifn)
 {
-#define CASE(CODE, IFN) case IFN_COND_##IFN: return CODE;
-  FOR_EACH_CODE_MAPPING(CASE)
+#define CASE(CODE, IFN)
\
+  case IFN_COND_##IFN: 
\
+  case IFN_COND_LEN_##IFN: 
\
+return CODE;
+  FOR_EACH_CODE_MAPPING (CASE)
 #undef CASE
-default:
-  return ERROR_MARK;
+  default:
+   return ERROR_MARK;
 }
 }
 
@@ -4433,6 +4449,18 @@ get_unconditional_internal_fn (internal_fn ifn)
operating elementwise if the operands are vectors.  This includes
the case of an all-true COND, so that the operation always happens.
 
+   There is an alternative approach to interpret the STMT when the operands
+   are vectors which is the operation predicated by both conditional mask
+   and loop control length, the equivalent C code:
+
+ for (int i = 0; i < NUNITS; i++)
+  {
+   if (i < LEN + BIAS && COND[i])
+ LHS[i] = A[i] CODE B[i];
+   else
+ LHS[i] = ELSE[i];
+  }
+
When returning true, set:
 
- *COND_OUT to the condition COND, or to NULL_TREE if the condition
@@ -4440,13 +4468,18 @@ get_unconditional_internal_fn (internal_fn ifn)
- *CODE_OUT to the tree code
- OPS[I] to operand I of *CODE_OUT
- *ELSE_OUT to the fallback value ELSE, or to NULL_TREE if the
- condition is known to be all true.  */
+ condition is known to be all true.
+   - *LEN to the len argument for a COND_LEN_* operation, otherwise NULL_TREE.
+   - *BIAS to the bias argument for a COND_LEN_* operation, otherwise NULL_TREE.  
*/
 
 bool
 can_interpret_as_conditional_op_p (gimple *stmt, tree *cond_out,