RE: [PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math optimization
Committed, thanks Richard. Pan -----Original Message----- From: Gcc-patches On Behalf Of Richard Biener via Gcc-patches Sent: Thursday, July 13, 2023 6:51 PM To: Ju-Zhe Zhong Cc: gcc-patches@gcc.gnu.org; richard.sandif...@arm.com Subject: Re: [PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math optimization On Thu, 13 Jul 2023, juzhe.zh...@rivai.ai wrote: > From: Ju-Zhe Zhong > > Hi, Richard and Richi. > > Previous patch we support COND_LEN_* binary operations. However, we didn't > support COND_LEN_* ternary. > > Now, this patch support COND_LEN_* ternary. Consider this following case: > > #define TEST_TYPE(TYPE) > \ > __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst, > \ > TYPE *__restrict a, \ > TYPE *__restrict b,\ > TYPE *__restrict c, int n) \ > { > \ > for (int i = 0; i < n; i++) > \ > dst[i] += a[i] * b[i]; >\ > } > > #define TEST_ALL() TEST_TYPE (double) > > TEST_ALL () > > Before this patch: > ... > COND_LEN_MUL > COND_LEN_ADD > > After this patch: > ... > COND_LEN_FMA OK. Thanks, Richard. > gcc/ChangeLog: > > * genmatch.cc (commutative_op): Add COND_LEN_* > * internal-fn.cc (first_commutative_argument): Ditto. > (CASE): Ditto. > (get_unconditional_internal_fn): Ditto. > (can_interpret_as_conditional_op_p): Ditto. > (internal_fn_len_index): Ditto. > * internal-fn.h (can_interpret_as_conditional_op_p): Ditto. > * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Ditto. > (convert_mult_to_fma): Ditto. > (math_opts_dom_walker::after_dom_children): Ditto. 
> > --- > gcc/genmatch.cc | 13 ++ > gcc/internal-fn.cc| 87 ++- > gcc/internal-fn.h | 2 +- > gcc/tree-ssa-math-opts.cc | 80 +-- > 4 files changed, 159 insertions(+), 23 deletions(-) > > diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc > index 5fceeec9780..2302f2a7ff0 100644 > --- a/gcc/genmatch.cc > +++ b/gcc/genmatch.cc > @@ -559,6 +559,19 @@ commutative_op (id_base *id) >case CFN_COND_FMS: >case CFN_COND_FNMA: >case CFN_COND_FNMS: > + case CFN_COND_LEN_ADD: > + case CFN_COND_LEN_MUL: > + case CFN_COND_LEN_MIN: > + case CFN_COND_LEN_MAX: > + case CFN_COND_LEN_FMIN: > + case CFN_COND_LEN_FMAX: > + case CFN_COND_LEN_AND: > + case CFN_COND_LEN_IOR: > + case CFN_COND_LEN_XOR: > + case CFN_COND_LEN_FMA: > + case CFN_COND_LEN_FMS: > + case CFN_COND_LEN_FNMA: > + case CFN_COND_LEN_FNMS: > return 1; > >default: > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index c11123a1173..e698f0bffc7 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -4191,6 +4191,19 @@ first_commutative_argument (internal_fn fn) > case IFN_COND_FMS: > case IFN_COND_FNMA: > case IFN_COND_FNMS: > +case IFN_COND_LEN_ADD: > +case IFN_COND_LEN_MUL: > +case IFN_COND_LEN_MIN: > +case IFN_COND_LEN_MAX: > +case IFN_COND_LEN_FMIN: > +case IFN_COND_LEN_FMAX: > +case IFN_COND_LEN_AND: > +case IFN_COND_LEN_IOR: > +case IFN_COND_LEN_XOR: > +case IFN_COND_LEN_FMA: > +case IFN_COND_LEN_FMS: > +case IFN_COND_LEN_FNMA: > +case IFN_COND_LEN_FNMS: >return 1; > > default: > @@ -4330,11 +4343,14 @@ conditional_internal_fn_code (internal_fn ifn) > { >switch (ifn) > { > -#define CASE(CODE, IFN) case IFN_COND_##IFN: return CODE; > - FOR_EACH_CODE_MAPPING(CASE) > +#define CASE(CODE, IFN) > \ > + case IFN_COND_##IFN: > \ > + case IFN_COND_LEN_##IFN: > \ > +return CODE; > + FOR_EACH_CODE_MAPPING (CASE) > #undef CASE > -default: > - return ERROR_MARK; > + default: > + return ERROR_MARK; > } > } > > @@ -4433,6 +4449,18 @@ get_unconditional_internal_fn (internal_fn ifn) > operating elementwise if the operands 
are vectors. This includes > the case of an all-true COND, so that the operation alw
Re: [PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math optimization
On Thu, 13 Jul 2023, juzhe.zh...@rivai.ai wrote: > From: Ju-Zhe Zhong > > Hi, Richard and Richi. > > Previous patch we support COND_LEN_* binary operations. However, we didn't > support COND_LEN_* ternary. > > Now, this patch support COND_LEN_* ternary. Consider this following case: > > #define TEST_TYPE(TYPE) > \ > __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst, > \ > TYPE *__restrict a, \ > TYPE *__restrict b,\ > TYPE *__restrict c, int n) \ > { > \ > for (int i = 0; i < n; i++) > \ > dst[i] += a[i] * b[i]; >\ > } > > #define TEST_ALL() TEST_TYPE (double) > > TEST_ALL () > > Before this patch: > ... > COND_LEN_MUL > COND_LEN_ADD > > After this patch: > ... > COND_LEN_FMA OK. Thanks, Richard. > gcc/ChangeLog: > > * genmatch.cc (commutative_op): Add COND_LEN_* > * internal-fn.cc (first_commutative_argument): Ditto. > (CASE): Ditto. > (get_unconditional_internal_fn): Ditto. > (can_interpret_as_conditional_op_p): Ditto. > (internal_fn_len_index): Ditto. > * internal-fn.h (can_interpret_as_conditional_op_p): Ditto. > * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Ditto. > (convert_mult_to_fma): Ditto. > (math_opts_dom_walker::after_dom_children): Ditto. 
> > --- > gcc/genmatch.cc | 13 ++ > gcc/internal-fn.cc| 87 ++- > gcc/internal-fn.h | 2 +- > gcc/tree-ssa-math-opts.cc | 80 +-- > 4 files changed, 159 insertions(+), 23 deletions(-) > > diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc > index 5fceeec9780..2302f2a7ff0 100644 > --- a/gcc/genmatch.cc > +++ b/gcc/genmatch.cc > @@ -559,6 +559,19 @@ commutative_op (id_base *id) >case CFN_COND_FMS: >case CFN_COND_FNMA: >case CFN_COND_FNMS: > + case CFN_COND_LEN_ADD: > + case CFN_COND_LEN_MUL: > + case CFN_COND_LEN_MIN: > + case CFN_COND_LEN_MAX: > + case CFN_COND_LEN_FMIN: > + case CFN_COND_LEN_FMAX: > + case CFN_COND_LEN_AND: > + case CFN_COND_LEN_IOR: > + case CFN_COND_LEN_XOR: > + case CFN_COND_LEN_FMA: > + case CFN_COND_LEN_FMS: > + case CFN_COND_LEN_FNMA: > + case CFN_COND_LEN_FNMS: > return 1; > >default: > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index c11123a1173..e698f0bffc7 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -4191,6 +4191,19 @@ first_commutative_argument (internal_fn fn) > case IFN_COND_FMS: > case IFN_COND_FNMA: > case IFN_COND_FNMS: > +case IFN_COND_LEN_ADD: > +case IFN_COND_LEN_MUL: > +case IFN_COND_LEN_MIN: > +case IFN_COND_LEN_MAX: > +case IFN_COND_LEN_FMIN: > +case IFN_COND_LEN_FMAX: > +case IFN_COND_LEN_AND: > +case IFN_COND_LEN_IOR: > +case IFN_COND_LEN_XOR: > +case IFN_COND_LEN_FMA: > +case IFN_COND_LEN_FMS: > +case IFN_COND_LEN_FNMA: > +case IFN_COND_LEN_FNMS: >return 1; > > default: > @@ -4330,11 +4343,14 @@ conditional_internal_fn_code (internal_fn ifn) > { >switch (ifn) > { > -#define CASE(CODE, IFN) case IFN_COND_##IFN: return CODE; > - FOR_EACH_CODE_MAPPING(CASE) > +#define CASE(CODE, IFN) > \ > + case IFN_COND_##IFN: > \ > + case IFN_COND_LEN_##IFN: > \ > +return CODE; > + FOR_EACH_CODE_MAPPING (CASE) > #undef CASE > -default: > - return ERROR_MARK; > + default: > + return ERROR_MARK; > } > } > > @@ -4433,6 +4449,18 @@ get_unconditional_internal_fn (internal_fn ifn) > operating elementwise if the operands 
are vectors. This includes > the case of an all-true COND, so that the operation always happens. > > + There is an alternative approach to interpret the STMT when the operands > + are vectors which is the operation predicated by both conditional mask > + and loop control length, the equivalent C code: > + > + for (int i = 0; i < NUNITS; i++) > + { > + if (i < LEN + BIAS && COND[i]) > + LHS[i] = A[i] CODE B[i]; > + else > + LHS[i] = ELSE[i]; > + } > + > When returning true, set: > > - *COND_OUT to the condition COND, or to NULL_TREE if the condition > @@ -4440,13 +4468,18 @@ get_unconditional_internal_fn (internal_fn ifn) > - *CODE_OUT to the tree code > - OPS[I] to operand I of *CODE_OUT > - *ELSE_OUT to the fallback value ELSE, or to N
[PATCH V2] SSA MATH: Support COND_LEN_FMA for floating-point math optimization
From: Ju-Zhe Zhong Hi, Richard and Richi. The previous patch added support for COND_LEN_* binary operations. However, it didn't support COND_LEN_* ternary operations. Now, this patch supports COND_LEN_* ternary operations. Consider the following case: #define TEST_TYPE(TYPE)\ __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst,\ TYPE *__restrict a, \ TYPE *__restrict b,\ TYPE *__restrict c, int n) \ {\ for (int i = 0; i < n; i++)\ dst[i] += a[i] * b[i]; \ } #define TEST_ALL() TEST_TYPE (double) TEST_ALL () Before this patch: ... COND_LEN_MUL COND_LEN_ADD After this patch: ... COND_LEN_FMA gcc/ChangeLog: * genmatch.cc (commutative_op): Add COND_LEN_*. * internal-fn.cc (first_commutative_argument): Ditto. (CASE): Ditto. (get_unconditional_internal_fn): Ditto. (can_interpret_as_conditional_op_p): Ditto. (internal_fn_len_index): Ditto. * internal-fn.h (can_interpret_as_conditional_op_p): Ditto. * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Ditto. (convert_mult_to_fma): Ditto. (math_opts_dom_walker::after_dom_children): Ditto. 
--- gcc/genmatch.cc | 13 ++ gcc/internal-fn.cc| 87 ++- gcc/internal-fn.h | 2 +- gcc/tree-ssa-math-opts.cc | 80 +-- 4 files changed, 159 insertions(+), 23 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index 5fceeec9780..2302f2a7ff0 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -559,6 +559,19 @@ commutative_op (id_base *id) case CFN_COND_FMS: case CFN_COND_FNMA: case CFN_COND_FNMS: + case CFN_COND_LEN_ADD: + case CFN_COND_LEN_MUL: + case CFN_COND_LEN_MIN: + case CFN_COND_LEN_MAX: + case CFN_COND_LEN_FMIN: + case CFN_COND_LEN_FMAX: + case CFN_COND_LEN_AND: + case CFN_COND_LEN_IOR: + case CFN_COND_LEN_XOR: + case CFN_COND_LEN_FMA: + case CFN_COND_LEN_FMS: + case CFN_COND_LEN_FNMA: + case CFN_COND_LEN_FNMS: return 1; default: diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index c11123a1173..e698f0bffc7 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4191,6 +4191,19 @@ first_commutative_argument (internal_fn fn) case IFN_COND_FMS: case IFN_COND_FNMA: case IFN_COND_FNMS: +case IFN_COND_LEN_ADD: +case IFN_COND_LEN_MUL: +case IFN_COND_LEN_MIN: +case IFN_COND_LEN_MAX: +case IFN_COND_LEN_FMIN: +case IFN_COND_LEN_FMAX: +case IFN_COND_LEN_AND: +case IFN_COND_LEN_IOR: +case IFN_COND_LEN_XOR: +case IFN_COND_LEN_FMA: +case IFN_COND_LEN_FMS: +case IFN_COND_LEN_FNMA: +case IFN_COND_LEN_FNMS: return 1; default: @@ -4330,11 +4343,14 @@ conditional_internal_fn_code (internal_fn ifn) { switch (ifn) { -#define CASE(CODE, IFN) case IFN_COND_##IFN: return CODE; - FOR_EACH_CODE_MAPPING(CASE) +#define CASE(CODE, IFN) \ + case IFN_COND_##IFN: \ + case IFN_COND_LEN_##IFN: \ +return CODE; + FOR_EACH_CODE_MAPPING (CASE) #undef CASE -default: - return ERROR_MARK; + default: + return ERROR_MARK; } } @@ -4433,6 +4449,18 @@ get_unconditional_internal_fn (internal_fn ifn) operating elementwise if the operands are vectors. This includes the case of an all-true COND, so that the operation always happens. 
+ There is an alternative way to interpret the STMT when the operands + are vectors: the operation is predicated by both the conditional mask + and the loop control length. The equivalent C code is: + + for (int i = 0; i < NUNITS; i++) + { + if (i < LEN + BIAS && COND[i]) + LHS[i] = A[i] CODE B[i]; + else + LHS[i] = ELSE[i]; + } + When returning true, set: - *COND_OUT to the condition COND, or to NULL_TREE if the condition @@ -4440,13 +4468,18 @@ get_unconditional_internal_fn (internal_fn ifn) - *CODE_OUT to the tree code - OPS[I] to operand I of *CODE_OUT - *ELSE_OUT to the fallback value ELSE, or to NULL_TREE if the - condition is known to be all true. */ + condition is known to be all true. + - *LEN to the len argument if it is a COND_LEN_* operation, or to NULL_TREE otherwise. + - *BIAS to the bias argument if it is a COND_LEN_* operation, or to NULL_TREE otherwise. */ bool can_interpret_as_conditional_op_p (gimple *stmt, tree *cond_out,