Committed, thanks Juzhe.

On 2023/11/7 15:51, juzhe.zh...@rivai.ai wrote:
LGTM. Thanks for fixing it.

------------------------------------------------------------------------
juzhe.zh...@rivai.ai

    *From:* Lehua Ding <mailto:lehua.d...@rivai.ai>
    *Date:* 2023-11-07 15:49
    *To:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>
    *CC:* juzhe.zhong <mailto:juzhe.zh...@rivai.ai>; kito.cheng
    <mailto:kito.ch...@gmail.com>; rdapp.gcc
    <mailto:rdapp....@gmail.com>; palmer <mailto:pal...@rivosinc.com>;
    jeffreyalaw <mailto:jeffreya...@gmail.com>; lehua.ding
    <mailto:lehua.d...@rivai.ai>
    *Subject:* [PATCH] RISC-V: Fixed failed rvv combine testcases
    Hi,
    This patch fixes the following failing testcases on the trunk:
    FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
    scan-assembler-times
    \\tvfwredusum\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 2
    ...
    FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
    scan-assembler-times
    \\tvwredsumu\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 3
    ...
    These testcases fail because .VCOND_MASK_LEN was introduced in the
    midend for another bugfix, which in turn leads to a new vcond_mask_len
    rtl pattern after expand. So we need to add new combine patterns to
    handle this case.
    Consider this code:
    int16_t foo (int8_t *restrict a, int8_t *restrict pred)
    {
       int16_t sum = 0;
       for (int i = 0; i < 16; i += 1)
         if (pred[i])
           sum += a[i];
       return sum;
    }
    Before this patch:
    foo:
             vsetivli        zero,16,e8,m1,ta,ma
             vle8.v  v0,0(a1)
             vsetvli a5,zero,e8,m1,ta,ma
             vmsne.vi        v0,v0,0
             vsetvli zero,zero,e16,m2,ta,ma
             li      a3,0
             vmv.v.i v2,0
             vsetivli        zero,16,e16,m2,ta,ma
             vle8.v  v6,0(a0),v0.t
             vmv.s.x v1,a3
             vsetvli a5,zero,e16,m2,ta,ma
             vsext.vf2       v4,v6
             vsetivli        zero,16,e16,m2,tu,ma
             vmerge.vvm      v2,v2,v4,v0
             vsetvli a5,zero,e16,m2,ta,ma
             vredsum.vs      v2,v2,v1
             vmv.x.s a0,v2
             slliw   a0,a0,16
             sraiw   a0,a0,16
             ret
    After this patch:
    foo:
    vsetivli zero,16,e16,m2,ta,ma
    li a5,0
    vle8.v v0,0(a1)
    vmv.s.x v1,a5
    vsetvli zero,zero,e8,m1,ta,ma
    vmsne.vi v0,v0,0
    vle8.v v2,0(a0),v0.t
    vwredsum.vs v1,v2,v1,v0.t
    vsetvli zero,zero,e16,m1,ta,ma
    vmv.x.s a0,v1
    slliw a0,a0,16
    sraiw a0,a0,16
    ret
    With this patch, the vsext.vf2, vmerge.vvm, and vredsum.vs instructions
    are combined into a single masked vwredsum.vs, which also reduces the
    number of vsetvl instructions.
    gcc/ChangeLog:
    * config/riscv/autovec-opt.md (*cond_len_<optab><v_double_trunc><mode>):
    New combine pattern.
    (*cond_len_<optab><v_quad_trunc><mode>): Ditto.
    (*cond_len_<optab><v_oct_trunc><mode>): Ditto.
    (*cond_len_extend<v_double_trunc><mode>): Ditto.
    (*cond_len_widen_reduc_plus_scal_<mode>): Ditto.
    gcc/testsuite/ChangeLog:
    * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c:
    * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c:
    ---
    gcc/config/riscv/autovec-opt.md               | 214 ++++++++++++++++++
    .../rvv/autovec/cond/cond_widen_reduc-1.c     |  13 +-
    .../rvv/autovec/cond/cond_widen_reduc-2.c     |  30 +--
    3 files changed, 232 insertions(+), 25 deletions(-)
    diff --git a/gcc/config/riscv/autovec-opt.md
    b/gcc/config/riscv/autovec-opt.md
    index d0f8b3cde4e..3c87e66ea49 100644
    --- a/gcc/config/riscv/autovec-opt.md
    +++ b/gcc/config/riscv/autovec-opt.md
    @@ -194,6 +194,84 @@
    }
    [(set_attr "type" "vector")])
    +;; Combine sign_extend/zero_extend(vf2) and vcond_mask_len
    +(define_insn_and_split "*cond_len_<optab><v_double_trunc><mode>"
    +  [(set (match_operand:VWEXTI 0 "register_operand")
    +    (if_then_else:VWEXTI
    +      (unspec:<VM>
    +        [(match_operand 4 "vector_length_operand")
    +         (match_operand 5 "const_int_operand")
    +         (match_operand 6 "const_int_operand")
    +         (reg:SI VL_REGNUM)
    +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
    +      (vec_merge:VWEXTI
    +        (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2
    "register_operand"))
    +        (match_operand:VWEXTI 1 "vector_merge_operand")
    + (match_operand:<VM> 3 "register_operand"))
    +      (match_dup 1)))]
    +  "TARGET_VECTOR"
    +  "#"
    +  "&& 1"
    +  [(const_int 0)]
    +{
    +  emit_insn (gen_pred_<optab><mode>_vf2 (operands[0], operands[3],
    operands[1], operands[2],
    +                                         operands[4], operands[5],
    operands[6], CONST0_RTX (Pmode)));
    +  DONE;
    +}
    +[(set_attr "type" "vector")])
    +
    +;; Combine sign_extend/zero_extend(vf4) and vcond_mask_len
    +(define_insn_and_split "*cond_len_<optab><v_quad_trunc><mode>"
    +  [(set (match_operand:VQEXTI 0 "register_operand")
    +    (if_then_else:VQEXTI
    +      (unspec:<VM>
    +        [(match_operand 4 "vector_length_operand")
    +         (match_operand 5 "const_int_operand")
    +         (match_operand 6 "const_int_operand")
    +         (reg:SI VL_REGNUM)
    +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
    +      (vec_merge:VQEXTI
    +        (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 2
    "register_operand"))
    +        (match_operand:VQEXTI 1 "vector_merge_operand")
    + (match_operand:<VM> 3 "register_operand"))
    +      (match_dup 1)))]
    +  "TARGET_VECTOR"
    +  "#"
    +  "&& 1"
    +  [(const_int 0)]
    +{
    +  emit_insn (gen_pred_<optab><mode>_vf4 (operands[0], operands[3],
    operands[1], operands[2],
    +                                         operands[4], operands[5],
    operands[6], CONST0_RTX (Pmode)));
    +  DONE;
    +}
    +[(set_attr "type" "vector")])
    +
    +;; Combine sign_extend/zero_extend(vf8) and vcond_mask_len
    +(define_insn_and_split "*cond_len_<optab><v_oct_trunc><mode>"
    +  [(set (match_operand:VOEXTI 0 "register_operand")
    +    (if_then_else:VOEXTI
    +      (unspec:<VM>
    +        [(match_operand 4 "vector_length_operand")
    +         (match_operand 5 "const_int_operand")
    +         (match_operand 6 "const_int_operand")
    +         (reg:SI VL_REGNUM)
    +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
    +      (vec_merge:VOEXTI
    +        (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 2
    "register_operand"))
    +        (match_operand:VOEXTI 1 "vector_merge_operand")
    + (match_operand:<VM> 3 "register_operand"))
    +      (match_dup 1)))]
    +  "TARGET_VECTOR"
    +  "#"
    +  "&& 1"
    +  [(const_int 0)]
    +{
    +  emit_insn (gen_pred_<optab><mode>_vf8 (operands[0], operands[3],
    operands[1], operands[2],
    +                                         operands[4], operands[5],
    operands[6], CONST0_RTX (Pmode)));
    +  DONE;
    +}
    +[(set_attr "type" "vector")])
    +
    ;; Combine trunc(vf2) + vcond_mask
    (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
        [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
    @@ -235,6 +313,32 @@
    }
    [(set_attr "type" "vector")])
    +;; Combine FP extend(vf2) and vcond_mask_len
    +(define_insn_and_split "*cond_len_extend<v_double_trunc><mode>"
    +  [(set (match_operand:VWEXTF_ZVFHMIN 0 "register_operand")
    +    (if_then_else:VWEXTF_ZVFHMIN
    +      (unspec:<VM>
    +        [(match_operand 4 "vector_length_operand")
    +         (match_operand 5 "const_int_operand")
    +         (match_operand 6 "const_int_operand")
    +         (reg:SI VL_REGNUM)
    +         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
    +      (vec_merge:VWEXTF_ZVFHMIN
    +        (float_extend:VWEXTF_ZVFHMIN
    (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
    +        (match_operand:VWEXTF_ZVFHMIN 1 "vector_merge_operand")
    + (match_operand:<VM> 3 "register_operand"))
    +      (match_dup 1)))]
    +  "TARGET_VECTOR"
    +  "#"
    +  "&& 1"
    +  [(const_int 0)]
    +{
    +  emit_insn (gen_pred_extend<mode> (operands[0], operands[3],
    operands[1], operands[2],
    +                                    operands[4], operands[5],
    operands[6], CONST0_RTX (Pmode)));
    +  DONE;
    +}
    +[(set_attr "type" "vector")])
    +
    ;; Combine FP trunc(vf2) + vcond_mask
    (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
        [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
    @@ -1151,6 +1255,61 @@
    }
    [(set_attr "type" "vector")])
    +;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
    +;; where the merge of mask_len_extend is vector const 0
    +(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
    +  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
    +        (unspec:<V_DOUBLE_EXTEND_VEL> [
    +          (if_then_else:<V_DOUBLE_EXTEND>
    +            (unspec:<VM> [
    +              (match_operand 2 "vector_length_operand")
    +              (const_int 0)
    +              (const_int 0)
    +              (reg:SI VL_REGNUM)
    +              (reg:SI VTYPE_REGNUM)
    +            ] UNSPEC_VPREDICATE)
    +            (vec_merge:<V_DOUBLE_EXTEND>
    +              (any_extend:<V_DOUBLE_EXTEND>
    +                (match_operand:VI_QHS_NO_M8 3 "register_operand"))
    +              (if_then_else:<V_DOUBLE_EXTEND>
    +                (unspec:<VM> [
    +                  (match_operand:<VM> 4
    "vector_all_trues_mask_operand")
    +                  (match_operand 5 "vector_length_operand")
    +                  (match_operand 6 "const_int_operand")
    +                  (match_operand 7 "const_int_operand")
    +                  (match_operand 8 "const_1_or_2_operand")
    +                  (reg:SI VL_REGNUM)
    +                  (reg:SI VTYPE_REGNUM)
    +                ] UNSPEC_VPREDICATE)
    +                (match_operand:<V_DOUBLE_EXTEND> 9
    "vector_const_0_operand")
    +                (match_operand:<V_DOUBLE_EXTEND> 10
    "vector_merge_operand"))
    +              (match_operand:<VM> 1 "register_operand"))
    +            (if_then_else:<V_DOUBLE_EXTEND>
    +              (unspec:<VM> [
    +                (match_dup 4)
    +                (match_dup 5)
    +                (match_dup 6)
    +                (match_dup 7)
    +                (match_dup 8)
    +                (reg:SI VL_REGNUM)
    +                (reg:SI VTYPE_REGNUM)
    +              ] UNSPEC_VPREDICATE)
    +              (match_dup 9)
    +              (match_dup 10)))
    +        ] UNSPEC_REDUC_SUM))]
    +  "TARGET_VECTOR && can_create_pseudo_p ()"
    +  "#"
    +  "&& 1"
    +  [(const_int 0)]
    +{
    +  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
    +  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
    +                                  riscv_vector::REDUCE_OP_M,
    +                                  ops, CONST0_RTX
    (<V_DOUBLE_EXTEND_VEL>mode));
    +  DONE;
    +}
    +[(set_attr "type" "vector")])
    +
    ;; Combine mask_extend + vfredsum to mask_vfwredusum
    ;; where the mrege of mask_extend is vector const 0
    (define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
    @@ -1187,6 +1346,61 @@
    }
    [(set_attr "type" "vector")])
    +;; Combine mask_len_extend + vfredsum to mask_vfwredusum
    +;; where the merge of mask_len_extend is vector const 0
    +(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
    +  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
    +        (unspec:<V_DOUBLE_EXTEND_VEL> [
    +          (if_then_else:<V_DOUBLE_EXTEND>
    +            (unspec:<VM> [
    +              (match_operand 2 "vector_length_operand")
    +              (const_int 0)
    +              (const_int 0)
    +              (reg:SI VL_REGNUM)
    +              (reg:SI VTYPE_REGNUM)
    +            ] UNSPEC_VPREDICATE)
    +            (vec_merge:<V_DOUBLE_EXTEND>
    +              (float_extend:<V_DOUBLE_EXTEND>
    +                (match_operand:VF_HS_NO_M8 3 "register_operand"))
    +              (if_then_else:<V_DOUBLE_EXTEND>
    +                (unspec:<VM> [
    +                  (match_operand:<VM> 4
    "vector_all_trues_mask_operand")
    +                  (match_operand 5 "vector_length_operand")
    +                  (match_operand 6 "const_int_operand")
    +                  (match_operand 7 "const_int_operand")
    +                  (match_operand 8 "const_1_or_2_operand")
    +                  (reg:SI VL_REGNUM)
    +                  (reg:SI VTYPE_REGNUM)
    +                ] UNSPEC_VPREDICATE)
    +                (match_operand:<V_DOUBLE_EXTEND> 9
    "vector_const_0_operand")
    +                (match_operand:<V_DOUBLE_EXTEND> 10
    "vector_merge_operand"))
    +              (match_operand:<VM> 1 "register_operand"))
    +            (if_then_else:<V_DOUBLE_EXTEND>
    +              (unspec:<VM> [
    +                (match_dup 4)
    +                (match_dup 5)
    +                (match_dup 6)
    +                (match_dup 7)
    +                (match_dup 8)
    +                (reg:SI VL_REGNUM)
    +                (reg:SI VTYPE_REGNUM)
    +              ] UNSPEC_VPREDICATE)
    +              (match_dup 9)
    +              (match_dup 10)))
    +        ] UNSPEC_REDUC_SUM_UNORDERED))]
    +  "TARGET_VECTOR && can_create_pseudo_p ()"
    +  "#"
    +  "&& 1"
    +  [(const_int 0)]
    +{
    +  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
    +  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
    +                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
    +                                  ops, CONST0_RTX
    (<V_DOUBLE_EXTEND_VEL>mode));
    +  DONE;
    +}
    +[(set_attr "type" "vector")])
    +
    ;;
    
=============================================================================
    ;; Misc combine patterns
    ;;
    
=============================================================================
    diff --git
    a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
    index 22a71048684..47889f3a1cd 100644
    ---
    a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
    +++
    b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
    @@ -15,16 +15,27 @@
    #define
    TEST_ALL(TEST)                                                         \
        TEST (int16_t, int8_t,
    16)                                                   \
    +  TEST (int32_t, int8_t,
    8)                                                    \
        TEST (int32_t, int16_t,
    8)                                                   \
    +  TEST (int64_t, int8_t,
    4)                                                    \
    +  TEST (int64_t, int16_t,
    4)                                                   \
        TEST (int64_t, int32_t,
    4)                                                   \
        TEST (uint16_t, uint8_t,
    16)                                                 \
    +  TEST (uint32_t, uint8_t,
    8)                                                  \
        TEST (uint32_t, uint16_t,
    8)                                                 \
    +  TEST (uint64_t, uint8_t,
    4)                                                  \
    +  TEST (uint64_t, uint16_t,
    4)                                                 \
        TEST (uint64_t, uint32_t,
    4)                                                 \
        TEST (float, _Float16,
    8)                                                    \
    +  TEST (double, _Float16,
    4)                                                   \
        TEST (double, float, 4)
    TEST_ALL (TEST_TYPE)
    -/* { dg-final { scan-assembler-times
    {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
    +/* { dg-final { scan-assembler-times
    {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
    /* { dg-final { scan-assembler-times
    {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
    /* { dg-final { scan-assembler-times
    {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
    +/* { dg-final { scan-assembler-times
    {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
    +/* { dg-final { scan-assembler-times
    {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
    +/* { dg-final { scan-assembler-times
    {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
    +/* { dg-final { scan-assembler-times
    {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
    diff --git
    a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
    index 7c8fedd072b..662d1351215 100644
    ---
    a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
    +++
    b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
    @@ -1,30 +1,12 @@
    /* { dg-do compile } */
    /* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d
    --param riscv-autovec-preference=scalable --param
    riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
    -#include <stdint-gcc.h>
    -#define TEST_TYPE(TYPE1, TYPE2,
    N)                                             \
    -  __attribute__
    ((noipa))                                                      \
    -  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict
    pred)      \
- {                                                                            \
    -    TYPE1 sum =
    0;                                                             \
    -    for (int i = 0; i < N; i +=
    1)                                             \
    -      if
    (pred[i])                                                             \
    - sum +=
    a[i];                                                           \
    -    return
    sum;                                                                \
    -  }
    +#include "cond_widen_reduc-1.c"
    -#define
    TEST_ALL(TEST)                                                         \
    -  TEST (int16_t, int8_t,
    16)                                                   \
    -  TEST (int32_t, int16_t,
    8)                                                   \
    -  TEST (int64_t, int32_t,
    4)                                                   \
    -  TEST (uint16_t, uint8_t,
    16)                                                 \
    -  TEST (uint32_t, uint16_t,
    8)                                                 \
    -  TEST (uint64_t, uint32_t,
    4)                                                 \
    -  TEST (float, _Float16,
    8)                                                    \
    -  TEST (double, float, 4)
    -
    -TEST_ALL (TEST_TYPE)
    -
    -/* { dg-final { scan-assembler-times
    {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
    +/* { dg-final { scan-assembler-times
    {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
    /* { dg-final { scan-assembler-times
    {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
    /* { dg-final { scan-assembler-times
    {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
    +/* { dg-final { scan-assembler-times
    {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
    +/* { dg-final { scan-assembler-times
    {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
    +/* { dg-final { scan-assembler-times
    {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
    +/* { dg-final { scan-assembler-times
    {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
-- 2.36.3


--
Best,
Lehua (RiVAI)
lehua.d...@rivai.ai

Reply via email to