Ping^2 [PATCH v2, rs6000] Use CC for BCD operations [PR100736]

HAO CHEN GUI via Gcc-patches Sun, 31 Jul 2022 19:03:11 -0700

Hi,
    Gentle ping on this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
Thanks.


On 4/7/2022 下午 2:33, HAO CHEN GUI wrote:
> Hi,
>    Gentle ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html
> Thanks.
> 
> On 22/6/2022 下午 4:26, HAO CHEN GUI wrote:
>> Hi,
>>   This patch uses CC instead of CCFP for all BCD operations. Thus, the infinite
>> math flag has no impact on BCD operations. To support BCD overflow and
>> invalid coding, an UNSPEC is defined to move the overflow bit to a general
>> register. Patterns for conditional branch and return on the overflow bit are
>> defined so that the UNSPEC and the branch/return can be combined into one
>> jump insn. A split pattern for the overflow bit extension is defined for
>> optimization.
>>
>>   This patch also replaces bcdadd with bcdsub in the expander for BCD
>> invalid coding.
>>
>> ChangeLog
>> 2022-06-22 Haochen Gui <guih...@linux.ibm.com>
>>
>> gcc/
>>      PR target/100736
>>      * config/rs6000/altivec.md (BCD_TEST): Remove unordered.
>>      (bcd<bcd_add_sub>_<mode>): Replace CCFP with CC.
>>      (*bcd<bcd_add_sub>_test_<mode>): Replace CCFP with CC.  Generate
>>      condition insn with CC mode.
>>      (bcd<bcd_add_sub>_overflow_<mode>): New.
>>      (*bcdoverflow_<mode>): New.
>>      (*bcdinvalid_<mode>): Remove.
>>      (bcdinvalid_<mode>): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW.
>>      (nuun): New.
>>      (*overflow_cbranch): New.
>>      (*overflow_creturn): New.
>>      (*overflow_extendsidi): New.
>>      (bcdshift_v16qi): Replace CCFP with CC.
>>      (bcdmul10_v16qi): Likewise.
>>      (bcddiv10_v16qi): Likewise.
>>      (peephole for bcd_add/sub): Likewise.
>>      * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set
>>      pattern to bcdadd_overflow_v1ti.
>>      (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi.
>>      (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti.
>>      (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi.
>>
>> gcc/testsuite/
>>      PR target/100736
>>      * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub.
>>      Scan no cror insns.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>> index efc8ae35c2e..26f131e61ea 100644
>> --- a/gcc/config/rs6000/altivec.md
>> +++ b/gcc/config/rs6000/altivec.md
>> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB])
>>  (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
>>                            (UNSPEC_BCDSUB "sub")])
>>
>> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered])
>> +(define_code_iterator BCD_TEST [eq lt le gt ge])
>>  (define_mode_iterator VBCD [V1TI V16QI])
>>
>>  (define_insn "bcd<bcd_add_sub>_<mode>"
>> @@ -4379,7 +4379,7 @@ (define_insn "bcd<bcd_add_sub>_<mode>"
>>                    (match_operand:VBCD 2 "register_operand" "v")
>>                    (match_operand:QI 3 "const_0_to_1_operand" "n")]
>>                   UNSPEC_BCD_ADD_SUB))
>> -   (clobber (reg:CCFP CR6_REGNO))]
>> +   (clobber (reg:CC CR6_REGNO))]
>>    "TARGET_P8_VECTOR"
>>    "bcd<bcd_add_sub>. %0,%1,%2,%3"
>>    [(set_attr "type" "vecsimple")])
>> @@ -4389,9 +4389,9 @@ (define_insn "bcd<bcd_add_sub>_<mode>"
>>  ;; UNORDERED test on an integer type (like V1TImode) is not defined.  The type
>>  ;; probably should be one that can go in the VMX (Altivec) registers, so we
>>  ;; can't use DDmode or DFmode.
>> -(define_insn "*bcd<bcd_add_sub>_test_<mode>"
>> -  [(set (reg:CCFP CR6_REGNO)
>> -    (compare:CCFP
>> +(define_insn "bcd<bcd_add_sub>_test_<mode>"
>> +  [(set (reg:CC CR6_REGNO)
>> +    (compare:CC
>>       (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")
>>                     (match_operand:VBCD 2 "register_operand" "v")
>>                     (match_operand:QI 3 "const_0_to_1_operand" "i")]
>> @@ -4408,8 +4408,8 @@ (define_insn "*bcd<bcd_add_sub>_test2_<mode>"
>>                    (match_operand:VBCD 2 "register_operand" "v")
>>                    (match_operand:QI 3 "const_0_to_1_operand" "i")]
>>                   UNSPEC_BCD_ADD_SUB))
>> -   (set (reg:CCFP CR6_REGNO)
>> -    (compare:CCFP
>> +   (set (reg:CC CR6_REGNO)
>> +    (compare:CC
>>       (unspec:V2DF [(match_dup 1)
>>                     (match_dup 2)
>>                     (match_dup 3)]
>> @@ -4502,8 +4502,8 @@ (define_insn "vclrrb"
>>     [(set_attr "type" "vecsimple")])
>>
>>  (define_expand "bcd<bcd_add_sub>_<code>_<mode>"
>> -  [(parallel [(set (reg:CCFP CR6_REGNO)
>> -               (compare:CCFP
>> +  [(parallel [(set (reg:CC CR6_REGNO)
>> +               (compare:CC
>>                  (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
>>                                (match_operand:VBCD 2 "register_operand")
>>                                (match_operand:QI 3 "const_0_to_1_operand")]
>> @@ -4511,46 +4511,138 @@ (define_expand "bcd<bcd_add_sub>_<code>_<mode>"
>>                  (match_dup 4)))
>>            (clobber (match_scratch:VBCD 5))])
>>     (set (match_operand:SI 0 "register_operand")
>> -    (BCD_TEST:SI (reg:CCFP CR6_REGNO)
>> +    (BCD_TEST:SI (reg:CC CR6_REGNO)
>>                   (const_int 0)))]
>>    "TARGET_P8_VECTOR"
>>  {
>>    operands[4] = CONST0_RTX (V2DFmode);
>> +  emit_insn (gen_bcd<bcd_add_sub>_test_<mode> (operands[0], operands[1],
>> +                                           operands[2], operands[3],
>> +                                           operands[4]));
>> +
>> +  rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO);
>> +  rtx condition_rtx = gen_rtx_<CODE> (SImode, cr6, const0_rtx);
>> +
>> +  if (<CODE> == GE || <CODE> == LE)
>> +    {
>> +      rtx not_result = gen_reg_rtx (CCEQmode);
>> +      rtx not_op, rev_cond_rtx;
>> +      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode, 
>> <CODE>),
>> +                                 SImode, XEXP (condition_rtx, 0),
>> +                                 const0_rtx);
>> +      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
>> +      emit_insn (gen_rtx_SET (not_result, not_op));
>> +      condition_rtx = gen_rtx_EQ (SImode, not_result, const0_rtx);
>> +    }
>> +
>> +  emit_insn (gen_rtx_SET (operands[0], condition_rtx));
>> +  DONE;
>>  })
>>
>> -(define_insn "*bcdinvalid_<mode>"
>> -  [(set (reg:CCFP CR6_REGNO)
>> -    (compare:CCFP
>> -     (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")]
>> -                  UNSPEC_BCDADD)
>> -     (match_operand:V2DF 2 "zero_constant" "j")))
>> -   (clobber (match_scratch:VBCD 0 "=v"))]
>> +(define_expand "bcd<bcd_add_sub>_overflow_<mode>"
>> +  [(parallel [(set (reg:CC CR6_REGNO)
>> +               (compare:CC
>> +                (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
>> +                              (match_operand:VBCD 2 "register_operand")
>> +                              (match_operand:QI 3 "const_0_to_1_operand")]
>> +                             UNSPEC_BCD_ADD_SUB)
>> +                (match_dup 4)))
>> +          (clobber (match_scratch:VBCD 5))])
>> +   (set (match_operand:SI 0 "register_operand")
>> +    (unspec:SI [(reg:CC CR6_REGNO)
>> +                (const_int 0)]
>> +               UNSPEC_BCD_OVERFLOW))]
>>    "TARGET_P8_VECTOR"
>> -  "bcdadd. %0,%1,%1,0"
>> +{
>> +  operands[4] = CONST0_RTX (V2DFmode);
>> +})
>> +
>> +(define_insn "*bcdoverflow_<mode>"
>> +  [(set (match_operand:SDI 0 "register_operand" "=r")
>> +    (unspec:SDI [(reg:CC CR6_REGNO)
>> +                 (const_int 0)]
>> +                UNSPEC_BCD_OVERFLOW))]
>> +  "TARGET_P8_VECTOR"
>> +  "mfcr %0,2\;rlwinm %0,%0,28,1"
>>    [(set_attr "type" "vecsimple")])
>>
>>  (define_expand "bcdinvalid_<mode>"
>> -  [(parallel [(set (reg:CCFP CR6_REGNO)
>> -               (compare:CCFP
>> -                (unspec:V2DF [(match_operand:VBCD 1 "register_operand")]
>> -                             UNSPEC_BCDADD)
>> +  [(parallel [(set (reg:CC CR6_REGNO)
>> +               (compare:CC
>> +                (unspec:V2DF [(match_operand:VBCD 1 "register_operand")
>> +                              (match_dup 1)
>> +                              (const_int 0)]
>> +                             UNSPEC_BCDSUB)
>>                  (match_dup 2)))
>>            (clobber (match_scratch:VBCD 3))])
>>     (set (match_operand:SI 0 "register_operand")
>> -    (unordered:SI (reg:CCFP CR6_REGNO)
>> -                  (const_int 0)))]
>> +    (unspec:SI [(reg:CC CR6_REGNO)
>> +                (const_int 0)]
>> +               UNSPEC_BCD_OVERFLOW))]
>>    "TARGET_P8_VECTOR"
>>  {
>>    operands[2] = CONST0_RTX (V2DFmode);
>>  })
>>
>> +(define_code_attr nuun [(eq "nu")
>> +                    (ne "un")])
>> +
>> +(define_insn "*overflow_cbranch"
>> +  [(set (pc)
>> +    (if_then_else (eqne
>> +                   (unspec:SI [(reg:CC CR6_REGNO)
>> +                               (const_int 0)]
>> +                              UNSPEC_BCD_OVERFLOW)
>> +                   (const_int 0))
>> +                  (label_ref (match_operand 0))
>> +                  (pc)))]
>> +  "TARGET_P8_VECTOR"
>> +  "b<nuun> 6,%l0"
>> +  [(set_attr "type" "branch")
>> +   (set (attr "length")
>> +    (if_then_else (and (ge (minus (match_dup 0) (pc))
>> +                           (const_int -32768))
>> +                       (lt (minus (match_dup 0) (pc))
>> +                           (const_int 32764)))
>> +                  (const_int 4)
>> +                  (const_int 8)))])
>> +
>> +(define_insn "*overflow_creturn"
>> +  [(set (pc)
>> +    (if_then_else (eqne
>> +                   (unspec:SI [(reg:CC CR6_REGNO)
>> +                               (const_int 0)]
>> +                              UNSPEC_BCD_OVERFLOW)
>> +                   (const_int 0))
>> +                  (simple_return)
>> +                  (pc)))]
>> +  "TARGET_P8_VECTOR"
>> +  "b<nuun>lr 6"
>> +  [(set_attr "type" "jmpreg")])
>> +
>> +(define_insn_and_split "*overflow_extendsidi"
>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>> +    (sign_extend:DI
>> +     (unspec:SI [(reg:CC CR6_REGNO)
>> +                 (const_int 0)]
>> +                UNSPEC_BCD_OVERFLOW)))]
>> +  "TARGET_P8_VECTOR"
>> +  "#"
>> +  "&& 1"
>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>> +    (unspec:DI [(reg:CC CR6_REGNO)
>> +                (const_int 0)]
>> +               UNSPEC_BCD_OVERFLOW))]
>> +  ""
>> +  [(set_attr "type" "vecsimple")])
>> +
>>  (define_insn "bcdshift_v16qi"
>>    [(set (match_operand:V16QI 0 "register_operand" "=v")
>>      (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
>>                     (match_operand:V16QI 2 "register_operand" "v")
>>                     (match_operand:QI 3 "const_0_to_1_operand" "n")]
>>                   UNSPEC_BCDSHIFT))
>> -   (clobber (reg:CCFP CR6_REGNO))]
>> +   (clobber (reg:CC CR6_REGNO))]
>>    "TARGET_P8_VECTOR"
>>    "bcds. %0,%1,%2,%3"
>>    [(set_attr "type" "vecsimple")])
>> @@ -4559,7 +4651,7 @@ (define_expand "bcdmul10_v16qi"
>>    [(set (match_operand:V16QI 0 "register_operand")
>>      (unspec:V16QI [(match_operand:V16QI 1 "register_operand")]
>>                    UNSPEC_BCDSHIFT))
>> -   (clobber (reg:CCFP CR6_REGNO))]
>> +   (clobber (reg:CC CR6_REGNO))]
>>    "TARGET_P9_VECTOR"
>>  {
>>    rtx one = gen_reg_rtx (V16QImode);
>> @@ -4574,7 +4666,7 @@ (define_expand "bcddiv10_v16qi"
>>    [(set (match_operand:V16QI 0 "register_operand")
>>      (unspec:V16QI [(match_operand:V16QI 1 "register_operand")]
>>                    UNSPEC_BCDSHIFT))
>> -   (clobber (reg:CCFP CR6_REGNO))]
>> +   (clobber (reg:CC CR6_REGNO))]
>>    "TARGET_P9_VECTOR"
>>  {
>>    rtx one = gen_reg_rtx (V16QImode);
>> @@ -4598,9 +4690,9 @@ (define_peephole2
>>                               (match_operand:V1TI 2 "register_operand")
>>                               (match_operand:QI 3 "const_0_to_1_operand")]
>>                              UNSPEC_BCD_ADD_SUB))
>> -          (clobber (reg:CCFP CR6_REGNO))])
>> -   (parallel [(set (reg:CCFP CR6_REGNO)
>> -               (compare:CCFP
>> +          (clobber (reg:CC CR6_REGNO))])
>> +   (parallel [(set (reg:CC CR6_REGNO)
>> +               (compare:CC
>>                  (unspec:V2DF [(match_dup 1)
>>                                (match_dup 2)
>>                                (match_dup 3)]
>> @@ -4613,8 +4705,8 @@ (define_peephole2
>>                               (match_dup 2)
>>                               (match_dup 3)]
>>                              UNSPEC_BCD_ADD_SUB))
>> -          (set (reg:CCFP CR6_REGNO)
>> -               (compare:CCFP
>> +          (set (reg:CC CR6_REGNO)
>> +               (compare:CC
>>                  (unspec:V2DF [(match_dup 1)
>>                                (match_dup 2)
>>                                (match_dup 3)]
>> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
>> index f4a9f24bcc5..8e94fe5c438 100644
>> --- a/gcc/config/rs6000/rs6000-builtins.def
>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>> @@ -2371,10 +2371,10 @@
>>      BCDADD_LT_V16QI bcdadd_lt_v16qi {}
>>
>>    const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>);
>> -    BCDADD_OV_V1TI bcdadd_unordered_v1ti {}
>> +    BCDADD_OV_V1TI bcdadd_overflow_v1ti {}
>>
>>    const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>);
>> -    BCDADD_OV_V16QI bcdadd_unordered_v16qi {}
>> +    BCDADD_OV_V16QI bcdadd_overflow_v16qi {}
>>
>>    const signed int __builtin_bcdinvalid_v1ti (vsq);
>>      BCDINVALID_V1TI bcdinvalid_v1ti {}
>> @@ -2419,10 +2419,10 @@
>>      BCDSUB_LT_V16QI bcdsub_lt_v16qi {}
>>
>>    const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>);
>> -    BCDSUB_OV_V1TI bcdsub_unordered_v1ti {}
>> +    BCDSUB_OV_V1TI bcdsub_overflow_v1ti {}
>>
>>    const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>);
>> -    BCDSUB_OV_V16QI bcdsub_unordered_v16qi {}
>> +    BCDSUB_OV_V16QI bcdsub_overflow_v16qi {}
>>
>>    const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
>>      VPERMXOR_V16QI crypto_vpermxor_v16qi {}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
>> index 2c8554dfe82..3c25ed60e17 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c
>> @@ -2,10 +2,11 @@
>>  /* { dg-require-effective-target int128 } */
>>  /* { dg-require-effective-target power10_hw } */
>>  /* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
>> -/* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */
>> -/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */
>> +/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */
>> +/* { dg-final { scan-assembler-times {\mbcdsub\M} 20 } } */
>>  /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */
>>  /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */
>> +/* { dg-final { scan-assembler-not {\mcror\M} 1 } } */
>>
>>  #include <altivec.h>
>>

Ping^2 [PATCH v2, rs6000] Use CC for BCD operations [PR100736]

Reply via email to