https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111255

            Bug ID: 111255
           Summary: RISC-V: Miss combine two vsetvl insns
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: lehua.ding at rivai dot ai
  Target Milestone: ---

The two vsetvl insns below are not combined (missed optimization):

        vsetvli a5,a4,e8,m1,tu,mu      => these two vsetvl insns
        vsetvli zero,a5,e16,m2,ta,ma      should be combined.


C Code:

#include <stdint.h>

/* DEF_LOOP (OLD_TYPE, NEW_TYPE) defines

     void test_<OLD_TYPE>_2_<NEW_TYPE> (NEW_TYPE *r, OLD_TYPE *a, NEW_TYPE b,
                                        OLD_TYPE *pred, int n);

   which, for each i in [0, n), stores into r[i] either a[i] converted to
   NEW_TYPE (when pred[i] is non-zero) or b (when pred[i] is zero).  Nothing
   is written when n <= 0.

   The function is marked noipa so that it is compiled on its own, without
   interprocedural optimization against its callers.

   Note: each line-continuation backslash must be the last character on its
   line; the mail-wrapped form of this reproducer had them on separate lines,
   which left both macros with an empty body.  */
#define DEF_LOOP(OLD_TYPE, NEW_TYPE)                                          \
  void __attribute__ ((noipa))                                                \
  test_##OLD_TYPE##_2_##NEW_TYPE (NEW_TYPE *__restrict r,                     \
                                  OLD_TYPE *__restrict a, NEW_TYPE b,         \
                                  OLD_TYPE *__restrict pred, int n)           \
  {                                                                           \
    for (int i = 0; i < n; ++i)                                               \
      {                                                                       \
        r[i] = pred[i] ? (NEW_TYPE) a[i] : b;                                 \
      }                                                                       \
  }

/* INT -> narrower-INT */
#define TEST_ALL_X2X_NARROWER(T)                                              \
  T (int16_t, int8_t)

/* Instantiates test_int16_t_2_int8_t.  */
TEST_ALL_X2X_NARROWER (DEF_LOOP)

Assembly:

# GCC output for test_int16_t_2_int8_t from the C reproducer:
#   r[i] = pred[i] ? (int8_t) a[i] : b   for i in [0, n)
# Register roles, assuming the standard RISC-V integer calling convention
# applied to the C signature (TODO confirm against the psABI):
#   a0 = r, a1 = a, a2 = b, a3 = pred, a4 = n (elements remaining)
#   a5 = elements handled this iteration, a7 = a5 * 2 (byte step for int16_t)
#   v4 = b broadcast to every element, v0 = mask (pred != 0)
test_int16_t_2_int8_t:
        # Nothing to do when n <= 0.
        ble     a4,zero,.L5
        # Broadcast b into v4 once, outside the loop.
        vsetvli t1,zero,e8,m1,ta,ma
        vmv.v.x v4,a2
.L3:
        # Subject of this report: the first vsetvli only produces a5 (the
        # vector length for the a4 remaining elements); the second immediately
        # replaces the vtype with e16,m2.  e8,m1 and e16,m2 have the same
        # SEW/LMUL ratio (8), which is presumably why a single
        # "vsetvli a5,a4,e16,m2,ta,ma" would be sufficient.
        vsetvli a5,a4,e8,m1,tu,mu      => this two vsetvl insns 
        vsetvli zero,a5,e16,m2,ta,ma      should be combined.
        # Load a5 pred elements and turn them into a mask in v0 (pred != 0).
        vle16.v v0,0(a3)
        vsetvli t1,zero,e16,m2,ta,ma
        vmsne.vi        v0,v0,0
        # Load a[] only where the mask is set.
        vsetvli zero,a5,e16,m2,ta,ma
        vle16.v v2,0(a1),v0.t
        vsetvli a6,zero,e8,m1,ta,ma
        # a7 = byte step for the two int16_t pointers.
        slli    a7,a5,1
        # Narrow the 16-bit elements of v2 to 8 bits.
        vncvt.x.x.w     v2,v2
        # n -= elements handled in this iteration.
        sub     a4,a4,a5
        # Per element: take narrowed a[i] (v2) where the mask is set, else b (v4).
        vmerge.vvm      v2,v4,v2,v0
        # Store a5 result bytes to r.
        vsetvli zero,a5,e8,m1,ta,ma
        vse8.v  v2,0(a0)
        # Advance pred and a by a7 bytes, r by a5 bytes.
        add     a3,a3,a7
        add     a0,a0,a5
        add     a1,a1,a7
        # Loop while elements remain.
        bne     a4,zero,.L3
.L5:
        ret

compiler explorer: https://godbolt.org/z/KPP8G1E3W

Reply via email to