https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114887

            Bug ID: 114887
           Summary: RISC-V: expect M8 but M4 generated with dynamic LMUL
                    for TSVC s319
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: deminhan at gcc dot gnu.org
  Target Milestone: ---

We expect M8 when using the following code and options, but M4 is generated.

-march=rv64gcv_zba_zbb_zvl256b -mabi=lp64d -mrvv-max-lmul=dynamic -O3
-ffast-math

typedef float real_t;
__attribute__((aligned(64))) real_t
a[32000],b[32000],c[32000],d[32000],e[32000],
                                  
aa[256][256],bb[256][256],cc[256][256],tt[256][256];

real_t s319()
{
    real_t sum;
    for (int nl = 0; nl < 2*256; nl++) {
        sum = 0.;
        for (int i = 0; i < 32000; i++) {
            a[i] = c[i] + d[i];
            sum += a[i];
            b[i] = c[i] + e[i];
            sum += b[i];
        }
    }
    return sum;
}

generated asm:
.L2:
        vsetvli t0,zero,e32,m4,ta,ma
        vmv.v.i v12,0
        li      a4,32768
        addi    a4,a4,-768
        mv      a2,t6
        mv      a6,t5
        mv      a3,t4
        mv      a0,t3
        mv      a1,t1
.L3:
        vsetvli a5,a4,e32,m4,tu,ma
        vle32.v v8,0(a1)
        vle32.v v4,0(a0)
        vle32.v v16,0(a6)
        sub     a4,a4,a5
        sh2add  a1,a5,a1
        sh2add  a0,a5,a0
        sh2add  a6,a5,a6
        vfadd.vv        v4,v4,v8
        vfadd.vv        v8,v8,v16
        vse32.v v4,0(a3)
        vfadd.vv        v4,v4,v8
        sh2add  a3,a5,a3
        vfadd.vv        v12,v12,v4
        vse32.v v8,0(a2)
        sh2add  a2,a5,a2
        bne     a4,zero,.L3
        addiw   a7,a7,-1
        bne     a7,zero,.L2
        fmv.s.x fa5,zero
        vsetvli a4,zero,e32,m4,ta,ma
        vfmv.s.f        v1,fa5
        vfredusum.vs    v12,v12,v1
        vfmv.f.s        fa0,v12
        ret
        .cfi_endproc

Reply via email to