https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115759

            Bug ID: 115759
           Summary: RISC-V: complex code generated for lmbench's fwr when
                    using scalable autovec
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: deminhan at gcc dot gnu.org
  Target Milestone: ---

compile options: -march=rv64gcv -O3 -mrvv-vector-bits=scalable

source code:
/*
 * Stand-in declarations: the reproducer includes no headers, so it declares
 * the few names it needs itself.  Note that size_t is plain int here.
 */
typedef  int size_t;
/* uint64 is not referenced by the code shown in this report. */
typedef unsigned long long uint64;

#define NULL 0

/* Element type of the buffer that fwr() writes. */
#define TYPE    int
/*
 * State object that fwr() receives through its `cookie` argument.
 * fwr() reads only `buf` and `lastone`; the remaining fields are not
 * referenced by the code shown in this report.
 */
typedef struct _state {
        double  overhead;
        size_t  nbytes;
        int     need_buf2;
        int     aligned;
        TYPE    *buf;           /* first element written by fwr() */
        TYPE    *buf2;
        TYPE    *buf2_orig;
        TYPE    *lastone;       /* fwr() keeps looping while p <= lastone */
        size_t  N;
} state_t;

/*
 * fwr - store the value 1 into the buffer described by `cookie`.
 *
 * iterations: unused here; the outer `while (iterations-- > 0)` loop is
 *             commented out.
 * cookie:     pointer to a state_t; only `buf` and `lastone` are read.
 *
 * Starting at state->buf, each pass of the loop writes 128 consecutive
 * elements (p[0] .. p[127]) and then advances p by 128, for as long as
 * p <= lastone.  A pass always writes all 128 elements, including any that
 * lie beyond `lastone`.
 *
 * `a` is never used and `p_save` is only assigned, never read.
 */
void
fwr(int iterations, void *cookie)
{       
        state_t *state = (state_t *) cookie;
        register TYPE *lastone = state->lastone;
        TYPE* p_save = NULL;
        int a[1000];

        //while (iterations-- > 0) {
            register TYPE *p = state->buf;
            while (p <= lastone) {
                /*
                 * DOIT(i) expands to `p[i]=`, so the 128 uses below form one
                 * chained assignment: p[0] = p[1] = ... = p[127] = 1;
                 */
#define DOIT(i) p[i]=
                DOIT(0) DOIT(1) DOIT(2) DOIT(3) DOIT(4) DOIT(5) DOIT(6)
                DOIT(7) DOIT(8) DOIT(9) DOIT(10) DOIT(11) DOIT(12)
                DOIT(13) DOIT(14) DOIT(15) DOIT(16) DOIT(17) DOIT(18)
                DOIT(19) DOIT(20) DOIT(21) DOIT(22) DOIT(23) DOIT(24)
                DOIT(25) DOIT(26) DOIT(27) DOIT(28) DOIT(29) DOIT(30)
                DOIT(31) DOIT(32) DOIT(33) DOIT(34) DOIT(35) DOIT(36)
                DOIT(37) DOIT(38) DOIT(39) DOIT(40) DOIT(41) DOIT(42)
                DOIT(43) DOIT(44) DOIT(45) DOIT(46) DOIT(47) DOIT(48)
                DOIT(49) DOIT(50) DOIT(51) DOIT(52) DOIT(53) DOIT(54)
                DOIT(55) DOIT(56) DOIT(57) DOIT(58) DOIT(59) DOIT(60)
                DOIT(61) DOIT(62) DOIT(63) DOIT(64) DOIT(65) DOIT(66)
                DOIT(67) DOIT(68) DOIT(69) DOIT(70) DOIT(71) DOIT(72)
                DOIT(73) DOIT(74) DOIT(75) DOIT(76) DOIT(77) DOIT(78)
                DOIT(79) DOIT(80) DOIT(81) DOIT(82) DOIT(83) DOIT(84)
                DOIT(85) DOIT(86) DOIT(87) DOIT(88) DOIT(89) DOIT(90)
                DOIT(91) DOIT(92) DOIT(93) DOIT(94) DOIT(95) DOIT(96)
                DOIT(97) DOIT(98) DOIT(99) DOIT(100) DOIT(101) DOIT(102)
                DOIT(103) DOIT(104) DOIT(105) DOIT(106) DOIT(107)
                DOIT(108) DOIT(109) DOIT(110) DOIT(111) DOIT(112)
                DOIT(113) DOIT(114) DOIT(115) DOIT(116) DOIT(117)
                DOIT(118) DOIT(119) DOIT(120) DOIT(121) DOIT(122)
                DOIT(123) DOIT(124) DOIT(125) DOIT(126) DOIT(127) 1;
                p += 128;
            }
            /* Final pointer value is recorded but never consumed: the
             * use_pointer() call below is commented out. */
            p_save = p;
        //}
        //use_pointer(p_save);
}
#undef  DOIT

assembly code:
        .file   "test.c"
        .option nopic
        .attribute arch,
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zaamo1p0_zalrsc1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
        .attribute unaligned_access, 0
        .attribute stack_align, 16
        .text
        .align  1
        .globl  fwr
        .type   fwr, @function
# fwr -- entry and loop setup, as emitted by GCC for the C source above.
#
# In:   a1 = cookie (state_t *).  a0 (iterations) is never read; it is
#       overwritten as a scratch register below.
# Entry: allocate a 464-byte frame, save s8 only, load state->buf (offset 24)
#       into t3 and state->lastone (offset 48) into s8, and leave through
#       .L37 when lastone < buf (unsigned compare), i.e. zero loop passes.
#
# Values prepared for the loop at .L35:
#   a7 = current base pointer (starts at buf)
#   s7 = (((lastone - buf) >> 9) + 1) << 7  = number of ints left to store
#   t1 = vlenb << 5   byte distance the base advances per loop pass
#   t2 = vlenb << 3   ints handled per loop pass
#   t3 = vlenb >> 2   ints per single e32/m1 vector store
#   v1 = every element set to 1 (vmv.v.i under e32,m1)
#   88(sp)..248(sp)   pointers a7 + k*vlenb for k = 1..21
#   296(sp)..360(sp)  byte offsets   k*vlenb for k = 22..30
# The remaining callee-saved registers (s0-s7, s9-s11) are saved in between
# the address computations rather than in one block at entry.
fwr:
.LFB0:
        .cfi_startproc
        addi    sp,sp,-464
        .cfi_def_cfa_offset 464
        sd      s8,392(sp)
        ld      t3,24(a1)
        .cfi_offset 24, -72
        ld      s8,48(a1)
        bltu    s8,t3,.L37
        # a5 = vlenb; everything below is scaled from it.
        csrr    a5,vlenb
        li      a3,20
        mul     a3,a3,a5
        li      t0,6
        li      t6,10
        li      t5,11
        li      t4,12
        mv      a7,t3
        sd      s0,456(sp)
        li      a6,13
        .cfi_offset 8, -8
        slli    s0,a5,4
        li      a0,14
        mul     t0,t0,a5
        add     a3,a7,a3
        li      a1,18
        li      a2,19
        sd      s2,440(sp)
        sd      a3,240(sp)
        .cfi_offset 18, -24
        sub     s2,s0,a5
        li      a4,21
        li      a3,22
        add     s2,a7,s2
        mul     t6,t6,a5
        slli    t2,a5,3
        sd      s2,200(sp)
        add     s2,s0,a5
        add     s0,a7,s0
        sd      s0,208(sp)
        add     s0,a7,t2
        sd      s0,144(sp)
        add     s0,a7,t0
        sd      s0,128(sp)
        mul     t5,t5,a5
        add     s0,a7,t6
        # s8 = lastone - buf in bytes; turned into the element count below.
        sub     s8,s8,t3
        sd      s0,160(sp)
        vsetvli t3,zero,e32,m1,ta,ma
        slli    t3,a5,1
        sd      s5,416(sp)
        .cfi_offset 21, -48
        slli    s5,a5,2
        sd      s3,432(sp)
        sd      s4,424(sp)
        mul     t4,t4,a5
        add     s0,a7,t5
        sd      s0,168(sp)
        sd      s6,408(sp)
        sd      s7,400(sp)
        .cfi_offset 19, -32
        .cfi_offset 20, -40
        .cfi_offset 22, -56
        .cfi_offset 23, -64
        add     s6,s5,a5
        sub     s7,s5,a5
        sub     s4,t2,a5
        add     s3,t2,a5
        add     s7,a7,s7
        mul     a6,a6,a5
        add     s0,a7,t4
        sd      s0,176(sp)
        add     s0,a7,t3
        sd      s0,96(sp)
        add     s0,a7,a5
        sd      s0,88(sp)
        add     s6,a7,s6
        add     s5,a7,s5
        add     s4,a7,s4
        mul     a0,a0,a5
        add     s3,a7,s3
        add     s2,a7,s2
        add     s0,a7,a6
        sd      s9,384(sp)
        sd      s10,376(sp)
        sd      s11,368(sp)
        sd      s7,104(sp)
        sd      s1,448(sp)
        .cfi_offset 25, -80
        .cfi_offset 26, -88
        .cfi_offset 27, -96
        .cfi_offset 9, -16
        sd      s6,120(sp)
        mul     a1,a1,a5
        add     a0,a7,a0
        sd      s5,112(sp)
        sd      s4,136(sp)
        sd      s3,152(sp)
        sd      s2,216(sp)
        sd      s0,184(sp)
        sd      a0,192(sp)
        li      s9,25
        li      s10,26
        mul     a2,a2,a5
        add     a1,a7,a1
        sd      a1,224(sp)
        li      s11,27
        # Per-pass stride (t1), per-store element count (t3), total count (s7)
        # and the splatted constant (v1).
        slli    t1,a5,5
        srli    t3,a5,2
        srli    s8,s8,9
        vmv.v.i v1,1
        addi    s8,s8,1
        slli    s7,s8,7
        mul     a4,a4,a5
        add     a2,a7,a2
        sd      a2,232(sp)
        mul     a3,a3,a5
        add     a4,a7,a4
        sd      a3,296(sp)
        sd      a4,248(sp)
        li      a4,23
        mul     a4,a4,a5
        sd      a4,304(sp)
        li      a4,24
        mul     a4,a4,a5
        sd      a4,312(sp)
        mul     a4,s9,a5
        sd      a4,320(sp)
        mul     a4,s10,a5
        sd      a4,328(sp)
        mul     a4,s11,a5
        sd      a4,336(sp)
        li      a4,28
        mul     a4,a4,a5
        sd      a4,344(sp)
        li      a4,29
        mul     a4,a4,a5
        sd      a4,352(sp)
        li      a4,30
        mul     a5,a4,a5
        sd      a5,360(sp)
# .L35 -- top of the store loop: work out the length of each vector store.
#
# s1 starts as min(s7, t2): the number of ints this pass will write.
# Each step from .L3 to .L33 then does the same thing:
#     len = min(s1, t3);  s1 -= len      (the subtraction opens the next step)
# producing 31 per-store lengths.  They are kept, in order, in
#     s5, s6, s4, 8(sp), s3, s2, 16(sp), s11,
#     24(sp), 32(sp), 40(sp), 48(sp), 56(sp), 64(sp), 72(sp), 80(sp),
#     s10, s9, s8, s0, t0, t6, t5, t4, a6, a0, a1, a2, a3, a4, a5
# Lengths that do not fit in registers are written to the stack first and
# conditionally overwritten with t3, then reloaded through a5.
# All comparisons are unsigned (bleu).
.L35:
        mv      s1,s7
        bleu    s7,t2,.L3
        mv      s1,t2
.L3:
        mv      s5,s1
        bleu    s1,t3,.L4
        mv      s5,t3
.L4:
        sub     s1,s1,s5
        mv      s6,s1
        bleu    s1,t3,.L5
        mv      s6,t3
.L5:
        sub     s1,s1,s6
        mv      s4,s1
        bleu    s1,t3,.L6
        mv      s4,t3
.L6:
        sub     s1,s1,s4
        sd      s1,8(sp)
        bleu    s1,t3,.L7
        sd      t3,8(sp)
.L7:
        ld      a5,8(sp)
        sub     s1,s1,a5
        mv      s3,s1
        bleu    s1,t3,.L8
        mv      s3,t3
.L8:
        sub     s1,s1,s3
        mv      s2,s1
        bleu    s1,t3,.L9
        mv      s2,t3
.L9:
        sub     s1,s1,s2
        sd      s1,16(sp)
        bleu    s1,t3,.L10
        sd      t3,16(sp)
.L10:
        ld      a5,16(sp)
        sub     s1,s1,a5
        mv      s11,s1
        bleu    s1,t3,.L11
        mv      s11,t3
.L11:
        sub     s1,s1,s11
        sd      s1,24(sp)
        bleu    s1,t3,.L12
        sd      t3,24(sp)
.L12:
        ld      a5,24(sp)
        sub     s1,s1,a5
        sd      s1,32(sp)
        bleu    s1,t3,.L13
        sd      t3,32(sp)
.L13:
        ld      a5,32(sp)
        sub     s1,s1,a5
        sd      s1,40(sp)
        bleu    s1,t3,.L14
        sd      t3,40(sp)
.L14:
        ld      a5,40(sp)
        sub     s1,s1,a5
        sd      s1,48(sp)
        bleu    s1,t3,.L15
        sd      t3,48(sp)
.L15:
        ld      a5,48(sp)
        sub     s1,s1,a5
        sd      s1,56(sp)
        bleu    s1,t3,.L16
        sd      t3,56(sp)
.L16:
        ld      a5,56(sp)
        sub     s1,s1,a5
        sd      s1,64(sp)
        bleu    s1,t3,.L17
        sd      t3,64(sp)
.L17:
        ld      a5,64(sp)
        sub     s1,s1,a5
        sd      s1,72(sp)
        bleu    s1,t3,.L18
        sd      t3,72(sp)
.L18:
        ld      a5,72(sp)
        sub     s1,s1,a5
        sd      s1,80(sp)
        bleu    s1,t3,.L19
        sd      t3,80(sp)
.L19:
        ld      a5,80(sp)
        sub     s1,s1,a5
        mv      s10,s1
        bleu    s1,t3,.L20
        mv      s10,t3
.L20:
        sub     s1,s1,s10
        mv      s9,s1
        bleu    s1,t3,.L21
        mv      s9,t3
.L21:
        sub     s1,s1,s9
        mv      s8,s1
        bleu    s1,t3,.L22
        mv      s8,t3
.L22:
        sub     s1,s1,s8
        mv      s0,s1
        bleu    s1,t3,.L23
        mv      s0,t3
.L23:
        sub     s1,s1,s0
        mv      t0,s1
        bleu    s1,t3,.L24
        mv      t0,t3
.L24:
        sub     s1,s1,t0
        mv      t6,s1
        bleu    s1,t3,.L25
        mv      t6,t3
.L25:
        sub     s1,s1,t6
        mv      t5,s1
        bleu    s1,t3,.L26
        mv      t5,t3
.L26:
        sub     s1,s1,t5
        mv      t4,s1
        bleu    s1,t3,.L27
        mv      t4,t3
.L27:
        sub     s1,s1,t4
        mv      a6,s1
        bleu    s1,t3,.L28
        mv      a6,t3
.L28:
        sub     s1,s1,a6
        mv      a0,s1
        bleu    s1,t3,.L29
        mv      a0,t3
.L29:
        sub     s1,s1,a0
        mv      a1,s1
        bleu    s1,t3,.L30
        mv      a1,t3
.L30:
        sub     s1,s1,a1
        mv      a2,s1
        bleu    s1,t3,.L31
        mv      a2,t3
.L31:
        sub     s1,s1,a2
        mv      a3,s1
        bleu    s1,t3,.L32
        mv      a3,t3
.L32:
        sub     s1,s1,a3
        mv      a4,s1
        bleu    s1,t3,.L33
        mv      a4,t3
.L33:
        sub     s1,s1,a4
        mv      a5,s1
        bleu    s1,t3,.L34
        mv      a5,t3
# .L34 -- the 32 vector stores of one loop pass.
#
# Every store is a `vsetvli zero,<len>,e32,m1,ta,ma` / `vse32.v v1,0(<addr>)`
# pair that writes <len> ints from v1.  The first 31 lengths come from the
# clamp chain that precedes this label; the 32nd is the remainder s1 - a5,
# parked in 256(sp) and reloaded just before the final store.
#
# Address handling, interleaved with the stores:
#   - store 0 uses a7 directly;
#   - the pointers in 88(sp)..248(sp) are reloaded, used, advanced by t1 and
#     written back for the next pass;
#   - the offsets in 296(sp)..360(sp) are added to a7 on every pass (several
#     results go through the temporaries 264/272/280/288(sp) and 8(sp));
#   - the last address, a7 + 31*vlenb, is rebuilt from a fresh `csrr vlenb`
#     and kept in 16(sp).
# 8(sp) and 16(sp) are reused here for addresses once the lengths they held
# have been consumed.
#
# Loop control: s11 takes the old s7, s7 -= t2, a7 += t1, and the loop repeats
# while the old count was greater than t2 (unsigned).
.L34:
        vsetvli zero,s5,e32,m1,ta,ma
        vse32.v v1,0(a7)
        ld      s5,296(sp)
        sub     s1,s1,a5
        vsetvli zero,s6,e32,m1,ta,ma
        add     s5,a7,s5
        sd      s1,256(sp)
        ld      s1,304(sp)
        sd      s5,264(sp)
        ld      s5,312(sp)
        add     s1,a7,s1
        sd      s1,272(sp)
        ld      s1,88(sp)
        add     s5,a7,s5
        sd      s5,280(sp)
        vse32.v v1,0(s1)
        add     s6,s1,t1
        ld      s1,96(sp)
        vsetvli zero,s4,e32,m1,ta,ma
        sd      s6,88(sp)
        ld      s6,320(sp)
        add     s4,s1,t1
        add     s6,a7,s6
        sd      s6,288(sp)
        ld      s6,328(sp)
        vse32.v v1,0(s1)
        ld      s1,104(sp)
        add     s6,a7,s6
        sd      s4,96(sp)
        ld      s4,336(sp)
        add     s5,a7,s4
        ld      s4,8(sp)
        vsetvli zero,s4,e32,m1,ta,ma
        vse32.v v1,0(s1)
        add     s4,s1,t1
        ld      s1,112(sp)
        vsetvli zero,s3,e32,m1,ta,ma
        sd      s4,104(sp)
        ld      s4,344(sp)
        vse32.v v1,0(s1)
        add     s3,s1,t1
        vsetvli zero,s2,e32,m1,ta,ma
        add     s4,a7,s4
        sd      s3,112(sp)
        ld      s3,352(sp)
        ld      s1,120(sp)
        add     s3,a7,s3
        vse32.v v1,0(s1)
        add     s2,s1,t1
        sd      s2,120(sp)
        ld      s2,360(sp)
        add     s1,a7,s2
        ld      s2,16(sp)
        sd      s1,8(sp)
        ld      s1,128(sp)
        vsetvli zero,s2,e32,m1,ta,ma
        vse32.v v1,0(s1)
        add     s2,s1,t1
        vsetvli zero,s11,e32,m1,ta,ma
        sd      s2,128(sp)
        # Rebuild 31*vlenb as (vlenb << 5) - vlenb for the last store address.
        csrr    s2,vlenb
        slli    s1,s2,5
        sub     s1,s1,s2
        add     s2,a7,s1
        ld      s1,136(sp)
        sd      s2,16(sp)
        ld      s2,24(sp)
        vse32.v v1,0(s1)
        add     s11,s1,t1
        ld      s1,144(sp)
        vsetvli zero,s2,e32,m1,ta,ma
        ld      s2,32(sp)
        sd      s11,136(sp)
        vse32.v v1,0(s1)
        add     s1,s1,t1
        vsetvli zero,s2,e32,m1,ta,ma
        ld      s2,40(sp)
        # Loop bookkeeping: remember the old count, advance base and count.
        mv      s11,s7
        sd      s1,144(sp)
        ld      s1,152(sp)
        add     a7,a7,t1
        sub     s7,s7,t2
        vse32.v v1,0(s1)
        add     s1,s1,t1
        vsetvli zero,s2,e32,m1,ta,ma
        sd      s1,152(sp)
        ld      s1,160(sp)
        vse32.v v1,0(s1)
        add     s1,s1,t1
        sd      s1,160(sp)
        ld      s1,168(sp)
        ld      s2,48(sp)
        vsetvli zero,s2,e32,m1,ta,ma
        vse32.v v1,0(s1)
        ld      s2,56(sp)
        add     s1,s1,t1
        vsetvli zero,s2,e32,m1,ta,ma
        sd      s1,168(sp)
        ld      s1,176(sp)
        ld      s2,64(sp)
        vse32.v v1,0(s1)
        add     s1,s1,t1
        vsetvli zero,s2,e32,m1,ta,ma
        ld      s2,72(sp)
        sd      s1,176(sp)
        ld      s1,184(sp)
        vse32.v v1,0(s1)
        add     s1,s1,t1
        vsetvli zero,s2,e32,m1,ta,ma
        ld      s2,80(sp)
        sd      s1,184(sp)
        ld      s1,192(sp)
        vse32.v v1,0(s1)
        add     s1,s1,t1
        vsetvli zero,s2,e32,m1,ta,ma
        ld      s2,208(sp)
        sd      s1,192(sp)
        ld      s1,200(sp)
        vse32.v v1,0(s1)
        vsetvli zero,s10,e32,m1,ta,ma
        ld      s10,216(sp)
        vse32.v v1,0(s2)
        vsetvli zero,s9,e32,m1,ta,ma
        add     s9,s1,t1
        vse32.v v1,0(s10)
        vsetvli zero,s8,e32,m1,ta,ma
        sd      s9,200(sp)
        add     s9,s2,t1
        sd      s9,208(sp)
        add     s9,s10,t1
        sd      s9,216(sp)
        ld      s1,224(sp)
        vse32.v v1,0(s1)
        add     s8,s1,t1
        ld      s1,232(sp)
        vsetvli zero,s0,e32,m1,ta,ma
        ld      s0,240(sp)
        sd      s8,224(sp)
        vse32.v v1,0(s1)
        add     s8,s1,t1
        vsetvli zero,t0,e32,m1,ta,ma
        add     t0,s0,t1
        ld      s1,272(sp)
        sd      s8,232(sp)
        vse32.v v1,0(s0)
        ld      s0,248(sp)
        vsetvli zero,t6,e32,m1,ta,ma
        add     t6,s0,t1
        sd      t0,240(sp)
        vse32.v v1,0(s0)
        ld      s0,264(sp)
        vsetvli zero,t5,e32,m1,ta,ma
        sd      t6,248(sp)
        vse32.v v1,0(s0)
        vsetvli zero,t4,e32,m1,ta,ma
        ld      s0,280(sp)
        vse32.v v1,0(s1)
        vsetvli zero,a6,e32,m1,ta,ma
        vse32.v v1,0(s0)
        ld      s0,288(sp)
        vsetvli zero,a0,e32,m1,ta,ma
        vse32.v v1,0(s0)
        vsetvli zero,a1,e32,m1,ta,ma
        vse32.v v1,0(s6)
        vsetvli zero,a2,e32,m1,ta,ma
        vse32.v v1,0(s5)
        vsetvli zero,a3,e32,m1,ta,ma
        vse32.v v1,0(s4)
        vsetvli zero,a4,e32,m1,ta,ma
        vse32.v v1,0(s3)
        ld      a4,8(sp)
        vsetvli zero,a5,e32,m1,ta,ma
        ld      a5,256(sp)
        ld      s2,16(sp)
        vse32.v v1,0(a4)
        vsetvli zero,a5,e32,m1,ta,ma
        vse32.v v1,0(s2)
        # Repeat while the count before this pass exceeded one pass's worth.
        bgtu    s11,t2,.L35
        # Loop exit: reload the callee-saved registers that were saved after
        # the early-exit branch (s0-s7, s9-s11).
        ld      s0,456(sp)
        .cfi_restore 8
        ld      s1,448(sp)
        .cfi_restore 9
        ld      s2,440(sp)
        .cfi_restore 18
        ld      s3,432(sp)
        .cfi_restore 19
        ld      s4,424(sp)
        .cfi_restore 20
        ld      s5,416(sp)
        .cfi_restore 21
        ld      s6,408(sp)
        .cfi_restore 22
        ld      s7,400(sp)
        .cfi_restore 23
        ld      s9,384(sp)
        .cfi_restore 25
        ld      s10,376(sp)
        .cfi_restore 26
        ld      s11,368(sp)
        .cfi_restore 27
# .L37 -- common exit, also reached directly from the entry check when
# lastone < buf: only s8 had been saved at that point, so only s8 is restored
# here before the frame is released and control returns through ra.
.L37:
        ld      s8,392(sp)
        .cfi_restore 24
        addi    sp,sp,464
        .cfi_def_cfa_offset 0
        jr      ra
        .cfi_endproc
.LFE0:
        .size   fwr, .-fwr
        .ident  "GCC: (g0b4fd672bf0) 15.0.0 20240702 (experimental)"
        .section        .note.GNU-stack,"",@progbits

Reply via email to