https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115759
Bug ID: 115759
Summary: RISC-V: complex code generated for lmbench's fwr when using scalable autovec
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: deminhan at gcc dot gnu.org
Target Milestone: ---

compile option: -march=rv64gcv -O3 -mrvv-vector-bits=scalable

source code:

typedef int size_t;
typedef unsigned long long uint64;
#define NULL 0
#define TYPE int

typedef struct _state {
    double overhead;
    size_t nbytes;
    int need_buf2;
    int aligned;
    TYPE *buf;
    TYPE *buf2;
    TYPE *buf2_orig;
    TYPE *lastone;
    size_t N;
} state_t;

void fwr(int iterations, void *cookie)
{
    state_t *state = (state_t *) cookie;
    register TYPE *lastone = state->lastone;
    TYPE* p_save = NULL;
    int a[1000];

    //while (iterations-- > 0) {
        register TYPE *p = state->buf;
        while (p <= lastone) {
            #define DOIT(i) p[i]=
            DOIT(0) DOIT(1) DOIT(2) DOIT(3) DOIT(4) DOIT(5) DOIT(6) DOIT(7)
            DOIT(8) DOIT(9) DOIT(10) DOIT(11) DOIT(12) DOIT(13) DOIT(14) DOIT(15)
            DOIT(16) DOIT(17) DOIT(18) DOIT(19) DOIT(20) DOIT(21) DOIT(22) DOIT(23)
            DOIT(24) DOIT(25) DOIT(26) DOIT(27) DOIT(28) DOIT(29) DOIT(30) DOIT(31)
            DOIT(32) DOIT(33) DOIT(34) DOIT(35) DOIT(36) DOIT(37) DOIT(38) DOIT(39)
            DOIT(40) DOIT(41) DOIT(42) DOIT(43) DOIT(44) DOIT(45) DOIT(46) DOIT(47)
            DOIT(48) DOIT(49) DOIT(50) DOIT(51) DOIT(52) DOIT(53) DOIT(54) DOIT(55)
            DOIT(56) DOIT(57) DOIT(58) DOIT(59) DOIT(60) DOIT(61) DOIT(62) DOIT(63)
            DOIT(64) DOIT(65) DOIT(66) DOIT(67) DOIT(68) DOIT(69) DOIT(70) DOIT(71)
            DOIT(72) DOIT(73) DOIT(74) DOIT(75) DOIT(76) DOIT(77) DOIT(78) DOIT(79)
            DOIT(80) DOIT(81) DOIT(82) DOIT(83) DOIT(84) DOIT(85) DOIT(86) DOIT(87)
            DOIT(88) DOIT(89) DOIT(90) DOIT(91) DOIT(92) DOIT(93) DOIT(94) DOIT(95)
            DOIT(96) DOIT(97) DOIT(98) DOIT(99) DOIT(100) DOIT(101) DOIT(102) DOIT(103)
            DOIT(104) DOIT(105) DOIT(106) DOIT(107) DOIT(108) DOIT(109) DOIT(110) DOIT(111)
            DOIT(112) DOIT(113) DOIT(114) DOIT(115) DOIT(116) DOIT(117) DOIT(118) DOIT(119)
            DOIT(120) DOIT(121) DOIT(122) DOIT(123) DOIT(124) DOIT(125) DOIT(126) DOIT(127) 1;
            p += 128;
        }
        p_save = p;
    //}
    //use_pointer(p_save);
}
#undef DOIT

assembly code:

.file "test.c" .option nopic .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_zifencei2p0_zaamo1p0_zalrsc1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" .attribute unaligned_access, 0 .attribute stack_align, 16 .text .align 1 .globl fwr .type fwr, @function fwr: .LFB0: .cfi_startproc addi sp,sp,-464 .cfi_def_cfa_offset 464 sd s8,392(sp) ld t3,24(a1) .cfi_offset 24, -72 ld s8,48(a1) bltu s8,t3,.L37 csrr a5,vlenb li a3,20 mul a3,a3,a5 li t0,6 li t6,10 li t5,11 li t4,12 mv a7,t3 sd s0,456(sp) li a6,13 .cfi_offset 8, -8 slli s0,a5,4 li a0,14 mul t0,t0,a5 add a3,a7,a3 li a1,18 li a2,19 sd s2,440(sp) sd a3,240(sp) .cfi_offset 18, -24 sub s2,s0,a5 li a4,21 li a3,22 add s2,a7,s2 mul t6,t6,a5 slli t2,a5,3 sd s2,200(sp) add s2,s0,a5 add s0,a7,s0 sd s0,208(sp) add s0,a7,t2 sd s0,144(sp) add s0,a7,t0 sd s0,128(sp) mul t5,t5,a5 add s0,a7,t6 sub s8,s8,t3 sd s0,160(sp) vsetvli t3,zero,e32,m1,ta,ma slli t3,a5,1 sd s5,416(sp) .cfi_offset 21, -48 slli s5,a5,2 sd s3,432(sp) sd s4,424(sp) mul t4,t4,a5 add s0,a7,t5 sd s0,168(sp) sd s6,408(sp) sd s7,400(sp) .cfi_offset 19, -32 .cfi_offset 20, -40 .cfi_offset 22, -56 .cfi_offset 23, -64 add s6,s5,a5 sub s7,s5,a5 sub s4,t2,a5 add s3,t2,a5 add s7,a7,s7 mul a6,a6,a5 add s0,a7,t4 sd s0,176(sp) add s0,a7,t3 sd s0,96(sp) add s0,a7,a5 sd s0,88(sp) add s6,a7,s6 add s5,a7,s5 add s4,a7,s4 mul a0,a0,a5 add s3,a7,s3 add s2,a7,s2 add s0,a7,a6 sd s9,384(sp) sd s10,376(sp) sd s11,368(sp) sd s7,104(sp) sd s1,448(sp) .cfi_offset 25, -80 .cfi_offset 26, -88 .cfi_offset 27, -96 .cfi_offset 9, -16 sd s6,120(sp) mul a1,a1,a5 add a0,a7,a0 sd s5,112(sp) sd s4,136(sp) sd s3,152(sp) sd s2,216(sp) sd s0,184(sp) sd a0,192(sp) li s9,25 li s10,26 mul a2,a2,a5 add a1,a7,a1 sd a1,224(sp) li s11,27 slli t1,a5,5 srli t3,a5,2 srli s8,s8,9 vmv.v.i v1,1 addi s8,s8,1 slli s7,s8,7 mul
a4,a4,a5 add a2,a7,a2 sd a2,232(sp) mul a3,a3,a5 add a4,a7,a4 sd a3,296(sp) sd a4,248(sp) li a4,23 mul a4,a4,a5 sd a4,304(sp) li a4,24 mul a4,a4,a5 sd a4,312(sp) mul a4,s9,a5 sd a4,320(sp) mul a4,s10,a5 sd a4,328(sp) mul a4,s11,a5 sd a4,336(sp) li a4,28 mul a4,a4,a5 sd a4,344(sp) li a4,29 mul a4,a4,a5 sd a4,352(sp) li a4,30 mul a5,a4,a5 sd a5,360(sp) .L35: mv s1,s7 bleu s7,t2,.L3 mv s1,t2 .L3: mv s5,s1 bleu s1,t3,.L4 mv s5,t3 .L4: sub s1,s1,s5 mv s6,s1 bleu s1,t3,.L5 mv s6,t3 .L5: sub s1,s1,s6 mv s4,s1 bleu s1,t3,.L6 mv s4,t3 .L6: sub s1,s1,s4 sd s1,8(sp) bleu s1,t3,.L7 sd t3,8(sp) .L7: ld a5,8(sp) sub s1,s1,a5 mv s3,s1 bleu s1,t3,.L8 mv s3,t3 .L8: sub s1,s1,s3 mv s2,s1 bleu s1,t3,.L9 mv s2,t3 .L9: sub s1,s1,s2 sd s1,16(sp) bleu s1,t3,.L10 sd t3,16(sp) .L10: ld a5,16(sp) sub s1,s1,a5 mv s11,s1 bleu s1,t3,.L11 mv s11,t3 .L11: sub s1,s1,s11 sd s1,24(sp) bleu s1,t3,.L12 sd t3,24(sp) .L12: ld a5,24(sp) sub s1,s1,a5 sd s1,32(sp) bleu s1,t3,.L13 sd t3,32(sp) .L13: ld a5,32(sp) sub s1,s1,a5 sd s1,40(sp) bleu s1,t3,.L14 sd t3,40(sp) .L14: ld a5,40(sp) sub s1,s1,a5 sd s1,48(sp) bleu s1,t3,.L15 sd t3,48(sp) .L15: ld a5,48(sp) sub s1,s1,a5 sd s1,56(sp) bleu s1,t3,.L16 sd t3,56(sp) .L16: ld a5,56(sp) sub s1,s1,a5 sd s1,64(sp) bleu s1,t3,.L17 sd t3,64(sp) .L17: ld a5,64(sp) sub s1,s1,a5 sd s1,72(sp) bleu s1,t3,.L18 sd t3,72(sp) .L18: ld a5,72(sp) sub s1,s1,a5 sd s1,80(sp) bleu s1,t3,.L19 sd t3,80(sp) .L19: ld a5,80(sp) sub s1,s1,a5 mv s10,s1 bleu s1,t3,.L20 mv s10,t3 .L20: sub s1,s1,s10 mv s9,s1 bleu s1,t3,.L21 mv s9,t3 .L21: sub s1,s1,s9 mv s8,s1 bleu s1,t3,.L22 mv s8,t3 .L22: sub s1,s1,s8 mv s0,s1 bleu s1,t3,.L23 mv s0,t3 .L23: sub s1,s1,s0 mv t0,s1 bleu s1,t3,.L24 mv t0,t3 .L24: sub s1,s1,t0 mv t6,s1 bleu s1,t3,.L25 mv t6,t3 .L25: sub s1,s1,t6 mv t5,s1 bleu s1,t3,.L26 mv t5,t3 .L26: sub s1,s1,t5 mv t4,s1 bleu s1,t3,.L27 mv t4,t3 .L27: sub s1,s1,t4 mv a6,s1 bleu s1,t3,.L28 mv a6,t3 .L28: sub s1,s1,a6 mv a0,s1 bleu s1,t3,.L29 mv a0,t3 .L29: sub s1,s1,a0 mv a1,s1 bleu 
s1,t3,.L30 mv a1,t3 .L30: sub s1,s1,a1 mv a2,s1 bleu s1,t3,.L31 mv a2,t3 .L31: sub s1,s1,a2 mv a3,s1 bleu s1,t3,.L32 mv a3,t3 .L32: sub s1,s1,a3 mv a4,s1 bleu s1,t3,.L33 mv a4,t3 .L33: sub s1,s1,a4 mv a5,s1 bleu s1,t3,.L34 mv a5,t3 .L34: vsetvli zero,s5,e32,m1,ta,ma vse32.v v1,0(a7) ld s5,296(sp) sub s1,s1,a5 vsetvli zero,s6,e32,m1,ta,ma add s5,a7,s5 sd s1,256(sp) ld s1,304(sp) sd s5,264(sp) ld s5,312(sp) add s1,a7,s1 sd s1,272(sp) ld s1,88(sp) add s5,a7,s5 sd s5,280(sp) vse32.v v1,0(s1) add s6,s1,t1 ld s1,96(sp) vsetvli zero,s4,e32,m1,ta,ma sd s6,88(sp) ld s6,320(sp) add s4,s1,t1 add s6,a7,s6 sd s6,288(sp) ld s6,328(sp) vse32.v v1,0(s1) ld s1,104(sp) add s6,a7,s6 sd s4,96(sp) ld s4,336(sp) add s5,a7,s4 ld s4,8(sp) vsetvli zero,s4,e32,m1,ta,ma vse32.v v1,0(s1) add s4,s1,t1 ld s1,112(sp) vsetvli zero,s3,e32,m1,ta,ma sd s4,104(sp) ld s4,344(sp) vse32.v v1,0(s1) add s3,s1,t1 vsetvli zero,s2,e32,m1,ta,ma add s4,a7,s4 sd s3,112(sp) ld s3,352(sp) ld s1,120(sp) add s3,a7,s3 vse32.v v1,0(s1) add s2,s1,t1 sd s2,120(sp) ld s2,360(sp) add s1,a7,s2 ld s2,16(sp) sd s1,8(sp) ld s1,128(sp) vsetvli zero,s2,e32,m1,ta,ma vse32.v v1,0(s1) add s2,s1,t1 vsetvli zero,s11,e32,m1,ta,ma sd s2,128(sp) csrr s2,vlenb slli s1,s2,5 sub s1,s1,s2 add s2,a7,s1 ld s1,136(sp) sd s2,16(sp) ld s2,24(sp) vse32.v v1,0(s1) add s11,s1,t1 ld s1,144(sp) vsetvli zero,s2,e32,m1,ta,ma ld s2,32(sp) sd s11,136(sp) vse32.v v1,0(s1) add s1,s1,t1 vsetvli zero,s2,e32,m1,ta,ma ld s2,40(sp) mv s11,s7 sd s1,144(sp) ld s1,152(sp) add a7,a7,t1 sub s7,s7,t2 vse32.v v1,0(s1) add s1,s1,t1 vsetvli zero,s2,e32,m1,ta,ma sd s1,152(sp) ld s1,160(sp) vse32.v v1,0(s1) add s1,s1,t1 sd s1,160(sp) ld s1,168(sp) ld s2,48(sp) vsetvli zero,s2,e32,m1,ta,ma vse32.v v1,0(s1) ld s2,56(sp) add s1,s1,t1 vsetvli zero,s2,e32,m1,ta,ma sd s1,168(sp) ld s1,176(sp) ld s2,64(sp) vse32.v v1,0(s1) add s1,s1,t1 vsetvli zero,s2,e32,m1,ta,ma ld s2,72(sp) sd s1,176(sp) ld s1,184(sp) vse32.v v1,0(s1) add s1,s1,t1 vsetvli zero,s2,e32,m1,ta,ma ld s2,80(sp) 
sd s1,184(sp) ld s1,192(sp) vse32.v v1,0(s1) add s1,s1,t1 vsetvli zero,s2,e32,m1,ta,ma ld s2,208(sp) sd s1,192(sp) ld s1,200(sp) vse32.v v1,0(s1) vsetvli zero,s10,e32,m1,ta,ma ld s10,216(sp) vse32.v v1,0(s2) vsetvli zero,s9,e32,m1,ta,ma add s9,s1,t1 vse32.v v1,0(s10) vsetvli zero,s8,e32,m1,ta,ma sd s9,200(sp) add s9,s2,t1 sd s9,208(sp) add s9,s10,t1 sd s9,216(sp) ld s1,224(sp) vse32.v v1,0(s1) add s8,s1,t1 ld s1,232(sp) vsetvli zero,s0,e32,m1,ta,ma ld s0,240(sp) sd s8,224(sp) vse32.v v1,0(s1) add s8,s1,t1 vsetvli zero,t0,e32,m1,ta,ma add t0,s0,t1 ld s1,272(sp) sd s8,232(sp) vse32.v v1,0(s0) ld s0,248(sp) vsetvli zero,t6,e32,m1,ta,ma add t6,s0,t1 sd t0,240(sp) vse32.v v1,0(s0) ld s0,264(sp) vsetvli zero,t5,e32,m1,ta,ma sd t6,248(sp) vse32.v v1,0(s0) vsetvli zero,t4,e32,m1,ta,ma ld s0,280(sp) vse32.v v1,0(s1) vsetvli zero,a6,e32,m1,ta,ma vse32.v v1,0(s0) ld s0,288(sp) vsetvli zero,a0,e32,m1,ta,ma vse32.v v1,0(s0) vsetvli zero,a1,e32,m1,ta,ma vse32.v v1,0(s6) vsetvli zero,a2,e32,m1,ta,ma vse32.v v1,0(s5) vsetvli zero,a3,e32,m1,ta,ma vse32.v v1,0(s4) vsetvli zero,a4,e32,m1,ta,ma vse32.v v1,0(s3) ld a4,8(sp) vsetvli zero,a5,e32,m1,ta,ma ld a5,256(sp) ld s2,16(sp) vse32.v v1,0(a4) vsetvli zero,a5,e32,m1,ta,ma vse32.v v1,0(s2) bgtu s11,t2,.L35 ld s0,456(sp) .cfi_restore 8 ld s1,448(sp) .cfi_restore 9 ld s2,440(sp) .cfi_restore 18 ld s3,432(sp) .cfi_restore 19 ld s4,424(sp) .cfi_restore 20 ld s5,416(sp) .cfi_restore 21 ld s6,408(sp) .cfi_restore 22 ld s7,400(sp) .cfi_restore 23 ld s9,384(sp) .cfi_restore 25 ld s10,376(sp) .cfi_restore 26 ld s11,368(sp) .cfi_restore 27 .L37: ld s8,392(sp) .cfi_restore 24 addi sp,sp,464 .cfi_def_cfa_offset 0 jr ra .cfi_endproc .LFE0: .size fwr, .-fwr .ident "GCC: (g0b4fd672bf0) 15.0.0 20240702 (experimental)" .section .note.GNU-stack,"",@progbits