https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111450
Bug ID: 111450 Summary: RISC-V: Missed optimization for strided load/store with stride = element width Product: gcc Version: 14.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: juzhe.zhong at rivai dot ai Target Milestone: --- Consider the following cases: https://godbolt.org/z/3MPoz5q6x #include "riscv_vector.h" void foo (int8_t *in, int8_t *out, int n) { vint8m1_t v = __riscv_vlse8_v_i8m1 (in, 1, n); __riscv_vsse8_v_i8m1 (out, 1, v, n); } void foo1 (int16_t *in, int16_t *out, int n) { vint16m1_t v = __riscv_vlse16_v_i16m1 (in, 2, n); __riscv_vsse16_v_i16m1 (out, 2, v, n); } void foo3 (int32_t *in, int32_t *out, int n) { vint32m1_t v = __riscv_vlse32_v_i32m1 (in, 4, n); __riscv_vsse32_v_i32m1 (out, 4, v, n); } void foo4 (int64_t *in, int64_t *out, int n) { vint64m1_t v = __riscv_vlse64_v_i64m1 (in, 8, n); __riscv_vsse64_v_i64m1 (out, 8, v, n); } ASM: foo: li a5,1 vsetvli zero,a2,e8,m1,ta,ma vlse8.v v1,0(a0),a5 vsse8.v v1,0(a1),a5 ret foo1: li a5,2 vsetvli zero,a2,e16,m1,ta,ma vlse16.v v1,0(a0),a5 vsse16.v v1,0(a1),a5 ret foo3: li a5,4 vsetvli zero,a2,e32,m1,ta,ma vlse32.v v1,0(a0),a5 vsse32.v v1,0(a1),a5 ret foo4: li a5,8 vsetvli zero,a2,e64,m1,ta,ma vlse64.v v1,0(a0),a5 vsse64.v v1,0(a1),a5 ret When stride = element width, vlse should be optimized into vle.v. vsse should be optimized into vse.v. So we can save a constant move instruction.