https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111450

            Bug ID: 111450
           Summary: RISC-V: Missed optimization for strided load/store
                    with stride = element width
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: juzhe.zhong at rivai dot ai
  Target Milestone: ---

Consider the following cases:

https://godbolt.org/z/3MPoz5q6x

#include "riscv_vector.h"

void foo (int8_t *in, int8_t *out, int n)
{
    vint8m1_t v = __riscv_vlse8_v_i8m1 (in, 1, n);
    __riscv_vsse8_v_i8m1 (out, 1, v, n);
}

void foo1 (int16_t *in, int16_t *out, int n)
{
    vint16m1_t v = __riscv_vlse16_v_i16m1 (in, 2, n);
    __riscv_vsse16_v_i16m1 (out, 2, v, n);
}

void foo3 (int32_t *in, int32_t *out, int n)
{
    vint32m1_t v = __riscv_vlse32_v_i32m1 (in, 4, n);
    __riscv_vsse32_v_i32m1 (out, 4, v, n);
}

void foo4 (int64_t *in, int64_t *out, int n)
{
    vint64m1_t v = __riscv_vlse64_v_i64m1 (in, 8, n);
    __riscv_vsse64_v_i64m1 (out, 8, v, n);
}
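
For comparison, here is a hand-written unit-stride version of foo using the
corresponding vle/vse intrinsics (this helper is not part of the test case
compiled below; it only illustrates the code the strided version is equivalent
to when the stride equals the element width):

void foo_unit_stride (int8_t *in, int8_t *out, int n)
{
    /* Unit-stride load/store: equivalent to the vlse8/vsse8 calls in foo
       above, because the stride (1 byte) equals the int8_t element width.  */
    vint8m1_t v = __riscv_vle8_v_i8m1 (in, n);
    __riscv_vse8_v_i8m1 (out, v, n);
}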

ASM:

foo:
        li      a5,1
        vsetvli zero,a2,e8,m1,ta,ma
        vlse8.v v1,0(a0),a5
        vsse8.v v1,0(a1),a5
        ret
foo1:
        li      a5,2
        vsetvli zero,a2,e16,m1,ta,ma
        vlse16.v        v1,0(a0),a5
        vsse16.v        v1,0(a1),a5
        ret
foo3:
        li      a5,4
        vsetvli zero,a2,e32,m1,ta,ma
        vlse32.v        v1,0(a0),a5
        vsse32.v        v1,0(a1),a5
        ret
foo4:
        li      a5,8
        vsetvli zero,a2,e64,m1,ta,ma
        vlse64.v        v1,0(a0),a5
        vsse64.v        v1,0(a1),a5
        ret


When the stride equals the element width, the strided vlse8.v/vlse16.v/
vlse32.v/vlse64.v should be optimized into the unit-stride vle8.v/vle16.v/
vle32.v/vle64.v, and likewise vsse*.v into vse*.v.

This would save the li instruction that materializes the stride constant.
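
For reference, the codegen expected for foo after such a fold would look
roughly like this (hand-written sketch, not actual compiler output):

foo:
        vsetvli zero,a2,e8,m1,ta,ma
        vle8.v  v1,0(a0)
        vse8.v  v1,0(a1)
        ret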
