https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112401
--- Comment #3 from JuzheZhong ---
vfloat32m4_t matrix_4x4_transpose_vslide(vfloat32m4_t src) {
vfloat32m1_t inMat0 = __riscv_vget_v_f32m4_f32m1(src, 0);
vfloat32m1_t inMat1 = __riscv_vget_v_f32m4_f32m1(src, 1);
vfloat32m1_t inMat2 =
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112401
--- Comment #2 from JuzheZhong ---
Add more tests:
void matrix_4x4_transpose_segmented_load(float* dst, float* src)
{
vfloat32m1x4_t data = __riscv_vlseg4e32_v_f32m1x4(src, 4);
vfloat32m1_t data0 = __riscv_vget_v_f32m1x4_f32m1(data, 0);