https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109748

            Bug ID: 109748
           Summary: RISC-V: Mis code gen for the
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: pan2.li at intel dot com
  Target Milestone: ---

Created attachment 55007
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55007&action=edit
Test file for reproducing

Given the code below:

#include <riscv_vector.h>

/*
 * Reproducer: accumulate element-wise products of byte data with RISC-V
 * vector intrinsics and return the reduced 32-bit sum.
 *
 * a, b - input byte buffers; both pointers are advanced by vl per iteration.
 * len  - number of bytes to process; the loop body is skipped when len <= 0.
 *
 * Returns element 0 of the reduction result.
 *
 * NOTE(review): as written, both multiplicands are derived from `a` (see the
 * note on b8s_extended below), so the accumulated value is built from
 * a[i] * a[i], not a[i] * b[i].
 */
int byte_mac_vec(unsigned char *a, unsigned char *b, int len) {
  /* vl used to zero-initialize the two vectors below. */
  size_t vlmax = __riscv_vsetvlmax_e8m1();
  /* 32-bit, LMUL=4 accumulator, initialized to zero. */
  vint32m4_t vec_s = __riscv_vmv_v_x_i32m4(0, vlmax);
  /* Zero vector passed as the scalar operand of the final reduction. */
  vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vlmax);
  /* k counts the bytes still to be processed. */
  int k = len;

  /* Strip-mined loop: each pass handles vl bytes, then advances a, b and k. */
  for (size_t vl; k > 0; k -= vl, a += vl, b += vl) {
      /* Request a vector length for the remaining k bytes (e8, m1). */
      vl = __riscv_vsetvl_e8m1(k);

      /* Load vl bytes from each input buffer. */
      vuint8m1_t a8s = __riscv_vle8_v_u8m1(a, vl);
      vuint8m1_t b8s = __riscv_vle8_v_u8m1(b, vl);
      /* Zero-extend the 8-bit elements to 32 bits (vf4: u8m1 -> u32m4). */
      vuint32m4_t a8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);
      /*
       * NOTE(review): this extends a8s, not b8s, so b8s is loaded but never
       * used. Possibly unintentional, but the assembly quoted in this report
       * (a single vle8.v, vmacc.vv v4,v8,v8) was generated from exactly this
       * code, so it is left as is.
       */
      vuint32m4_t b8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);

      /* Reinterpret the unsigned vectors as signed for the i32 vmacc. */
      vint32m4_t a8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(a8s_extended);
      vint32m4_t b8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(b8s_extended);

      /* Multiply-accumulate into vec_s; _tu is the tail-undisturbed variant. */
      vec_s = __riscv_vmacc_vv_i32m4_tu(vec_s, a8s_as_i32, b8s_as_i32, vl);
  }

  /* Reduce the accumulator to one sum; vl comes from vsetvl_e32m4(len). */
  vint32m1_t vec_sum = __riscv_vredsum_vs_i32m4_i32m1(vec_s, vec_zero,
__riscv_vsetvl_e32m4(len));
  /* Move element 0 of the reduction result into a scalar. */
  int sum = __riscv_vmv_x_s_i32m1_i32(vec_sum);

  return sum;
}

Compiling it with the build options '-march=rv64gcv
-mabi=lp64 -O3 -c -S test.c -o -' generates the assembly code below.

byte_mac_vec:
        vsetvli a5,zero,e32,m4,ta,ma
        vmv.v.i v4,0
        vsetvli zero,a5,e32,m1,ta,ma
        vmv.v.i v2,0
        ble     a2,zero,.L2
        mv      a4,a2
.L3:
        vsetvli a5,a4,e8,m1,ta,ma   <- should be e32m4
        subw    a4,a4,a5
        vle8.v  v1,0(a0)
        add     a0,a0,a5
        vzext.vf4       v8,v1
        vmacc.vv        v4,v8,v8
        bgt     a4,zero,.L3
.L2:
        vsetvli zero,a2,e32,m4,ta,ma
        vredsum.vs      v4,v4,v2
        vmv.x.s a0,v4
        sext.w  a0,a0
        ret

Reply via email to