https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112331
Bug ID: 112331
Summary: middle-end: Fail vectorization
Product: gcc
Version: 14.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
Assignee: unassigned at gcc dot gnu.org
Reporter: juzhe.zhong at rivai dot ai
Target Milestone: ---

https://gcc.godbolt.org/z/x7GGzezGh

#include <stdio.h>
#define LEN 32000
#define ntimes 200000
#define TYPE float
#define lll LEN
#define LEN2 256
#define ALIGNMENT 16

__attribute__ ((aligned(ALIGNMENT))) TYPE X[lll],Y[lll],Z[lll],U[lll],V[lll];

struct GlobalData
{
  __attribute__((aligned(ALIGNMENT))) TYPE a[LEN];
  int pad1[3];
  __attribute__((aligned(ALIGNMENT))) TYPE b[LEN];
  int pad2[5];
  __attribute__((aligned(ALIGNMENT))) TYPE c[LEN];
  int pad3[7];
  __attribute__((aligned(ALIGNMENT))) TYPE d[LEN];
  int pad4[11];
  __attribute__((aligned(ALIGNMENT))) TYPE e[LEN];
  int pad5[13];
  __attribute__((aligned(ALIGNMENT))) TYPE aa[LEN2][LEN2];
  int pad6[17];
  __attribute__((aligned(ALIGNMENT))) TYPE bb[LEN2][LEN2];
  int pad7[19];
  __attribute__((aligned(ALIGNMENT))) TYPE cc[LEN2][LEN2];
  int pad8[23];
  __attribute__((aligned(ALIGNMENT))) TYPE tt[LEN2][LEN2];
} global_data;

__attribute__((aligned(ALIGNMENT))) TYPE * const a = global_data.a;
__attribute__((aligned(ALIGNMENT))) TYPE * const b = global_data.b;
__attribute__((aligned(ALIGNMENT))) TYPE * const c = global_data.c;
__attribute__((aligned(ALIGNMENT))) TYPE * const d = global_data.d;
__attribute__((aligned(ALIGNMENT))) TYPE * const e = global_data.e;
__attribute__((aligned(ALIGNMENT))) TYPE (* const aa)[LEN2] = global_data.aa;
__attribute__((aligned(ALIGNMENT))) TYPE (* const bb)[LEN2] = global_data.bb;
__attribute__((aligned(ALIGNMENT))) TYPE (* const cc)[LEN2] = global_data.cc;
__attribute__((aligned(ALIGNMENT))) TYPE (* const tt)[LEN2] = global_data.tt;

int foo()
{
  // linear dependence testing
  // no dependence - vectorizable
  for (int nl = 0; nl < 2*ntimes; nl++) {
    // #pragma vector always
    for (int i = 1; i < LEN; i += 2) {
      a[i] = a[i - 1] + b[i];
    }
  }
  return 0;
}

Both RVV and ARM SVE failed
to vectorize it whereas Clang can vectorize it.