https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92655
Bug ID: 92655
Summary: Suboptimal vectorization of variable shift
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: rguenth at gcc dot gnu.org
Target Milestone: ---

For the following testcase vect_recog_vector_vector_shift_pattern isn't able to fully elide the use of 'int' and thus we fall back to vectorization with SSE instead of AVX with -O3 -march=core-avx2

#define MULSEQ {32,34,35,38}
#define STRIDE 4
#define M61 2305843009213693951ULL
#define BITS 61

typedef unsigned long uint64_t;

typedef struct myvec_t {
  uint64_t __attribute__ ((aligned (32))) val[STRIDE];
} __attribute__ ((aligned (32))) myvec_t;

inline uint64_t MULWU(uint64_t k, uint64_t m)
{
  return (( (k)<<(m) & M61) + ( (k) >> (BITS-m)) ) ;
}

myvec_t MULWU(myvec_t x)
{
  myvec_t __attribute__ ((aligned (32))) v;
  myvec_t __attribute__ ((aligned (32))) SPECIALMUL=MULSEQ;
  for(int j=0;j<STRIDE;j++)
    v.val[j] = MULWU(x.val[j], SPECIALMUL.val[j]);
  return v;
}