https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92655

            Bug ID: 92655
           Summary: Suboptimal vectorization of variable shift
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rguenth at gcc dot gnu.org
  Target Milestone: ---

For the following testcase, vect_recog_vector_vector_shift_pattern isn't able
to fully elide the use of 'int', and thus we fall back to vectorization with
SSE instead of AVX with -O3 -march=core-avx2.

/* Per-lane shift amounts applied element-wise in the vector kernel below.  */
#define MULSEQ {32,34,35,38}
/* Number of 64-bit lanes; 4 x 64 bits = one 256-bit AVX2 vector.  */
#define STRIDE 4
/* M61 = 2^61 - 1, a Mersenne prime, used as the 61-bit mask/modulus.  */
#define M61 2305843009213693951ULL
#define BITS 61
/* Hand-rolled 64-bit type; NOTE(review): assumes an LP64 target where
   unsigned long is 64 bits.  */
typedef unsigned long uint64_t;
/* Vector of STRIDE 64-bit lanes; 32-byte alignment on both the member and
   the struct allows aligned 256-bit (AVX2) loads/stores.  */
typedef struct myvec_t { uint64_t __attribute__ ((aligned (32))) val[STRIDE]; }
__attribute__ ((aligned (32))) myvec_t;
/* Scalar kernel: multiply k by 2^m modulo M61 = 2^61-1, computed as a
   61-bit rotation — the low 61 bits of k<<m ((k<<m) & M61) plus the bits
   shifted out the top (k >> (61-m)).  The result is only partially reduced
   (it may still equal or exceed M61).  Requires 0 < m < 61 so neither
   shift count reaches the 64-bit type width (shift by >= width is UB);
   the MULSEQ values 32..38 satisfy this.
   Note: `<<` binds tighter than `&`, so the masking applies to (k<<m)
   as intended.  */
inline uint64_t MULWU(uint64_t k, uint64_t m)
{
  return (( (k)<<(m) & M61) + ( (k) >> (BITS-m))  )  ;
}
/* Vector kernel under test: applies the scalar MULWU lane-wise with the
   per-lane shift counts from MULSEQ.  This is the loop the report says is
   vectorized suboptimally: vect_recog_vector_vector_shift_pattern cannot
   fully elide the 'int' intermediate for the variable shift counts, so
   with -O3 -march=core-avx2 GCC falls back to SSE-width vectors instead
   of a single 256-bit AVX2 iteration.  (C++ overloading resolves the call
   inside the loop to the two-argument scalar MULWU above.)  */
myvec_t MULWU(myvec_t x)
{
  myvec_t __attribute__ ((aligned (32))) v;
  myvec_t __attribute__ ((aligned (32))) SPECIALMUL=MULSEQ;
  for(int j=0;j<STRIDE;j++)
    v.val[j] = MULWU(x.val[j], SPECIALMUL.val[j]);
  return v;
}

Reply via email to