https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111846

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
We are dividing 4 by 8.  We get here with basic-block vectorization and
a group size of 4, but we let vectorizable_simd_clone_call choose its
own vector type (it chooses one with 8 lanes).

While we properly constrain with

        if (!constant_multiple_p (vf * group_size,
                                  n->simdclone->simdlen, &num_calls)
            || n->simdclone->nargs != nargs)
          continue;

we seem to cache things in STMT_VINFO_SIMD_CLONE_INFO, and this info gets
re-used across a group_size 8 -> 4 transition here.  This is because
we have multiple SLP instances, each sharing some of the calls (in the end,
costs would make vectorization not profitable), when vectorizing the CTORs in:

  <bb 2> [local count: 9759239]:
  _14 = eq_set_bands_real_adj[0];
  _15 = powf (0.0, _14);
  _16 = _15 - 1.0e+0;
  _21 = eq_set_bands_real_adj[1];
  _22 = powf (0.0, _21);
  _23 = _22 - 1.0e+0;
  _28 = eq_set_bands_real_adj[2];
  _29 = powf (0.0, _28);
  _30 = _29 - 1.0e+0;
  _35 = eq_set_bands_real_adj[3];
  _36 = powf (0.0, _35);
  _37 = _36 - 1.0e+0;
  _42 = eq_set_bands_real_adj[4];
  _43 = powf (0.0, _42);
  _44 = _43 - 1.0e+0;
  _49 = eq_set_bands_real_adj[5];
  _50 = powf (0.0, _49);
  _51 = _50 - 1.0e+0;
  _56 = eq_set_bands_real_adj[6];
  _57 = powf (0.0, _56);
  _58 = _57 - 1.0e+0;
  _63 = eq_set_bands_real_adj[7];
  _64 = powf (0.0, _63);
  _65 = _64 - 1.0e+0;
  _70 = eq_set_bands_real_adj[8];
  _71 = powf (0.0, _70);
  _72 = _71 - 1.0e+0;
  _77 = eq_set_bands_real_adj[9];
  _78 = powf (0.0, _77);
  _79 = _78 - 1.0e+0;
  _19 = {_30, _37, _44, _51, _58, _65, _72, _79};
  _25 = {_44, _51, _58, _65, _72, _79, _16, _23};
  _26 = {_58, _65, _72, _79, _16, _23, _30, _37};
  _32 = {_72, _79, _16, _23, _30, _37, _44, _51};
  _33 = {_16, _23, _30, _37, _44, _51, _58, _65};
  MEM <vector(8) float> [(float *)&gv] = _33;
  MEM <vector(8) float> [(float *)&gv + 32B] = _32;
  MEM <vector(8) float> [(float *)&gv + 64B] = _26;
  MEM <vector(8) float> [(float *)&gv + 96B] = _25;
  MEM <vector(8) float> [(float *)&gv + 128B] = _19;
  MEM <vector(8) float> [(float *)&gv + 160B] = _33;
  MEM <vector(8) float> [(float *)&gv + 192B] = _32;
  MEM <vector(8) float> [(float *)&gv + 224B] = _26;
  MEM <vector(8) float> [(float *)&gv + 256B] = _25;
  MEM <vector(8) float> [(float *)&gv + 288B] = _19;
  _82 = {_58, _65, _72, _79};
  _83 = {_30, _37, _44, _51};
  _84 = {_72, _79, _16, _23};
  _85 = {_44, _51, _58, _65};
  _86 = {_16, _23, _30, _37};
  MEM <vector(4) float> [(float *)&gv + 320B] = _86;
  MEM <vector(4) float> [(float *)&gv + 336B] = _85;
  MEM <vector(4) float> [(float *)&gv + 352B] = _84;
  MEM <vector(4) float> [(float *)&gv + 368B] = _83;
  MEM <vector(4) float> [(float *)&gv + 384B] = _82;
  return;

Reply via email to