https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111846
--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
We are dividing 4 by 8.  We get here with basic-block vectorization and a
group size of 4, but we let vectorizable_simd_clone_call choose its own vector
type (it chooses 8 lanes).  While we properly constrain with

  if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen, &num_calls)
      || n->simdclone->nargs != nargs)
    continue;

we seem to cache things in STMT_VINFO_SIMD_CLONE_INFO, and this info gets
re-used across a group_size 8 -> 4 transition here.  This is because we have
multiple SLP instances each sharing some of the calls (in the end costs would
make vectorization not profitable), vectorizing the CTORs in

  <bb 2> [local count: 9759239]:
  _14 = eq_set_bands_real_adj[0];
  _15 = powf (0.0, _14);
  _16 = _15 - 1.0e+0;
  _21 = eq_set_bands_real_adj[1];
  _22 = powf (0.0, _21);
  _23 = _22 - 1.0e+0;
  _28 = eq_set_bands_real_adj[2];
  _29 = powf (0.0, _28);
  _30 = _29 - 1.0e+0;
  _35 = eq_set_bands_real_adj[3];
  _36 = powf (0.0, _35);
  _37 = _36 - 1.0e+0;
  _42 = eq_set_bands_real_adj[4];
  _43 = powf (0.0, _42);
  _44 = _43 - 1.0e+0;
  _49 = eq_set_bands_real_adj[5];
  _50 = powf (0.0, _49);
  _51 = _50 - 1.0e+0;
  _56 = eq_set_bands_real_adj[6];
  _57 = powf (0.0, _56);
  _58 = _57 - 1.0e+0;
  _63 = eq_set_bands_real_adj[7];
  _64 = powf (0.0, _63);
  _65 = _64 - 1.0e+0;
  _70 = eq_set_bands_real_adj[8];
  _71 = powf (0.0, _70);
  _72 = _71 - 1.0e+0;
  _77 = eq_set_bands_real_adj[9];
  _78 = powf (0.0, _77);
  _79 = _78 - 1.0e+0;
  _19 = {_30, _37, _44, _51, _58, _65, _72, _79};
  _25 = {_44, _51, _58, _65, _72, _79, _16, _23};
  _26 = {_58, _65, _72, _79, _16, _23, _30, _37};
  _32 = {_72, _79, _16, _23, _30, _37, _44, _51};
  _33 = {_16, _23, _30, _37, _44, _51, _58, _65};
  MEM <vector(8) float> [(float *)&gv] = _33;
  MEM <vector(8) float> [(float *)&gv + 32B] = _32;
  MEM <vector(8) float> [(float *)&gv + 64B] = _26;
  MEM <vector(8) float> [(float *)&gv + 96B] = _25;
  MEM <vector(8) float> [(float *)&gv + 128B] = _19;
  MEM <vector(8) float> [(float *)&gv + 160B] = _33;
  MEM <vector(8) float> [(float *)&gv + 192B] = _32;
  MEM <vector(8) float> [(float *)&gv + 224B] = _26;
  MEM <vector(8) float> [(float *)&gv + 256B] = _25;
  MEM <vector(8) float> [(float *)&gv + 288B] = _19;
  _82 = {_58, _65, _72, _79};
  _83 = {_30, _37, _44, _51};
  _84 = {_72, _79, _16, _23};
  _85 = {_44, _51, _58, _65};
  _86 = {_16, _23, _30, _37};
  MEM <vector(4) float> [(float *)&gv + 320B] = _86;
  MEM <vector(4) float> [(float *)&gv + 336B] = _85;
  MEM <vector(4) float> [(float *)&gv + 352B] = _84;
  MEM <vector(4) float> [(float *)&gv + 368B] = _83;
  MEM <vector(4) float> [(float *)&gv + 384B] = _82;
  return;