https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109048
Bug ID: 109048 Summary: [13 regression] redundant mask compare generated by vectorizer. Product: gcc Version: 13.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: crazylht at gmail dot com Target Milestone: --- #include <math.h> void tmp2 (float *af, int type, int type2, float *res) { const int Etype = (type == 1 || type2 == 2); const float f1 = (type == 3 || type2 == 4) ? 4.f : 2.f; const float f2 = (type == 3 || type2 == 4) ? 0.25f : 0.5f; for (int i = 0; i < 256; i++) { float x = af[i]; int z = (x < 0.f); float t1 = (z ? 1.f : f2) + (x < f1 ? 1.f : 0.f); float neg_t1 = -fabsf(t1); float t2 = Etype ? neg_t1 : t1; res[i] += t2 + x; } } gcc trunk now generates <bb 58> [local count: 5368707]: vect_cst__110 = {iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34, iftmp.0_34}; vect_cst__119 = {prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41, prephitmp_41}; vect_cst__123 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16}; <bb 17> [local count: 53687070]: # i_18 = PHI <i_47(26), 0(58)> # ivtmp_15 = PHI <ivtmp_43(26), 256(58)> # vectp_af.11_105 = PHI <vectp_af.11_106(26), af_24(D)(58)> # vectp_res.23_125 = PHI <vectp_res.23_126(26), res_28(D)(58)> # vectp_res.28_130 = PHI <vectp_res.28_131(26), res_28(D)(58)> # ivtmp_133 = PHI <ivtmp_134(26), 0(58)> # DEBUG i => NULL # DEBUG BEGIN_STMT _38 = (long unsigned int) i_18; _37 = _38 * 4; _36 = af_24(D) + _37; vect_x_20.13_107 = MEM <vector(8) float> [(float *)vectp_af.11_105]; x_20 = *_36; # DEBUG x => NULL # DEBUG BEGIN_STMT # DEBUG D#1 => NULL # DEBUG z => NULL # DEBUG BEGIN_STMT mask__50.14_109 = vect_x_20.13_107 >= { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; _50 = x_20 >= 0.0; mask__52.15_111 = vect_x_20.13_107 < vect_cst__110; _52 = x_20 < iftmp.0_34; mask__53.16_112 = mask__50.14_109 & mask__52.15_111; _53 = _50 & _52; mask__55.17_114 = vect_x_20.13_107 >= vect_cst__110; _55 = x_20 >= iftmp.0_34; mask__56.18_115 = mask__50.14_109 & mask__55.17_114; _56 = _50 & _55; mask__74.19_117 = vect_x_20.13_107 < { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; _74 = x_20 < 0.0; vect__ifc__73.20_120 = VEC_COND_EXPR <mask__74.19_117, { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>; _ifc__73 = _74 ? 2.0e+0 : prephitmp_41; _174 = ~mask__53.16_112; _175 = mask__74.19_117 & _174; vect__ifc__75.21_122 = VEC_COND_EXPR <_175, { 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0, 2.0e+0 }, vect_cst__119>; _ifc__75 = _53 ? prephitmp_41 : _ifc__73; vect_prephitmp_17.22_124 = VEC_COND_EXPR <mask__56.18_115, vect_cst__123, vect__ifc__75.21_122>; prephitmp_17 = _56 ? iftmp.1_16 : _ifc__75; # DEBUG t1 => D#2 # DEBUG BEGIN_STMT # DEBUG neg_t1 => -D#2 # DEBUG BEGIN_STMT # DEBUG t2 => prephitmp_17 # DEBUG BEGIN_STMT _12 = res_28(D) + _37; vect__26.25_127 = MEM <vector(8) float> [(float *)vectp_res.23_125]; _26 = *_12; vect__27.26_128 = vect__26.25_127 + vect_x_20.13_107; _27 = _26 + x_20; vect__45.27_129 = vect_prephitmp_17.22_124 + vect__27.26_128; _45 = prephitmp_17 + _27; MEM <vector(8) float> [(float *)vectp_res.28_130] = vect__45.27_129; # DEBUG BEGIN_STMT i_47 = i_18 + 1; # DEBUG i => i_47 # DEBUG BEGIN_STMT ivtmp_43 = ivtmp_15 - 1; vectp_af.11_106 = vectp_af.11_105 + 32; vectp_res.23_126 = vectp_res.23_125 + 32; vectp_res.28_131 = vectp_res.28_130 + 32; ivtmp_134 = ivtmp_133 + 1; if (ivtmp_134 < 32) goto <bb 26>; [90.00%] else goto <bb 56>; [10.00%] vs gcc12.2 <bb 57> [local count: 5368707]: vect_cst__128 = {iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16, iftmp.1_16}; vect_cst__134 = {iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33, iftmp.0_33}; <bb 5> [local count: 53687070]: # i_15 = PHI <i_30(13), 0(57)> # ivtmp_20 = PHI <ivtmp_49(13), 256(57)> # vectp_af.24_124 = PHI <vectp_af.24_125(13), af_24(D)(57)> # vectp_res.31_138 = PHI <vectp_res.31_139(13), res_28(D)(57)> # vectp_res.36_143 = PHI <vectp_res.36_144(13), res_28(D)(57)> # ivtmp_146 = PHI <ivtmp_147(13), 0(57)> # DEBUG i => NULL # DEBUG BEGIN_STMT _7 = (long unsigned int) i_15; _8 = _7 * 4; _9 = af_24(D) + _8; vect_x_25.26_126 = MEM <vector(8) float> [(float *)vectp_af.24_124]; x_25 = *_9; # DEBUG x => NULL # DEBUG BEGIN_STMT # DEBUG D#1 => NULL # DEBUG z => NULL # DEBUG BEGIN_STMT _130 = vect_x_25.26_126 >= { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; vect_iftmp.27_131 = VEC_COND_EXPR <_130, vect_cst__128, { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 }>; iftmp.2_17 = x_25 >= 0.0 ? iftmp.1_16 : 1.0e+0; vect__41.28_133 = vect_iftmp.27_131 + { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 }; _41 = iftmp.2_17 + 1.0e+0; _135 = vect_x_25.26_126 >= vect_cst__134; vect_prephitmp_42.29_136 = VEC_COND_EXPR <_135, vect_iftmp.27_131, vect__41.28_133>; prephitmp_42 = x_25 >= iftmp.0_33 ? iftmp.2_17 : _41; # DEBUG t1 => NULL # DEBUG BEGIN_STMT # DEBUG neg_t1 => -prephitmp_42 # DEBUG BEGIN_STMT vect_neg_t1_27.30_137 = -vect_prephitmp_42.29_136; neg_t1_27 = -prephitmp_42; # DEBUG t2 => neg_t1_27 # DEBUG BEGIN_STMT _10 = res_28(D) + _8; vect__11.33_140 = MEM <vector(8) float> [(float *)vectp_res.31_138]; _11 = *_10; vect__35.34_141 = vect__11.33_140 + vect_x_25.26_126; _35 = _11 + x_25; vect__13.35_142 = vect_neg_t1_27.30_137 + vect__35.34_141; _13 = neg_t1_27 + _35; MEM <vector(8) float> [(float *)vectp_res.36_143] = vect__13.35_142; # DEBUG BEGIN_STMT i_30 = i_15 + 1; # DEBUG i => i_30 # DEBUG BEGIN_STMT ivtmp_49 = ivtmp_20 - 1; vectp_af.24_125 = vectp_af.24_124 + 32; vectp_res.31_139 = vectp_res.31_138 + 32; vectp_res.36_144 = vectp_res.36_143 + 32; ivtmp_147 = ivtmp_146 + 1; if (ivtmp_147 < 32)