https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105062
--- Comment #2 from Marc Glisse <glisse at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #1)
> But since not all of the std::max are recognized as
> MAX_EXPR but some only after loop if-conversion

Ah, I hadn't noticed. I tried replacing std::max with a simpler by-value version so we get MAX_EXPR already in early inline, but that didn't help. Actually, it made things worse:

#include <vector>
#include <tuple>
#include <chrono>
#include <algorithm>
#include <random>
#include <iostream>

int my_max(int a, int b){ return (a<b)?b:a; }

int main(){
  const long n = 100000000;
  std::vector<std::tuple<int,int,double>> vec;
  vec.reserve(n);
  std::random_device rd;
  std::default_random_engine re(rd());
  std::uniform_int_distribution<int> rand_int;
  std::uniform_real_distribution<double> rand_dbl;
  for(int i=0;i<n;++i)
    vec.emplace_back(rand_int(re), rand_int(re), rand_dbl(re));
  auto start = std::chrono::system_clock::now();
#ifdef SLOW
  {
    int sup = 0;
    for(int i=0;i<n;++i)
      sup=my_max(sup,my_max(std::get<0>(vec[i]),std::get<1>(vec[i])));
    volatile int noopt0 = sup;
  }
#else
  {
    int sup = 0;
    for(int i=0;i<n;++i)
      sup=my_max(my_max(sup,std::get<0>(vec[i])),std::get<1>(vec[i]));
    volatile int noopt1 = sup;
  }
#endif
  auto finish = std::chrono::system_clock::now();
  std::cout << std::chrono::duration_cast<std::chrono::microseconds>(finish - start).count() << '\n';
}

Now reassoc1 turns the fast code into the slow code before the vectorizer can detect the reduction chain :-(