// --- Comment #22 from Matthias Kretz (Vir) <mkretz at gcc dot> ---
// I took your hypot3_scale and reduced latency and throughput. I don't think
// the sqrtmax/sqrtmin limits are correct (sqrtmax² * 3 -> infinity).
//
// Benchmark figures as posted with the original version of the code (the
// label of the baseline row is blank in the source):
//
//   TYPE                     Latency        Speedup      Throughput     Speedup
//                            [cycles/call]  [per value]  [cycles/call]  [per value]
//   float,  (baseline)       46.5           1            12.7           1
//   float,  hypot3_scale     35.5           1.31         27             0.47
//   float,  hypot3_mkretz    30.2           1.54         12             1.06
//   ---------------------------------------------------------------------------
//   double, (baseline)       59.6           1            60             1
//   double, hypot3_scale     51.2           1.16         48.8           1.23
//   double, hypot3_mkretz    40.1           1.49         35             1.71
//
// NOTE(review): the implementation below differs from the posted one. It fixes
// the threshold problem mentioned above (the fast path could overflow for
// finite results), returns the magnitude (not the signed value) from the
// zero shortcuts, and scales subnormal inputs far enough that their squares
// do not underflow. The benchmark numbers have not been re-measured.

namespace hypot3_detail
{
  // Returns 2^exponent as a T. Usable in constant expressions; every
  // intermediate product is a power of two, so the result is exact as long
  // as 2^exponent is representable as a normal value of T.
  template <typename T>
    constexpr T
    pow2(int exponent) noexcept
    {
      const T factor = exponent < 0 ? T(0.5) : T(2);
      T result = 1;
      for (int i = exponent < 0 ? -exponent : exponent; i > 0; --i)
        result *= factor;
      return result;
    }

  // Returns (c*c + b*b) + a*a. Where the compiler provides
  // __builtin_assoc_barrier it is used so that the grouping of the two
  // smaller terms is kept even if floating-point reassociation is enabled.
  template <typename T>
    constexpr T
    sum_of_squares(T a, T b, T c) noexcept
    {
#if defined(__has_builtin)
#  if __has_builtin(__builtin_assoc_barrier)
      return __builtin_assoc_barrier(c * c + b * b) + a * a;
#  else
      return (c * c + b * b) + a * a;
#  endif
#else
      return (c * c + b * b) + a * a;
#endif
    }
}

// Computes sqrt(x*x + y*y + z*z) while avoiding overflow and underflow in the
// intermediate squares.
//
// Special values:
//   - if any argument is infinite the result is +infinity (even if another
//     argument is NaN);
//   - otherwise, if any argument is NaN the result is a quiet NaN;
//   - if at least two arguments are zero the result is the absolute value of
//     the remaining argument.
//
// T must be a binary (radix 2) floating-point type.
template <typename T>
  constexpr T
  hypot(T x, T y, T z)
  {
    using limits = std::numeric_limits<T>;
    static_assert(!limits::is_integer && limits::radix == 2,
                  "hypot(x, y, z) requires a binary floating-point type");

    // Largest magnitude handled without scaling: 2^hi_exp. Three squares of
    // values up to 2^hi_exp sum to at most 3 * 2^(max_exponent - 2), which is
    // below 2^max_exponent and therefore finite.
    constexpr int hi_exp = limits::max_exponent / 2 - 1;
    // Smallest magnitude handled without scaling: 2^lo_exp, chosen so that
    // the square of the largest argument is still a normal number.
    constexpr int lo_exp = (limits::min_exponent - 1) / 2;

    constexpr T fast_max = hypot3_detail::pow2<T>(hi_exp);
    constexpr T fast_min = hypot3_detail::pow2<T>(lo_exp);
    // Maps (fast_max, max()] to below fast_max.
    constexpr T scale_down = hypot3_detail::pow2<T>(hi_exp - limits::max_exponent);
    // Maps [denorm_min(), fast_min) into the range where squares are normal
    // and cannot overflow.
    constexpr T scale_up = hypot3_detail::pow2<T>(hi_exp - lo_exp);
    constexpr T zero = 0;

    if (not (std::isnormal(x) && std::isnormal(y) && std::isnormal(z))) [[unlikely]]
      {
        // Infinity takes precedence over NaN.
        if (std::isinf(x) || std::isinf(y) || std::isinf(z))
          return limits::infinity();
        if (std::isnan(x) || std::isnan(y) || std::isnan(z))
          return limits::quiet_NaN();

        // With two zero arguments the result is exactly the magnitude of the
        // third one. abs() is required here: the result must not be negative.
        const bool xz = x == zero;
        const bool yz = y == zero;
        const bool zz = z == zero;
        if (xz && yz)
          return std::abs(z);
        if (xz && zz)
          return std::abs(y);
        if (yz && zz)
          return std::abs(x);
      }

    x = std::abs(x);
    y = std::abs(y);
    z = std::abs(z);

    // a is the largest magnitude; b and c are the other two (in no
    // particular order).
    T a = std::max(std::max(x, y), z);
    T b = std::min(std::max(x, y), z);
    T c = std::min(x, y);

    if (a >= fast_min && a <= fast_max) [[likely]]
      return std::sqrt(hypot3_detail::sum_of_squares(a, b, c));

    // Rescale by an exact power of two, compute, and undo the scaling.
    // Multiplying and dividing by a power of two introduces no rounding
    // error unless the value leaves the normal range.
    const T scale = a >= fast_min ? scale_down : scale_up;
    a *= scale;
    b *= scale;
    c *= scale;
    return std::sqrt(hypot3_detail::sum_of_squares(a, b, c)) / scale;
  }