https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102974
// --- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
#include <cstdint>
#include <bit>
#include <type_traits>
// <concepts> only exists on C++20 standard libraries; pull it in when present
// so that __cpp_lib_concepts (tested below) is actually defined.
#if defined(__has_include)
#if __has_include(<concepts>)
#include <concepts>
#endif
#endif

/// Two 32-bit halves of a 64-bit quantity, least-significant half first.
struct ul32x2
{
    std::uint_least32_t low, high;
};

/// 32x32 -> 64 bit unsigned multiplication.
///
/// @param a     first factor
/// @param b     second factor
/// @param high  receives the upper 32 bits of the product
/// @return the lower 32 bits of the product
inline constexpr std::uint_least32_t umul_least_32(std::uint_least32_t a, std::uint_least32_t b, std::uint_least32_t& high) noexcept
{
    // The bit-cast fast path needs std::endian (C++20). It is guarded by the
    // feature-test macro like the other C++20 facilities in this file, so that
    // older language modes fall through to the shift-based path.
#if defined(__cpp_lib_endian) && __cpp_lib_endian >= 201907L
    if constexpr (std::endian::native == std::endian::little || std::endian::native == std::endian::big)
    {
        struct ul32x2_little_endian_t
        {
            std::uint_least32_t low, high;
        };
        struct ul32x2_big_endian_t
        {
            std::uint_least32_t high, low;
        };
        // Pick the member order that matches how the 64-bit product is laid out in memory.
        using ul32x2_t = std::conditional_t<std::endian::native == std::endian::little,
                                            ul32x2_little_endian_t, ul32x2_big_endian_t>;
        auto ret{__builtin_bit_cast(ul32x2_t, static_cast<std::uint_least64_t>(a) * b)};
        high = ret.high;
        return ret.low;
    }
    else
#endif
    {
        // Byte-order independent path: widen, multiply, split with a shift.
        std::uint_least64_t v{static_cast<std::uint_least64_t>(a) * b};
        high = static_cast<std::uint_least32_t>(v >> 32u);
        return static_cast<std::uint_least32_t>(v);
    }
}

/// Portable add-with-carry: out = a + b + carry (modulo the width of T).
///
/// @param carry  incoming carry bit
/// @param a      first addend
/// @param b      second addend
/// @param out    receives the wrapped sum
/// @return true when the addition produced a carry out
template<typename T>
#if __cpp_lib_concepts >= 202002L
requires (std::unsigned_integral<T>)
#endif
inline constexpr bool add_carry_naive(bool carry, T a, T b, T& out) noexcept
{
    // For T narrower than int the operands are promoted to int, so the results
    // are cast back explicitly; brace-initialising T from the promoted value
    // would be a narrowing conversion.
    T const temp{static_cast<T>(carry + a)};
    out = static_cast<T>(temp + b);
    // A wrap in either partial addition means a carry out. At most one of the
    // two can wrap, so a bitwise OR is enough and avoids a branch.
    return (out < b) | (temp < a);
}

/// Add-with-carry that uses a compiler intrinsic when one is available and
/// falls back to add_carry_naive otherwise (and during constant evaluation).
///
/// @param carry  incoming carry bit
/// @param a      first addend
/// @param b      second addend
/// @param out    receives the wrapped sum
/// @return true when the addition produced a carry out
template<typename T>
#if __cpp_lib_concepts >= 202002L
requires (std::unsigned_integral<T>)
#endif
inline constexpr bool add_carry(bool carry, T a, T b, T& out) noexcept
{
#if __cpp_lib_is_constant_evaluated >= 201811L
    // The intrinsics below are taken only at run time.
    if (std::is_constant_evaluated())
        return add_carry_naive(carry, a, b, out);
    else
#endif
    {
        // NOTE(review): the "defined(__has_builtin) && __has_builtin(...)" conditions
        // below may not preprocess on compilers that lack __has_builtin - confirm
        // against the oldest toolchain that has to be supported.
#if defined(_MSC_VER) && !defined(__clang__)
#if (defined(_M_IX86) || defined(_M_AMD64))
        // NOTE(review): the _addcarry* intrinsics presumably come from <intrin.h>,
        // which this snippet does not include - confirm.
        if constexpr (sizeof(T) == 8)
        {
#if defined(_M_AMD64)
            return _addcarryx_u64(carry, a, b, reinterpret_cast<std::uint64_t*>(__builtin_addressof(out)));
#else
            // 32-bit x86: add the two 32-bit halves, chaining the carry.
            return _addcarryx_u32(_addcarryx_u32(carry,
                *reinterpret_cast<std::uint32_t*>(__builtin_addressof(a)), *reinterpret_cast<std::uint32_t*>(__builtin_addressof(b)), reinterpret_cast<std::uint32_t*>(__builtin_addressof(out))),
                reinterpret_cast<std::uint32_t*>(__builtin_addressof(a))[1], reinterpret_cast<std::uint32_t*>(__builtin_addressof(b))[1], reinterpret_cast<std::uint32_t*>(__builtin_addressof(out)) + 1);
#endif
        }
        else if constexpr (sizeof(T) == 4)
            return _addcarryx_u32(carry, a, b, reinterpret_cast<std::uint32_t*>(__builtin_addressof(out)));
        else if constexpr (sizeof(T) == 2)
            return _addcarry_u16(carry, a, b, reinterpret_cast<short unsigned*>(__builtin_addressof(out)));
        else if constexpr (sizeof(T) == 1)
            return _addcarry_u8(carry, a, b, reinterpret_cast<char unsigned*>(__builtin_addressof(out)));
        else
            return add_carry_naive(carry, a, b, out);
#else
        return add_carry_naive(carry, a, b, out);
#endif
#elif defined(__has_builtin) && (__has_builtin(__builtin_addcb) && __has_builtin(__builtin_addcs) && __has_builtin(__builtin_addc) && __has_builtin(__builtin_addcl) && __has_builtin(__builtin_addcll))
        // Generic __builtin_addc* family: choose the builtin whose operand width matches T.
        if constexpr (sizeof(T) == sizeof(long long unsigned))
        {
            long long unsigned carryout{};
            out = __builtin_addcll(a, b, carry, __builtin_addressof(carryout));
            return carryout;
        }
        else if constexpr (sizeof(T) == sizeof(long unsigned))
        {
            long unsigned carryout{};
            out = __builtin_addcl(a, b, carry, __builtin_addressof(carryout));
            return carryout;
        }
        else if constexpr (sizeof(T) == sizeof(unsigned))
        {
            unsigned carryout{};
            out = __builtin_addc(a, b, carry, __builtin_addressof(carryout));
            return carryout;
        }
        else if constexpr (sizeof(T) == sizeof(short unsigned))
        {
            short unsigned carryout{};
            out = __builtin_addcs(a, b, carry, __builtin_addressof(carryout));
            return carryout;
        }
        else if constexpr (sizeof(T) == sizeof(char unsigned))
        {
            char unsigned carryout{};
            out = __builtin_addcb(a, b, carry, __builtin_addressof(carryout));
            return carryout;
        }
        else
        {
            return add_carry_naive(carry, a, b, out);
        }
#elif defined(__has_builtin) && (__has_builtin(__builtin_ia32_addcarryx_u32) || __has_builtin(__builtin_ia32_addcarry_u32) || __has_builtin(__builtin_ia32_addcarryx_u64))
        // NOTE(review): [[gnu::may_alias]] is attached to the pointer alias below,
        // not to the pointee type - confirm it has the intended effect.
        if constexpr (sizeof(T) == 8)
        {
#if __has_builtin(__builtin_ia32_addcarryx_u64)
            using may_alias_ptr_type [[gnu::may_alias]] = unsigned long long*;
            return __builtin_ia32_addcarryx_u64(carry, a, b, reinterpret_cast<may_alias_ptr_type>(__builtin_addressof(out)));
#else
            // No 64-bit builtin: copy out the two 4-byte halves of each operand
            // and chain two 32-bit add-with-carry operations.
            std::uint32_t a_low{};
            std::uint32_t a_high{};
            __builtin_memcpy(__builtin_addressof(a_low), __builtin_addressof(a), 4);
            __builtin_memcpy(__builtin_addressof(a_high), reinterpret_cast<char const*>(__builtin_addressof(a)) + 4, 4);
            std::uint32_t b_low{};
            std::uint32_t b_high{};
            __builtin_memcpy(__builtin_addressof(b_low), __builtin_addressof(b), 4);
            __builtin_memcpy(__builtin_addressof(b_high), reinterpret_cast<char const*>(__builtin_addressof(b)) + 4, 4);
            using may_alias_ptr_type [[gnu::may_alias]] = unsigned*;
#if __has_builtin(__builtin_ia32_addcarry_u32)
            return __builtin_ia32_addcarry_u32(__builtin_ia32_addcarry_u32(carry, a_low, b_low, reinterpret_cast<may_alias_ptr_type>(__builtin_addressof(out))),
                a_high, b_high, reinterpret_cast<may_alias_ptr_type>(__builtin_addressof(out)) + 1);
#elif __has_builtin(__builtin_ia32_addcarryx_u32)
            return __builtin_ia32_addcarryx_u32(__builtin_ia32_addcarryx_u32(carry, a_low, b_low, reinterpret_cast<may_alias_ptr_type>(__builtin_addressof(out))),
                a_high, b_high, reinterpret_cast<may_alias_ptr_type>(__builtin_addressof(out)) + 1);
#else
            return add_carry_naive(carry, a, b, out);
#endif
#endif
        }
        else if constexpr (sizeof(T) == 4)
        {
            using may_alias_ptr_type [[gnu::may_alias]] = unsigned*;
#if __has_builtin(__builtin_ia32_addcarry_u32)
            return __builtin_ia32_addcarry_u32(carry, a, b, reinterpret_cast<may_alias_ptr_type>(__builtin_addressof(out)));
#elif __has_builtin(__builtin_ia32_addcarryx_u32)
            return __builtin_ia32_addcarryx_u32(carry, a, b, reinterpret_cast<may_alias_ptr_type>(__builtin_addressof(out)));
#else
            return add_carry_naive(carry, a, b, out);
#endif
        }
        else
            return add_carry_naive(carry, a, b, out); // 16 bit addcarry simply does not exist on gcc and clang
#else
        return add_carry_naive(carry, a, b, out);
#endif
    }
}

/// 64x64 -> 128 bit unsigned multiplication built from four 32x32 partial products.
///
/// @param a     first factor
/// @param b     second factor
/// @param high  receives the upper 64 bits of the product
/// @return the lower 64 bits of the product
std::uint_least64_t umul_least_64(std::uint_least64_t a, std::uint_least64_t b, std::uint_least64_t& high) noexcept
{
    // Split with shifts and masks rather than a bit-cast to ul32x2: the struct
    // has a fixed {low, high} member order, which only matches the in-memory
    // layout of a 64-bit integer on little-endian targets.
    constexpr std::uint_least64_t low_mask{0xFFFFFFFFu};
    auto const a0 = static_cast<std::uint_least32_t>(a & low_mask);
    auto const a1 = static_cast<std::uint_least32_t>((a >> 32u) & low_mask);
    auto const b0 = static_cast<std::uint_least32_t>(b & low_mask);
    auto const b1 = static_cast<std::uint_least32_t>((b >> 32u) & low_mask);

    // Result words c0..c3 (least significant first), seeded with a0*b0 and a1*b1.
    std::uint_least32_t c1{};
    std::uint_least32_t const c0{umul_least_32(a0, b0, c1)};
    std::uint_least32_t a0b1h{};
    std::uint_least32_t const a0b1l{umul_least_32(a0, b1, a0b1h)};
    std::uint_least32_t a1b0h{};
    std::uint_least32_t const a1b0l{umul_least_32(a1, b0, a1b0h)};
    std::uint_least32_t c3{};
    std::uint_least32_t c2{umul_least_32(a1, b1, c3)};

    // Fold the first cross product (a0*b1) into words 1 and 2.
    bool carry{add_carry(false, c1, a0b1l, c1)};
    carry = add_carry(carry, a0b1h, c2, c2);
    // Remember the carry out of word 2; it is applied to word 3 at the end.
    std::uint_least32_t const temp{carry};

    // Fold the second cross product (a1*b0) into words 1 and 2.
    carry = add_carry(false, c1, a1b0l, c1);
    carry = add_carry(carry, a1b0h, c2, c2);

    // Apply both pending carries to word 3. The carry out is ignored because
    // the full product always fits in 128 bits.
    add_carry(carry, temp, c3, c3);

    high = (static_cast<std::uint_least64_t>(c3) << 32u) | c2;
    return (static_cast<std::uint_least64_t>(c1) << 32u) | c0;
}