Ping.
On Tue, 20 Aug 2019, Nicolas Pitre wrote:

> Let's rework that code to avoid large immediate values and convert some
> 64-bit variables to 32-bit ones when possible. This allows gcc to
> produce smaller and better code. This even produces optimal code on
> RISC-V.
>
> Signed-off-by: Nicolas Pitre <n...@fluxnic.net>
>
> diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
> index dc9726fdac..33358245b4 100644
> --- a/include/asm-generic/div64.h
> +++ b/include/asm-generic/div64.h
> @@ -178,7 +178,8 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
>  	uint32_t m_hi = m >> 32;
>  	uint32_t n_lo = n;
>  	uint32_t n_hi = n >> 32;
> -	uint64_t res, tmp;
> +	uint64_t res;
> +	uint32_t res_lo, res_hi, tmp;
>
>  	if (!bias) {
>  		res = ((uint64_t)m_lo * n_lo) >> 32;
> @@ -187,8 +188,9 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
>  		res = (m + (uint64_t)m_lo * n_lo) >> 32;
>  	} else {
>  		res = m + (uint64_t)m_lo * n_lo;
> -		tmp = (res < m) ? (1ULL << 32) : 0;
> -		res = (res >> 32) + tmp;
> +		res_lo = res >> 32;
> +		res_hi = (res_lo < m_hi);
> +		res = res_lo | ((uint64_t)res_hi << 32);
>  	}
>
>  	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> @@ -197,10 +199,12 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
>  		res += (uint64_t)m_hi * n_lo;
>  		res >>= 32;
>  	} else {
> -		tmp = res += (uint64_t)m_lo * n_hi;
> +		res += (uint64_t)m_lo * n_hi;
> +		tmp = res >> 32;
>  		res += (uint64_t)m_hi * n_lo;
> -		tmp = (res < tmp) ? (1ULL << 32) : 0;
> -		res = (res >> 32) + tmp;
> +		res_lo = res >> 32;
> +		res_hi = (res_lo < tmp);
> +		res = res_lo | ((uint64_t)res_hi << 32);
>  	}
>
>  	res += (uint64_t)m_hi * n_hi;
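In case it helps review, below is a standalone sketch of the function
with the patch applied, checked against a full 128-bit reference
product. The harness (the xprod_64/xprod_64_ref names, the xorshift
generator and main()) is my own illustration, not part of the patch,
and assumes a compiler with unsigned __int128 on a 64-bit host:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * The reworked __arch_xprod_64() from the patch above, renamed so it
 * builds outside the kernel: it returns bits 32..95 of the 128-bit
 * product m * n (plus m when bias is set) using only 32x32->64
 * multiplies.
 */
static uint64_t xprod_64(uint64_t m, uint64_t n, bool bias)
{
	uint32_t m_lo = m;
	uint32_t m_hi = m >> 32;
	uint32_t n_lo = n;
	uint32_t n_hi = n >> 32;
	uint64_t res;
	uint32_t res_lo, res_hi, tmp;

	if (!bias) {
		res = ((uint64_t)m_lo * n_lo) >> 32;
	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		/* bits 63 and 31 of m clear: the add cannot overflow */
		res = (m + (uint64_t)m_lo * n_lo) >> 32;
	} else {
		res = m + (uint64_t)m_lo * n_lo;
		/*
		 * A carry out of the add is visible in the top word
		 * alone: m_lo * n_lo <= (2^32 - 1)^2 < 2^64 - 2^32,
		 * so a wrapped sum drops res >> 32 below m_hi.
		 */
		res_lo = res >> 32;
		res_hi = (res_lo < m_hi);
		res = res_lo | ((uint64_t)res_hi << 32);
	}

	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		res += (uint64_t)m_lo * n_hi;
		res += (uint64_t)m_hi * n_lo;
		res >>= 32;
	} else {
		res += (uint64_t)m_lo * n_hi;
		tmp = res >> 32;
		res += (uint64_t)m_hi * n_lo;
		/* same trick: a carry drops res >> 32 below tmp */
		res_lo = res >> 32;
		res_hi = (res_lo < tmp);
		res = res_lo | ((uint64_t)res_hi << 32);
	}

	res += (uint64_t)m_hi * n_hi;

	return res;
}

/* Reference result computed with a full 128-bit multiply. */
static uint64_t xprod_64_ref(uint64_t m, uint64_t n, bool bias)
{
	unsigned __int128 p = (unsigned __int128)m * n;

	if (bias)
		p += m;
	return (uint64_t)(p >> 32);
}

int main(void)
{
	uint64_t x = 88172645463325252ULL;	/* xorshift64 state */
	long i;

	for (i = 0; i < 10000000; i++) {
		uint64_t m, n;
		int bias;

		x ^= x << 13; x ^= x >> 7; x ^= x << 17; m = x;
		x ^= x << 13; x ^= x >> 7; x ^= x << 17; n = x;
		for (bias = 0; bias <= 1; bias++) {
			if (xprod_64(m, n, bias) !=
			    xprod_64_ref(m, n, bias)) {
				printf("FAIL m=%#llx n=%#llx bias=%d\n",
				       (unsigned long long)m,
				       (unsigned long long)n, bias);
				return 1;
			}
		}
	}
	printf("OK\n");
	return 0;
}

The comparisons are safe because each 32x32 partial product is at most
(2^32 - 1)^2, i.e. more than 2^32 below 2^64, so whenever a sum wraps
its top word necessarily drops; that is what lets the 1ULL << 32
immediates and the 64-bit temporaries go away.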