On Sun, Dec 5, 2021 at 10:55 PM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > When compiling an optabs.ii at -O2 with a release-checking build, > the hottest function in the profile was irange_union. This patch > tries to optimise it a bit. The specific changes are: > > - Use quick_push rather than safe_push, since the final number > of entries is known in advance. > > - Avoid assigning wi::to_wide & co. to a temporary wide_int, > such as in: > > wide_int val_j = wi::to_wide (res[j]); > > wi::to_wide returns a wide_int "view" of the in-place INTEGER_CST > storage. Assigning the result to wide_int forces an unnecessary > copy to temporary storage. > > This is one area where "auto" helps a lot. In the end though, > it seemed more readable to inline the wi::to_*s rather than > use auto. > > - Use to_widest_int rather than to_wide_int. Both are functionally > correct, but to_widest_int is more efficient, for three reasons: > > - to_wide returns a wide-int representation in which the most > significant element might not be canonically sign-extended. > This is because we want to allow the storage of an INTEGER_CST > like 0x1U << 31 to be accessed directly with both a wide_int view > (where only 32 bits matter) and a widest_int view (where many more > bits matter, and where the 32 bits are zero-extended to match the > unsigned type). However, operating on uncanonicalised wide_int > forms is less efficient than operating on canonicalised forms. > > - to_widest_int has a constant rather than variable precision and > there are never any redundant upper bits to worry about. > > - Using widest_int avoids the need for an overflow check, since > there is enough precision to add 1 to any IL constant without > wrap-around. > > This gives a ~2% compile-time speed up with the test above. > > I also tried adding a path for two single-pair ranges, but it > wasn't a win. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
OK. Thanks, Richard. > Richard > > > gcc/ > * value-range.cc (irange::irange_union): Use quick_push rather > than safe_push. Use widest_int rather than wide_int. Avoid > assigning wi::to_* results to wide*_int temporaries. > --- > gcc/value-range.cc | 46 +++++++++++++--------------------------------- > 1 file changed, 13 insertions(+), 33 deletions(-) > > diff --git a/gcc/value-range.cc b/gcc/value-range.cc > index 82509fa55a7..d38d0786072 100644 > --- a/gcc/value-range.cc > +++ b/gcc/value-range.cc > @@ -1550,70 +1550,50 @@ irange::irange_union (const irange &r) > // the merge is performed. > // > // [Xi,Yi]..[Xn,Yn] U [Xj,Yj]..[Xm,Ym] --> [Xk,Yk]..[Xp,Yp] > - tree ttype = r.type (); > - signop sign = TYPE_SIGN (ttype); > - > - auto_vec<tree, 20> res; > - wide_int u1 ; > - wi::overflow_type ovf; > + auto_vec<tree, 20> res (m_num_ranges * 2 + r.m_num_ranges * 2); > unsigned i = 0, j = 0, k = 0; > > while (i < m_num_ranges * 2 && j < r.m_num_ranges * 2) > { > // lower of Xi and Xj is the lowest point. > - if (wi::le_p (wi::to_wide (m_base[i]), wi::to_wide (r.m_base[j]), > sign)) > + if (wi::to_widest (m_base[i]) <= wi::to_widest (r.m_base[j])) > { > - res.safe_push (m_base[i]); > - res.safe_push (m_base[i + 1]); > + res.quick_push (m_base[i]); > + res.quick_push (m_base[i + 1]); > k += 2; > i += 2; > } > else > { > - res.safe_push (r.m_base[j]); > - res.safe_push (r.m_base[j + 1]); > + res.quick_push (r.m_base[j]); > + res.quick_push (r.m_base[j + 1]); > k += 2; > j += 2; > } > } > for ( ; i < m_num_ranges * 2; i += 2) > { > - res.safe_push (m_base[i]); > - res.safe_push (m_base[i + 1]); > + res.quick_push (m_base[i]); > + res.quick_push (m_base[i + 1]); > k += 2; > } > for ( ; j < r.m_num_ranges * 2; j += 2) > { > - res.safe_push (r.m_base[j]); > - res.safe_push (r.m_base[j + 1]); > + res.quick_push (r.m_base[j]); > + res.quick_push (r.m_base[j + 1]); > k += 2; > } > > // Now normalize the vector removing any overlaps. > i = 2; > - int prec = TYPE_PRECISION (ttype); > - wide_int max_val = wi::max_value (prec, sign); > for (j = 2; j < k ; j += 2) > { > - wide_int val_im1 = wi::to_wide (res[i - 1]); > - if (val_im1 == max_val) > - break; > - u1 = wi::add (val_im1, 1, sign, &ovf); > - > - // Overflow indicates we are at MAX already. > - // A wide int bug requires the previous max_val check > - // trigger: gcc.c-torture/compile/pr80443.c with -O3 > - if (ovf == wi::OVF_OVERFLOW) > - break; > - > - wide_int val_j = wi::to_wide (res[j]); > - wide_int val_jp1 = wi::to_wide (res[j+1]); > // Current upper+1 is >= lower bound next pair, then we merge ranges. > - if (wi::ge_p (u1, val_j, sign)) > + if (wi::to_widest (res[i - 1]) + 1 >= wi::to_widest (res[j])) > { > // New upper bounds is greater of current or the next one. > - if (wi::gt_p (val_jp1, val_im1, sign)) > - res [i - 1] = res[j + 1]; > + if (wi::to_widest (res[j + 1]) > wi::to_widest (res[i - 1])) > + res[i - 1] = res[j + 1]; > } > else > { > -- > 2.31.1 >