Hi,

On Fri, Apr 26 2024, Aldy Hernandez via Gcc wrote:
> Hi folks!
>
> In implementing prange (pointer ranges), I have found a 1.74% slowdown
> in VRP, even without any code path actually using the code.  I have
> tracked this down to irange::get_bitmask() being compiled differently
> with and without the bare bones patch.  With the patch,
> irange::get_bitmask() has a lot of code inlined into it, particularly
> get_bitmask_from_range() and consequently the wide_int_storage code.
>
> I don't know whether this is expected behavior, and if it is, how to
> mitigate it.  I have tried declaring get_bitmask_from_range() inline,
> but that didn't help.  OTOH, using __attribute__((always_inline))
> helps a bit, but not entirely.  What does help is inlining
> irange::get_bitmask() entirely, but that seems like a big hammer.
>
> The overall slowdown in compilation is 0.26%, because VRP is a
> relatively fast pass, but a measurable pass slowdown is something we'd
> like to avoid.
>
> What's the recommended approach here?

I'm afraid that the right approach (not sure if that also means the
recommended approach) is to figure out why inlining
irange::get_bitmask() helps, i.e. what unnecessary computations or
memory accesses it avoids or which other subsequent optimizations it
enables, etc.  Then we can have a look at whether IPA could facilitate this
without inlining (or if eventually code shrinks to a reasonable size,
how to teach the inliner to predict this).

Martin


>
> For the curious, I am attaching before and after copies of
> value-range.s.  I am also attaching the two patches needed to
> reproduce the problem on mainline.  The first patch is merely setup.
> It is the second patch that exhibits the problem.  Notice there are no
> uses of prange yet.
>
> Thanks.
> Aldy
> From ee63833c5f56064ef47c2bb9debd485f77d00171 Mon Sep 17 00:00:00 2001
> From: Aldy Hernandez <al...@redhat.com>
> Date: Tue, 19 Mar 2024 18:04:55 +0100
> Subject: [PATCH] Move get_bitmask_from_range out of irange class.
>
> ---
>  gcc/value-range.cc | 52 +++++++++++++++++++++++-----------------------
>  gcc/value-range.h  |  1 -
>  2 files changed, 26 insertions(+), 27 deletions(-)
>
> diff --git a/gcc/value-range.cc b/gcc/value-range.cc
> index 70375f7abf9..0f81ce32615 100644
> --- a/gcc/value-range.cc
> +++ b/gcc/value-range.cc
> @@ -31,6 +31,30 @@ along with GCC; see the file COPYING3.  If not see
>  #include "fold-const.h"
>  #include "gimple-range.h"
>  
> +// Return the bitmask inherent in a range.
> +
> +static irange_bitmask
> +get_bitmask_from_range (tree type,
> +                     const wide_int &min, const wide_int &max)
> +{
> +  unsigned prec = TYPE_PRECISION (type);
> +
> +  // All the bits of a singleton are known.
> +  if (min == max)
> +    {
> +      wide_int mask = wi::zero (prec);
> +      wide_int value = min;
> +      return irange_bitmask (value, mask);
> +    }
> +
> +  wide_int xorv = min ^ max;
> +
> +  if (xorv != 0)
> +    xorv = wi::mask (prec - wi::clz (xorv), false, prec);
> +
> +  return irange_bitmask (wi::zero (prec), min | xorv);
> +}
> +
>  void
>  irange::accept (const vrange_visitor &v) const
>  {
> @@ -1832,31 +1856,6 @@ irange::invert ()
>      verify_range ();
>  }
>  
> -// Return the bitmask inherent in the range.
> -
> -irange_bitmask
> -irange::get_bitmask_from_range () const
> -{
> -  unsigned prec = TYPE_PRECISION (type ());
> -  wide_int min = lower_bound ();
> -  wide_int max = upper_bound ();
> -
> -  // All the bits of a singleton are known.
> -  if (min == max)
> -    {
> -      wide_int mask = wi::zero (prec);
> -      wide_int value = lower_bound ();
> -      return irange_bitmask (value, mask);
> -    }
> -
> -  wide_int xorv = min ^ max;
> -
> -  if (xorv != 0)
> -    xorv = wi::mask (prec - wi::clz (xorv), false, prec);
> -
> -  return irange_bitmask (wi::zero (prec), min | xorv);
> -}
> -
>  // Remove trailing ranges that this bitmask indicates can't exist.
>  
>  void
> @@ -1978,7 +1977,8 @@ irange::get_bitmask () const
>    // in the mask.
>    //
>    // See also the note in irange_bitmask::intersect.
> -  irange_bitmask bm = get_bitmask_from_range ();
> +  irange_bitmask bm
> +    = get_bitmask_from_range (type (), lower_bound (), upper_bound ());
>    if (!m_bitmask.unknown_p ())
>      bm.intersect (m_bitmask);
>    return bm;
> diff --git a/gcc/value-range.h b/gcc/value-range.h
> index 9531df56988..dc5b153a83e 100644
> --- a/gcc/value-range.h
> +++ b/gcc/value-range.h
> @@ -351,7 +351,6 @@ private:
>    bool varying_compatible_p () const;
>    bool intersect_bitmask (const irange &r);
>    bool union_bitmask (const irange &r);
> -  irange_bitmask get_bitmask_from_range () const;
>    bool set_range_from_bitmask ();
>  
>    bool intersect (const wide_int& lb, const wide_int& ub);
> -- 
> 2.44.0
>
> From 03c70de43177a97ec5e9c243aafc798c1f37e6d8 Mon Sep 17 00:00:00 2001
> From: Aldy Hernandez <al...@redhat.com>
> Date: Wed, 20 Mar 2024 06:25:52 +0100
> Subject: [PATCH] Implement minimum prange class exhibiting VRP slowdown.
>
> ---
>  gcc/value-range-pretty-print.cc |  25 +++
>  gcc/value-range-pretty-print.h  |   1 +
>  gcc/value-range.cc              | 274 ++++++++++++++++++++++++++++++++
>  gcc/value-range.h               | 196 +++++++++++++++++++++++
>  4 files changed, 496 insertions(+)
>
> diff --git a/gcc/value-range-pretty-print.cc b/gcc/value-range-pretty-print.cc
> index c75cbea3955..154253e047f 100644
> --- a/gcc/value-range-pretty-print.cc
> +++ b/gcc/value-range-pretty-print.cc
> @@ -113,6 +113,31 @@ vrange_printer::print_irange_bitmasks (const irange &r) const
>    pp_string (pp, p);
>  }
>  
> +void
> +vrange_printer::visit (const prange &r) const
> +{
> +  pp_string (pp, "[prange] ");
> +  if (r.undefined_p ())
> +    {
> +      pp_string (pp, "UNDEFINED");
> +      return;
> +    }
> +  dump_generic_node (pp, r.type (), 0, TDF_NONE | TDF_NOUID, false);
> +  pp_character (pp, ' ');
> +  if (r.varying_p ())
> +    {
> +      pp_string (pp, "VARYING");
> +      return;
> +    }
> +
> +  pp_character (pp, '[');
> +  //print_int_bound (pp, r.lower_bound (), r.type ());
> +  pp_string (pp, ", ");
> +  //print_int_bound (pp, r.upper_bound (), r.type ());
> +  pp_character (pp, ']');
> +  //print_irange_bitmasks (pp, r.m_bitmask);
> +}
> +
>  void
>  vrange_printer::print_real_value (tree type, const REAL_VALUE_TYPE &r) const
>  {
> diff --git a/gcc/value-range-pretty-print.h b/gcc/value-range-pretty-print.h
> index ca85fd6157c..54ee0cf8c26 100644
> --- a/gcc/value-range-pretty-print.h
> +++ b/gcc/value-range-pretty-print.h
> @@ -27,6 +27,7 @@ public:
>    vrange_printer (pretty_printer *pp_) : pp (pp_) { }
>    void visit (const unsupported_range &) const override;
>    void visit (const irange &) const override;
> +  void visit (const prange &) const override;
>    void visit (const frange &) const override;
>  private:
>    void print_irange_bound (const wide_int &w, tree type) const;
> diff --git a/gcc/value-range.cc b/gcc/value-range.cc
> index 0f81ce32615..06ab1a616bf 100644
> --- a/gcc/value-range.cc
> +++ b/gcc/value-range.cc
> @@ -377,6 +377,280 @@ irange::set_nonnegative (tree type)
>         wi::to_wide (TYPE_MAX_VALUE (type)));
>  }
>  
> +// Prange implementation.
> +
> +void
> +prange::accept (const vrange_visitor &v) const
> +{
> +  v.visit (*this);
> +}
> +
> +void
> +prange::set_nonnegative (tree type)
> +{
> +  set (type,
> +       wi::zero (TYPE_PRECISION (type)),
> +       wi::max_value (TYPE_PRECISION (type), UNSIGNED));
> +}
> +
> +void
> +prange::set (tree min, tree max, value_range_kind kind)
> +{
> +  return set (TREE_TYPE (min), wi::to_wide (min), wi::to_wide (max), kind);
> +}
> +
> +void
> +prange::set (tree type, const wide_int &min, const wide_int &max,
> +          value_range_kind kind)
> +{
> +  if (kind == VR_UNDEFINED)
> +    {
> +      set_undefined ();
> +      return;
> +    }
> +  if (kind == VR_VARYING)
> +    {
> +      set_varying (type);
> +      return;
> +    }
> +  if (kind == VR_ANTI_RANGE)
> +    {
> +      gcc_checking_assert (min == 0 && max == 0);
> +      set_nonzero (type);
> +      return;
> +    }
> +  m_type = type;
> +  m_min = min;
> +  m_max = max;
> +  if (m_min == 0 && m_max == -1)
> +    {
> +      m_kind = VR_VARYING;
> +      m_bitmask.set_unknown (TYPE_PRECISION (type));
> +      if (flag_checking)
> +     verify_range ();
> +      return;
> +    }
> +
> +  m_kind = VR_RANGE;
> +  m_bitmask = get_bitmask_from_range (type, min, max);
> +  if (flag_checking)
> +    verify_range ();
> +}
> +
> +bool
> +prange::contains_p (const wide_int &w) const
> +{
> +  if (undefined_p ())
> +    return false;
> +
> +  if (varying_p ())
> +    return true;
> +
> +  return (wi::le_p (lower_bound (), w, UNSIGNED)
> +       && wi::ge_p (upper_bound (), w, UNSIGNED));
> +}
> +
> +bool
> +prange::singleton_p (tree *result) const
> +{
> +  if (m_kind == VR_RANGE && lower_bound () == upper_bound ())
> +    {
> +      if (result)
> +     *result = wide_int_to_tree (type (), m_min);
> +      return true;
> +    }
> +  return false;
> +}
> +
> +bool
> +prange::union_ (const vrange &v)
> +{
> +  const prange &r = as_a <prange> (v);
> +
> +  if (r.undefined_p ())
> +    return false;
> +  if (undefined_p ())
> +    {
> +      *this = r;
> +      if (flag_checking)
> +     verify_range ();
> +      return true;
> +    }
> +  if (varying_p ())
> +    return false;
> +  if (r.varying_p ())
> +    {
> +      set_varying (type ());
> +      return true;
> +    }
> +
> +  wide_int new_lb = wi::min (r.lower_bound (), lower_bound (), UNSIGNED);
> +  wide_int new_ub = wi::max (r.upper_bound (), upper_bound (), UNSIGNED);
> +  prange new_range (type (), new_lb, new_ub);
> +  new_range.m_bitmask.union_ (m_bitmask);
> +  new_range.m_bitmask.union_ (r.m_bitmask);
> +  if (new_range.varying_compatible_p ())
> +    {
> +      set_varying (type ());
> +      return true;
> +    }
> +  if (flag_checking)
> +    new_range.verify_range ();
> +  if (new_range == *this)
> +    return false;
> +  *this = new_range;
> +  return true;
> +}
> +
> +bool
> +prange::intersect (const vrange &v)
> +{
> +  const prange &r = as_a <prange> (v);
> +  gcc_checking_assert (undefined_p () || r.undefined_p ()
> +                    || range_compatible_p (type (), r.type ()));
> +
> +  if (undefined_p ())
> +    return false;
> +  if (r.undefined_p ())
> +    {
> +      set_undefined ();
> +      return true;
> +    }
> +  if (r.varying_p ())
> +    return false;
> +  if (varying_p ())
> +    {
> +      *this = r;
> +      return true;
> +    }
> +
> +  prange save = *this;
> +  m_min = wi::max (r.lower_bound (), lower_bound (), UNSIGNED);
> +  m_max = wi::min (r.upper_bound (), upper_bound (), UNSIGNED);
> +  if (wi::gt_p (m_min, m_max, UNSIGNED))
> +    {
> +      set_undefined ();
> +      return true;
> +    }
> +
> +  // Intersect all bitmasks: the old one, the new one, and the other operand's.
> +  irange_bitmask new_bitmask = get_bitmask_from_range (m_type, m_min, m_max);
> +  m_bitmask.intersect (new_bitmask);
> +  m_bitmask.intersect (r.m_bitmask);
> +
> +  if (flag_checking)
> +    verify_range ();
> +  if (*this == save)
> +    return false;
> +  return true;
> +}
> +
> +prange &
> +prange::operator= (const prange &src)
> +{
> +  m_type = src.m_type;
> +  m_kind = src.m_kind;
> +  m_min = src.m_min;
> +  m_max = src.m_max;
> +  m_bitmask = src.m_bitmask;
> +  if (flag_checking)
> +    verify_range ();
> +  return *this;
> +}
> +
> +bool
> +prange::operator== (const prange &src) const
> +{
> +  if (m_kind == src.m_kind)
> +    {
> +      if (undefined_p ())
> +     return true;
> +
> +      if (varying_p ())
> +     return types_compatible_p (type (), src.type ());
> +
> +      return (m_min == src.m_min && m_max == src.m_max
> +           && m_bitmask == src.m_bitmask);
> +    }
> +  return false;
> +}
> +
> +void
> +prange::invert ()
> +{
> +  gcc_checking_assert (!undefined_p () && !varying_p ());
> +
> +  wide_int new_lb, new_ub;
> +  unsigned prec = TYPE_PRECISION (type ());
> +  wide_int type_min = wi::zero (prec);
> +  wide_int type_max = wi::max_value (prec, UNSIGNED);
> +  wi::overflow_type ovf;
> +
> +  if (lower_bound () == type_min)
> +    {
> +      new_lb = wi::add (upper_bound (), 1, UNSIGNED, &ovf);
> +      if (ovf)
> +     new_lb = type_min;
> +      new_ub = type_max;
> +      set (type (), new_lb, new_ub);
> +    }
> +  else if (upper_bound () == type_max)
> +    {
> +      wi::overflow_type ovf;
> +      new_lb = type_min;
> +      new_ub = wi::sub (lower_bound (), 1, UNSIGNED, &ovf);
> +      if (ovf)
> +     new_ub = type_max;
> +      set (type (), new_lb, new_ub);
> +    }
> +  else
> +    set_varying (type ());
> +}
> +
> +void
> +prange::verify_range () const
> +{
> +  gcc_checking_assert (m_discriminator == VR_PRANGE);
> +
> +  if (m_kind == VR_UNDEFINED)
> +    return;
> +
> +  gcc_checking_assert (supports_p (type ()));
> +
> +  if (m_kind == VR_VARYING)
> +    {
> +      gcc_checking_assert (varying_compatible_p ());
> +      return;
> +    }
> +  gcc_checking_assert (!varying_compatible_p ());
> +  gcc_checking_assert (m_kind == VR_RANGE);
> +}
> +
> +void
> +prange::update_bitmask (const irange_bitmask &bm)
> +{
> +  gcc_checking_assert (!undefined_p ());
> +
> +  // If all the bits are known, this is a singleton.
> +  if (bm.mask () == 0)
> +    {
> +      set (type (), m_bitmask.value (), m_bitmask.value ());
> +      return;
> +    }
> +
> +  // Drop VARYINGs with known bits to a plain range.
> +  if (m_kind == VR_VARYING && !bm.unknown_p ())
> +    m_kind = VR_RANGE;
> +
> +  m_bitmask = bm;
> +  if (varying_compatible_p ())
> +    m_kind = VR_VARYING;
> +
> +  if (flag_checking)
> +    verify_range ();
> +}
> +
> +
>  void
>  frange::accept (const vrange_visitor &v) const
>  {
> diff --git a/gcc/value-range.h b/gcc/value-range.h
> index dc5b153a83e..9fac89a2f98 100644
> --- a/gcc/value-range.h
> +++ b/gcc/value-range.h
> @@ -47,6 +47,8 @@ enum value_range_discriminator
>  {
>    // Range holds an integer or pointer.
>    VR_IRANGE,
> +  // Pointer range.
> +  VR_PRANGE,
>    // Floating point range.
>    VR_FRANGE,
>    // Range holds an unsupported type.
> @@ -389,6 +391,54 @@ private:
>    wide_int m_ranges[N*2];
>  };
>  
> +class prange : public vrange
> +{
> +  friend class prange_storage;
> +  friend class vrange_printer;
> +public:
> +  prange ();
> +  prange (const prange &);
> +  prange (tree type);
> +  prange (tree type, const wide_int &, const wide_int &,
> +       value_range_kind = VR_RANGE);
> +  static bool supports_p (const_tree type);
> +  virtual bool supports_type_p (const_tree type) const final override;
> +  virtual void accept (const vrange_visitor &v) const final override;
> +  virtual void set_undefined () final override;
> +  virtual void set_varying (tree type) final override;
> +  virtual void set_nonzero (tree type) final override;
> +  virtual void set_zero (tree type) final override;
> +  virtual void set_nonnegative (tree type) final override;
> +  virtual bool contains_p (tree cst) const final override;
> +  virtual bool fits_p (const vrange &v) const final override;
> +  virtual bool singleton_p (tree *result = NULL) const final override;
> +  virtual bool zero_p () const final override;
> +  virtual bool nonzero_p () const final override;
> +  virtual void set (tree, tree, value_range_kind = VR_RANGE) final override;
> +  virtual tree type () const final override;
> +  virtual bool union_ (const vrange &v) final override;
> +  virtual bool intersect (const vrange &v) final override;
> +
> +  prange& operator= (const prange &);
> +  bool operator== (const prange &) const;
> +  void set (tree type, const wide_int &, const wide_int &,
> +         value_range_kind = VR_RANGE);
> +  void invert ();
> +  bool contains_p (const wide_int &) const;
> +  wide_int lower_bound () const;
> +  wide_int upper_bound () const;
> +  void verify_range () const;
> +  irange_bitmask get_bitmask () const;
> +  void update_bitmask (const irange_bitmask &);
> +protected:
> +  bool varying_compatible_p () const;
> +
> +  tree m_type;
> +  wide_int m_min;
> +  wide_int m_max;
> +  irange_bitmask m_bitmask;
> +};
> +
>  // Unsupported temporaries may be created by ranger before it's known
>  // they're unsupported, or by vr_values::get_value_range.
>  
> @@ -667,6 +717,7 @@ class vrange_visitor
>  {
>  public:
>    virtual void visit (const irange &) const { }
> +  virtual void visit (const prange &) const { }
>    virtual void visit (const frange &) const { }
>    virtual void visit (const unsupported_range &) const { }
>  };
> @@ -1196,6 +1247,151 @@ irange_val_max (const_tree type)
>    return wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
>  }
>  
> +inline
> +prange::prange ()
> +  : vrange (VR_PRANGE)
> +{
> +  set_undefined ();
> +}
> +
> +inline
> +prange::prange (const prange &r)
> +  : vrange (VR_PRANGE)
> +{
> +  *this = r;
> +}
> +
> +inline
> +prange::prange (tree type)
> +  : vrange (VR_PRANGE)
> +{
> +  set_varying (type);
> +}
> +
> +inline
> +prange::prange (tree type, const wide_int &lb, const wide_int &ub,
> +             value_range_kind kind)
> +  : vrange (VR_PRANGE)
> +{
> +  set (type, lb, ub, kind);
> +}
> +
> +inline bool
> +prange::supports_p (const_tree type)
> +{
> +  return POINTER_TYPE_P (type);
> +}
> +
> +inline bool
> +prange::supports_type_p (const_tree type) const
> +{
> +  return POINTER_TYPE_P (type);
> +}
> +
> +inline void
> +prange::set_undefined ()
> +{
> +  m_kind = VR_UNDEFINED;
> +}
> +
> +inline void
> +prange::set_varying (tree type)
> +{
> +  m_kind = VR_VARYING;
> +  m_type = type;
> +  m_min = wi::zero (TYPE_PRECISION (type));
> +  m_max = wi::max_value (TYPE_PRECISION (type), UNSIGNED);
> +  m_bitmask.set_unknown (TYPE_PRECISION (type));
> +
> +  if (flag_checking)
> +    verify_range ();
> +}
> +
> +inline void
> +prange::set_nonzero (tree type)
> +{
> +  m_kind = VR_RANGE;
> +  m_type = type;
> +  m_min = wi::one (TYPE_PRECISION (type));
> +  m_max = wi::max_value (TYPE_PRECISION (type), UNSIGNED);
> +  m_bitmask.set_unknown (TYPE_PRECISION (type));
> +
> +  if (flag_checking)
> +    verify_range ();
> +}
> +
> +inline void
> +prange::set_zero (tree type)
> +{
> +  m_kind = VR_RANGE;
> +  m_type = type;
> +  wide_int zero = wi::zero (TYPE_PRECISION (type));
> +  m_min = m_max = zero;
> +  m_bitmask = irange_bitmask (zero, zero);
> +
> +  if (flag_checking)
> +    verify_range ();
> +}
> +
> +inline bool
> +prange::contains_p (tree cst) const
> +{
> +  return contains_p (wi::to_wide (cst));
> +}
> +
> +inline bool
> +prange::zero_p () const
> +{
> +  return m_kind == VR_RANGE && m_min == 0 && m_max == 0;
> +}
> +
> +inline bool
> +prange::nonzero_p () const
> +{
> +  return m_kind == VR_RANGE && m_min == 1 && m_max == -1;
> +}
> +
> +inline tree
> +prange::type () const
> +{
> +  gcc_checking_assert (!undefined_p ());
> +  return m_type;
> +}
> +
> +inline wide_int
> +prange::lower_bound () const
> +{
> +  gcc_checking_assert (!undefined_p ());
> +  return m_min;
> +}
> +
> +inline wide_int
> +prange::upper_bound () const
> +{
> +  gcc_checking_assert (!undefined_p ());
> +  return m_max;
> +}
> +
> +inline bool
> +prange::varying_compatible_p () const
> +{
> +  return (!undefined_p ()
> +       && m_min == 0 && m_max == -1 && get_bitmask ().unknown_p ());
> +}
> +
> +inline irange_bitmask
> +prange::get_bitmask () const
> +{
> +  return m_bitmask;
> +}
> +
> +inline bool
> +prange::fits_p (const vrange &) const
> +{
> +  return true;
> +}
> +
> +
>  inline
>  frange::frange ()
>    : vrange (VR_FRANGE)
> -- 
> 2.44.0
>
>       .globl  _ZNK6irange11get_bitmaskEv
>       .type   _ZNK6irange11get_bitmaskEv, @function
> _ZNK6irange11get_bitmaskEv:
> .LFB3242:
>       .cfi_startproc
>       pushq   %r13
>       .cfi_def_cfa_offset 16
>       .cfi_offset 13, -16
>       movq    %rdi, %r13
>       pushq   %r12
>       .cfi_def_cfa_offset 24
>       .cfi_offset 12, -24
>       pushq   %rbp
>       .cfi_def_cfa_offset 32
>       .cfi_offset 6, -32
>       pushq   %rbx
>       .cfi_def_cfa_offset 40
>       .cfi_offset 3, -40
>       movq    %rsi, %rbx
>       subq    $168, %rsp
>       .cfi_def_cfa_offset 208
>       movzbl  10(%rsi), %eax
>       movq    184(%rsi), %r12
>       leal    -1(%rax,%rax), %eax
>       leaq    (%rax,%rax,4), %rbp
>       salq    $4, %rbp
>       addq    %r12, %rbp
>       movdqu  0(%rbp), %xmm0
>       movaps  %xmm0, 80(%rsp)
>       movdqu  16(%rbp), %xmm0
>       movaps  %xmm0, 96(%rsp)
>       movdqu  32(%rbp), %xmm0
>       movaps  %xmm0, 112(%rsp)
>       movdqu  48(%rbp), %xmm0
>       movaps  %xmm0, 128(%rsp)
>       movdqu  64(%rbp), %xmm0
>       movaps  %xmm0, 144(%rsp)
>       movl    156(%rsp), %eax
>       cmpl    $576, %eax
>       ja      .L2460
> .L2448:
>       movdqu  (%r12), %xmm0
>       movaps  %xmm0, (%rsp)
>       movdqu  16(%r12), %xmm0
>       movaps  %xmm0, 16(%rsp)
>       movdqu  32(%r12), %xmm0
>       movaps  %xmm0, 32(%rsp)
>       movdqu  48(%r12), %xmm0
>       movaps  %xmm0, 48(%rsp)
>       movdqu  64(%r12), %xmm0
>       movaps  %xmm0, 64(%rsp)
>       movl    76(%rsp), %eax
>       cmpl    $576, %eax
>       ja      .L2461
> .L2449:
>       movq    (%rbx), %rax
>       movq    16(%rax), %rax
>       cmpq    $_ZNK6irange4typeEv, %rax
>       jne     .L2450
>       movq    16(%rbx), %rax
> .L2451:
>       movzwl  54(%rax), %esi
>       leaq    80(%rsp), %rcx
>       movq    %rsp, %rdx
>       movq    %r13, %rdi
> 	call    _ZL22get_bitmask_from_rangeP9tree_nodeRK16generic_wide_intI16wide_int_storageES5_.isra.0
>       cmpl    $576, 76(%rsp)
>       ja      .L2462
>       cmpl    $576, 156(%rsp)
>       ja      .L2463
> .L2453:
>       cmpl    $576, 180(%rbx)
>       movl    176(%rbx), %eax
>       leaq    104(%rbx), %rdx
>       ja      .L2464
> .L2455:
>       cmpl    $1, %eax
>       jne     .L2458
>       cmpq    $-1, (%rdx)
>       je      .L2447
> .L2458:
>       leaq    24(%rbx), %rsi
>       movq    %r13, %rdi
>       call    _ZN14irange_bitmask9intersectERKS_
> .L2447:
>       addq    $168, %rsp
>       .cfi_remember_state
>       .cfi_def_cfa_offset 40
>       movq    %r13, %rax
>       popq    %rbx
>       .cfi_def_cfa_offset 32
>       popq    %rbp
>       .cfi_def_cfa_offset 24
>       popq    %r12
>       .cfi_def_cfa_offset 16
>       popq    %r13
>       .cfi_def_cfa_offset 8
>       ret
>       .p2align 4,,10
>       .p2align 3
> .L2450:
>       .cfi_restore_state
>       movq    %rbx, %rdi
>       call    *%rax
>       jmp     .L2451
>       .p2align 4,,10
>       .p2align 3
> .L2461:
>       leal    63(%rax), %edi
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    72(%rsp), %edx
>       movq    (%r12), %rsi
>       movq    %rax, %rdi
>       movq    %rax, (%rsp)
>       salq    $3, %rdx
>       call    memcpy
>       jmp     .L2449
>       .p2align 4,,10
>       .p2align 3
> .L2462:
>       movq    (%rsp), %rdi
>       call    free
>       cmpl    $576, 156(%rsp)
>       jbe     .L2453
>       .p2align 4,,10
>       .p2align 3
> .L2463:
>       movq    80(%rsp), %rdi
>       call    free
>       jmp     .L2453
>       .p2align 4,,10
>       .p2align 3
> .L2464:
>       movq    104(%rbx), %rdx
>       jmp     .L2455
>       .p2align 4,,10
>       .p2align 3
> .L2460:
>       leal    63(%rax), %edi
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    152(%rsp), %edx
>       movq    0(%rbp), %rsi
>       movq    %rax, %rdi
>       movq    %rax, 80(%rsp)
>       salq    $3, %rdx
>       call    memcpy
>       movq    184(%rbx), %r12
>       jmp     .L2448
>       .cfi_endproc
> .LFE3242:
>       .size   _ZNK6irange11get_bitmaskEv, .-_ZNK6irange11get_bitmaskEv
>       .section        .rodata.str1.1
> .LC38:
>       .string "add_vrange"
>       .globl  _ZNK6irange11get_bitmaskEv
>       .type   _ZNK6irange11get_bitmaskEv, @function
> _ZNK6irange11get_bitmaskEv:
> .LFB3197:
>       .cfi_startproc
>       pushq   %r15
>       .cfi_def_cfa_offset 16
>       .cfi_offset 15, -16
>       pushq   %r14
>       .cfi_def_cfa_offset 24
>       .cfi_offset 14, -24
>       pushq   %r13
>       .cfi_def_cfa_offset 32
>       .cfi_offset 13, -32
>       pushq   %r12
>       .cfi_def_cfa_offset 40
>       .cfi_offset 12, -40
>       pushq   %rbp
>       .cfi_def_cfa_offset 48
>       .cfi_offset 6, -48
>       movq    %rdi, %rbp
>       pushq   %rbx
>       .cfi_def_cfa_offset 56
>       .cfi_offset 3, -56
>       movq    %rsi, %rbx
>       subq    $584, %rsp
>       .cfi_def_cfa_offset 640
>       movzbl  10(%rsi), %eax
>       movq    184(%rsi), %r13
>       leal    -1(%rax,%rax), %eax
>       leaq    (%rax,%rax,4), %r12
>       salq    $4, %r12
>       addq    %r13, %r12
>       movdqu  (%r12), %xmm0
>       movaps  %xmm0, 96(%rsp)
>       movdqu  16(%r12), %xmm0
>       movaps  %xmm0, 112(%rsp)
>       movdqu  32(%r12), %xmm0
>       movaps  %xmm0, 128(%rsp)
>       movdqu  48(%r12), %xmm0
>       movaps  %xmm0, 144(%rsp)
>       movdqu  64(%r12), %xmm0
>       movaps  %xmm0, 160(%rsp)
>       movl    172(%rsp), %eax
>       cmpl    $576, %eax
>       ja      .L1610
> .L1532:
>       movdqu  0(%r13), %xmm0
>       movaps  %xmm0, 16(%rsp)
>       movdqu  16(%r13), %xmm0
>       movaps  %xmm0, 32(%rsp)
>       movdqu  32(%r13), %xmm0
>       movaps  %xmm0, 48(%rsp)
>       movdqu  48(%r13), %xmm0
>       movaps  %xmm0, 64(%rsp)
>       movdqu  64(%r13), %xmm0
>       movaps  %xmm0, 80(%rsp)
>       movl    92(%rsp), %eax
>       cmpl    $576, %eax
>       ja      .L1611
> .L1533:
>       movq    (%rbx), %rax
>       movq    16(%rax), %rax
>       cmpq    $_ZNK6irange4typeEv, %rax
>       jne     .L1534
>       movq    16(%rbx), %rax
> .L1535:
>       movl    92(%rsp), %r9d
>       movzwl  54(%rax), %r14d
>       movl    88(%rsp), %esi
>       movl    172(%rsp), %r11d
>       movl    168(%rsp), %r8d
>       cmpl    $576, %r9d
>       ja      .L1536
>       cmpl    $576, %r11d
>       ja      .L1612
>       cmpl    %r8d, %esi
>       je      .L1587
>       movl    %r9d, 252(%rsp)
>       leaq    16(%rsp), %r12
>       leaq    96(%rsp), %rcx
>       leaq    176(%rsp), %r15
>       movq    %r12, %rax
>       movq    %r15, %rdi
> .L1541:
>       leal    (%rsi,%r8), %edx
>       cmpl    $2, %edx
>       jne     .L1555
>       movl    252(%rsp), %edx
>       movq    (%rax), %rax
>       xorq    (%rcx), %rax
>       movq    %r15, %rcx
>       movq    %rax, (%rdi)
>       movl    $1, 248(%rsp)
>       cmpl    $576, %edx
>       ja      .L1613
> .L1556:
>       cmpq    $0, (%rcx)
>       jne     .L1593
>       leaq    416(%rsp), %rax
>       leaq    496(%rsp), %r13
>       movq    %rax, (%rsp)
> .L1565:
>       movq    (%rsp), %rdi
>       movq    %r15, %rdx
>       movq    %r12, %rsi
> 	call    _ZN2wi6bit_orI16generic_wide_intI16wide_int_storageES3_EENS_13binary_traitsIT_T0_XsrNS_10int_traitsIS5_EE14precision_typeEXsrNS7_IS6_EE14precision_typeEE11result_typeERKS5_RKS6_
>       leaq    352(%rsp), %rax
>       movq    $0, 352(%rsp)
>       movq    %rax, 336(%rsp)
>       movl    $1, 344(%rsp)
>       movl    %r14d, 348(%rsp)
>       movl    %r14d, 572(%rsp)
>       cmpl    $576, %r14d
>       ja      .L1614
>       movq    $0, 496(%rsp)
>       movl    $1, %esi
> .L1582:
>       movl    $0, 76(%rbp)
>       movq    %rbp, %rdi
>       movl    $0, 156(%rbp)
>       movl    %esi, 568(%rsp)
>       movq    %r13, %rsi
>       call    _ZN16wide_int_storageaSERKS_.isra.0
>       movq    (%rsp), %rsi
>       leaq    80(%rbp), %rdi
>       call    _ZN16wide_int_storageaSERKS_.isra.0
>       movl    global_options+3536(%rip), %eax
>       testl   %eax, %eax
>       je      .L1569
>       movl    156(%rbp), %eax
>       cmpl    %eax, 76(%rbp)
>       jne     .L1570
> .L1569:
>       cmpl    $576, 572(%rsp)
>       ja      .L1615
> .L1571:
>       cmpl    $576, 492(%rsp)
>       ja      .L1616
> .L1572:
>       cmpl    $576, 252(%rsp)
>       jbe     .L1552
>       movq    176(%rsp), %rdi
>       call    free
>       jmp     .L1552
>       .p2align 4,,10
>       .p2align 3
> .L1587:
>       leaq    16(%rsp), %r12
>       leaq    96(%rsp), %rdi
>       movq    %r12, %rcx
> .L1540:
>       xorl    %eax, %eax
>       jmp     .L1545
>       .p2align 4,,10
>       .p2align 3
> .L1618:
>       addl    $1, %eax
>       cmpl    %eax, %esi
>       je      .L1617
> .L1545:
>       movl    %eax, %edx
>       movq    (%rdi,%rdx,8), %r10
>       cmpq    %r10, (%rcx,%rdx,8)
>       je      .L1618
>       movl    %r9d, 252(%rsp)
>       cmpl    $576, %r9d
>       ja      .L1543
>       leaq    176(%rsp), %r15
>       movq    %r12, %rax
>       movq    %r15, %rdi
>       .p2align 4,,10
>       .p2align 3
> .L1554:
>       leaq    96(%rsp), %rcx
>       cmpl    $576, %r11d
>       jbe     .L1541
>       .p2align 4,,10
>       .p2align 3
> .L1539:
>       movq    96(%rsp), %rcx
>       jmp     .L1541
>       .p2align 4,,10
>       .p2align 3
> .L1617:
>       leaq    512(%rsp), %rax
>       movl    %r14d, 508(%rsp)
>       movq    $0, 512(%rsp)
>       movq    %rax, 496(%rsp)
>       movl    $1, 504(%rsp)
>       movl    %r14d, 332(%rsp)
>       cmpl    $576, %r14d
>       ja      .L1619
>       movl    $1, %esi
>       leaq    256(%rsp), %r13
>       movq    $0, 256(%rsp)
> .L1583:
>       leaq    336(%rsp), %r14
>       movl    %esi, 328(%rsp)
>       movq    %r12, %rsi
>       movq    %r14, %rdi
>       call    _ZN16wide_int_storageC2ERKS_
>       movl    $0, 76(%rbp)
>       movq    %r14, %rsi
>       movq    %rbp, %rdi
>       movl    $0, 156(%rbp)
>       call    _ZN16wide_int_storageaSERKS_.isra.0
>       leaq    80(%rbp), %rdi
>       movq    %r13, %rsi
>       call    _ZN16wide_int_storageaSERKS_.isra.0
>       movl    global_options+3536(%rip), %edx
>       testl   %edx, %edx
>       je      .L1549
>       movl    156(%rbp), %eax
>       cmpl    %eax, 76(%rbp)
>       jne     .L1570
> .L1549:
>       cmpl    $576, 412(%rsp)
>       ja      .L1620
> .L1550:
>       cmpl    $576, 332(%rsp)
>       ja      .L1621
> .L1552:
>       cmpl    $576, 92(%rsp)
>       ja      .L1622
> .L1574:
>       cmpl    $576, 172(%rsp)
>       ja      .L1623
> .L1575:
>       cmpl    $576, 180(%rbx)
>       movl    176(%rbx), %eax
>       leaq    104(%rbx), %rdx
>       ja      .L1624
> .L1577:
>       cmpl    $1, %eax
>       jne     .L1580
>       cmpq    $-1, (%rdx)
>       je      .L1531
> .L1580:
>       leaq    24(%rbx), %rsi
>       movq    %rbp, %rdi
>       call    _ZN14irange_bitmask9intersectERKS_
> .L1531:
>       addq    $584, %rsp
>       .cfi_remember_state
>       .cfi_def_cfa_offset 56
>       movq    %rbp, %rax
>       popq    %rbx
>       .cfi_def_cfa_offset 48
>       popq    %rbp
>       .cfi_def_cfa_offset 40
>       popq    %r12
>       .cfi_def_cfa_offset 32
>       popq    %r13
>       .cfi_def_cfa_offset 24
>       popq    %r14
>       .cfi_def_cfa_offset 16
>       popq    %r15
>       .cfi_def_cfa_offset 8
>       ret
>       .p2align 4,,10
>       .p2align 3
> .L1534:
>       .cfi_restore_state
>       movq    %rbx, %rdi
>       call    *%rax
>       jmp     .L1535
>       .p2align 4,,10
>       .p2align 3
> .L1593:
>       movl    $1, %eax
> .L1564:
>       movl    %eax, 424(%rsp)
>       leaq    416(%rsp), %rax
>       leaq    496(%rsp), %r13
>       movq    %rax, %rdi
>       movq    %rcx, 416(%rsp)
>       movl    %edx, 428(%rsp)
>       movq    %rax, (%rsp)
> 	call    _ZN2wi3clzERK16generic_wide_intI20wide_int_ref_storageILb0ELb1EEE
>       movl    %r14d, %esi
>       movl    %r14d, 572(%rsp)
>       subl    %eax, %esi
>       cmpl    $576, %r14d
>       ja      .L1625
> .L1558:
>       movq    %r13, %rdi
> .L1559:
>       movl    %r14d, %ecx
>       xorl    %edx, %edx
>       call    _ZN2wi4maskEPljbj
>       movl    572(%rsp), %edx
>       movl    %eax, %ecx
>       movl    %eax, 568(%rsp)
>       sall    $6, %ecx
>       cmpl    %ecx, %edx
>       jnb     .L1560
>       movq    %r13, %rcx
>       cmpl    $576, %edx
>       ja      .L1626
> .L1561:
>       subl    $1, %eax
>       andl    $63, %edx
>       leaq    (%rcx,%rax,8), %rsi
>       movl    $64, %ecx
>       movq    (%rsi), %rax
>       subl    %edx, %ecx
>       salq    %cl, %rax
>       sarq    %cl, %rax
>       movq    %rax, (%rsi)
> .L1560:
>       movq    %r13, %rsi
>       movq    %r15, %rdi
>       call    _ZN16wide_int_storageaSERKS_.isra.0
>       cmpl    $576, 572(%rsp)
>       jbe     .L1565
>       movq    496(%rsp), %rdi
>       call    free
>       jmp     .L1565
>       .p2align 4,,10
>       .p2align 3
> .L1536:
>       movq    16(%rsp), %rcx
>       cmpl    $576, %r11d
>       ja      .L1627
>       cmpl    %r8d, %esi
>       jne     .L1581
>       leaq    96(%rsp), %rdi
>       leaq    16(%rsp), %r12
>       jmp     .L1540
> .L1627:
>       leaq    16(%rsp), %r12
>       cmpl    %r8d, %esi
>       je      .L1538
>       .p2align 4,,10
>       .p2align 3
> .L1581:
>       movl    %r9d, 252(%rsp)
>       leaq    16(%rsp), %r12
> .L1543:
>       leal    63(%r9), %edi
>       leaq    176(%rsp), %r15
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    88(%rsp), %esi
>       movl    252(%rsp), %r9d
>       movq    %rax, 176(%rsp)
>       movl    92(%rsp), %edx
>       movq    %rax, %rdi
>       movl    168(%rsp), %r8d
>       movl    172(%rsp), %r11d
>       cmpl    $576, %r9d
>       jbe     .L1607
> .L1553:
>       movq    %r12, %rax
>       cmpl    $576, %edx
>       jbe     .L1554
>       movq    16(%rsp), %rax
>       jmp     .L1554
>       .p2align 4,,10
>       .p2align 3
> .L1624:
>       movq    104(%rbx), %rdx
>       jmp     .L1577
>       .p2align 4,,10
>       .p2align 3
> .L1623:
>       movq    96(%rsp), %rdi
>       call    free
>       jmp     .L1575
>       .p2align 4,,10
>       .p2align 3
> .L1622:
>       movq    16(%rsp), %rdi
>       call    free
>       jmp     .L1574
>       .p2align 4,,10
>       .p2align 3
> .L1610:
>       leal    63(%rax), %edi
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    168(%rsp), %edx
>       movq    (%r12), %rsi
>       movq    %rax, %rdi
>       movq    %rax, 96(%rsp)
>       salq    $3, %rdx
>       call    memcpy
>       movq    184(%rbx), %r13
>       jmp     .L1532
>       .p2align 4,,10
>       .p2align 3
> .L1611:
>       leal    63(%rax), %edi
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    88(%rsp), %edx
>       movq    0(%r13), %rsi
>       movq    %rax, %rdi
>       movq    %rax, 16(%rsp)
>       salq    $3, %rdx
>       call    memcpy
>       jmp     .L1533
>       .p2align 4,,10
>       .p2align 3
> .L1612:
>       cmpl    %r8d, %esi
>       je      .L1586
>       leaq    176(%rsp), %r15
>       leaq    16(%rsp), %r12
>       movl    %r9d, 252(%rsp)
>       movq    %r15, %rdi
>       movq    %r12, %rax
>       jmp     .L1539
>       .p2align 4,,10
>       .p2align 3
> .L1614:
>       leal    63(%r14), %edi
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    344(%rsp), %esi
>       cmpl    $576, 572(%rsp)
>       movq    %rax, 496(%rsp)
>       movq    336(%rsp), %rdi
>       jbe     .L1628
> .L1567:
>       xorl    %edx, %edx
>       .p2align 4,,10
>       .p2align 3
> .L1568:
>       movq    (%rdi,%rdx,8), %rcx
>       movq    %rcx, (%rax,%rdx,8)
>       addq    $1, %rdx
>       cmpl    %esi, %edx
>       jb      .L1568
>       jmp     .L1582
>       .p2align 4,,10
>       .p2align 3
> .L1621:
>       movq    256(%rsp), %rdi
>       call    free
>       jmp     .L1552
>       .p2align 4,,10
>       .p2align 3
> .L1620:
>       movq    336(%rsp), %rdi
>       call    free
>       jmp     .L1550
>       .p2align 4,,10
>       .p2align 3
> .L1619:
>       leal    63(%r14), %edi
>       leaq    256(%rsp), %r13
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    504(%rsp), %esi
>       cmpl    $576, 332(%rsp)
>       movq    %rax, 256(%rsp)
>       movq    496(%rsp), %rdi
>       jbe     .L1606
> .L1547:
>       xorl    %edx, %edx
>       .p2align 4,,10
>       .p2align 3
> .L1548:
>       movq    (%rdi,%rdx,8), %rcx
>       movq    %rcx, (%rax,%rdx,8)
>       addq    $1, %rdx
>       cmpl    %esi, %edx
>       jb      .L1548
>       jmp     .L1583
>       .p2align 4,,10
>       .p2align 3
> .L1613:
>       movq    176(%rsp), %rcx
>       jmp     .L1556
>       .p2align 4,,10
>       .p2align 3
> .L1555:
>       movl    %esi, %edx
>       movq    %rax, %rsi
>       call    _ZN2wi9xor_largeEPlPKljS2_jj
>       movl    252(%rsp), %edx
>       movl    %eax, 248(%rsp)
>       cmpl    $576, %edx
>       ja      .L1629
>       movq    %r15, %rcx
> .L1557:
>       cmpl    $1, %eax
>       jne     .L1564
>       jmp     .L1556
>       .p2align 4,,10
>       .p2align 3
> .L1616:
>       movq    416(%rsp), %rdi
>       call    free
>       jmp     .L1572
>       .p2align 4,,10
>       .p2align 3
> .L1615:
>       movq    496(%rsp), %rdi
>       call    free
>       jmp     .L1571
> .L1586:
>       leaq    16(%rsp), %r12
>       movq    %r12, %rcx
> .L1538:
>       movq    96(%rsp), %rdi
>       jmp     .L1540
> .L1625:
>       leal    63(%r14), %edi
>       movl    %esi, 12(%rsp)
>       shrl    $6, %edi
>       salq    $3, %rdi
>       call    xmalloc
>       movl    12(%rsp), %esi
>       cmpl    $576, 572(%rsp)
>       movq    %rax, 496(%rsp)
>       movq    %rax, %rdi
>       ja      .L1559
>       jmp     .L1558
>       .p2align 4,,10
>       .p2align 3
> .L1626:
>       movq    496(%rsp), %rcx
>       jmp     .L1561
> .L1607:
>       movq    %r15, %rdi
>       jmp     .L1553
> .L1628:
>       movq    %r13, %rax
>       jmp     .L1567
> .L1629:
>       movq    176(%rsp), %rcx
>       jmp     .L1557
> .L1606:
>       movq    %r13, %rax
>       jmp     .L1547
>       .cfi_endproc
>       .section        .text.unlikely
>       .cfi_startproc
>       .type   _ZNK6irange11get_bitmaskEv.cold, @function
> _ZNK6irange11get_bitmaskEv.cold:
> .LFSB3197:
> .L1570:
>       .cfi_def_cfa_offset 640
>       .cfi_offset 3, -56
>       .cfi_offset 6, -48
>       .cfi_offset 12, -40
>       .cfi_offset 13, -32
>       .cfi_offset 14, -24
>       .cfi_offset 15, -16
>       call    _ZNK14irange_bitmask11verify_maskEv.part.0
>       .cfi_endproc
> .LFE3197:
>       .text
>       .size   _ZNK6irange11get_bitmaskEv, .-_ZNK6irange11get_bitmaskEv
>       .section        .text.unlikely
>       .size   _ZNK6irange11get_bitmaskEv.cold, .-_ZNK6irange11get_bitmaskEv.cold
> .LCOLDE33:
>       .text
> .LHOTE33:
>       .section        .rodata.str1.1
> .LC34:
>       .string "add_vrange"

Reply via email to