On Thu, 2020-10-08 at 09:36 +1030, Alan Modra via Gcc-patches wrote:
> Implement more two insn constants.  rotate_and_mask_constant covers
> 64-bit constants that can be formed by rotating a 16-bit signed
> constant, rotating a 16-bit signed constant masked on left or right
> (rldicl and rldicr), rotating a 16-bit signed constant masked by
> rldic, and unusual "lis; rldicl" and "lis; rldicr" patterns.  All the
> values possible for DImode rs6000_is_valid_and_mask are covered.

lgtm, 

Just a couple of cosmetic nits, since I was reading through... :-)

> 
> Bootstrapped and regression tested powerpc64le-linux.
> 
>       PR 94393


PR target/94393

(unless the commit hooks currently handle that for us?)


> gcc/

gcc/ChangeLog:

>       * config/rs6000/rs6000.c (rotate_and_mask_constant): New function.
>       (num_insns_constant_multi, rs6000_emit_set_long_const): Use it here.
>       * config/rs6000/rs6000.md (*movdi_internal64+1 splitter): Delete.
> gcc/testsuite/
>       * gcc.target/powerpc/rot_cst.h,
>       * gcc.target/powerpc/rot_cst1.c,
>       * gcc.target/powerpc/rot_cst2.c: New tests.
> 



> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 14ecbad5df4..9809d11f47a 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1129,6 +1129,8 @@ static tree rs6000_handle_altivec_attribute (tree *, 
> tree, tree, int, bool *);
>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
>  static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
> +static bool rotate_and_mask_constant (unsigned HOST_WIDE_INT, HOST_WIDE_INT 
> *,
> +                                   int *, unsigned HOST_WIDE_INT *);
>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, 
> bool);
>  static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
> @@ -5877,7 +5879,7 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>  }
> 
>  /* Helper for num_insns_constant.  Allow constants formed by the
> -   num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
> +   num_insns_constant_gpr sequences, and li/lis+rldicl/rldicr/rldic/rlwinm,
>     and handle modes that require multiple gprs.  */
> 
>  static int
> @@ -5892,8 +5894,8 @@ num_insns_constant_multi (HOST_WIDE_INT value, 
> machine_mode mode)
>        if (insns > 2
>         /* We won't get more than 2 from num_insns_constant_gpr
>            except when TARGET_POWERPC64 and mode is DImode or
> -          wider, so the register mode must be DImode.  */
> -       && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
> +          wider.  */
> +       && rotate_and_mask_constant (low, NULL, NULL, NULL))
>       insns = 2;
>        total += insns;
>        /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
> @@ -9524,6 +9526,244 @@ rs6000_emit_set_const (rtx dest, rtx source)
>    return true;
>  }
> 
> +/* Rotate DImode word, being careful to handle the case where
> +   HOST_WIDE_INT is larger than DImode.  */
> +
> +static inline unsigned HOST_WIDE_INT
> +rotate_di (unsigned HOST_WIDE_INT x, unsigned int shift)
> +{
> +  unsigned HOST_WIDE_INT mask_hi, mask_lo;
> +
> +  mask_hi = (HOST_WIDE_INT_1U << 63 << 1) - (HOST_WIDE_INT_1U << shift);
> +  mask_lo = (HOST_WIDE_INT_1U << shift) - 1;
> +  x = ((x << shift) & mask_hi) | ((x >> (64 - shift)) & mask_lo);
> +  x = (x ^ (HOST_WIDE_INT_1U << 63)) - (HOST_WIDE_INT_1U << 63);
> +  return x;
> +}
> +
> +/* Can C be formed by rotating a 16-bit positive value left by C16LSB?  */
> +
> +static inline bool
> +is_rotate_positive_constant (unsigned HOST_WIDE_INT c, int c16lsb,
> +                          HOST_WIDE_INT *val, int *shift,
> +                          unsigned HOST_WIDE_INT *mask)
> +{
> +  if ((c & ~(HOST_WIDE_INT_UC (0x7fff) << c16lsb)) == 0)
> +    {
> +      /* eg. c = 1100 0000 0000 ... 0000
> +      -> val = 0x3000, shift = 49, mask = -1ull.  */
> +      if (val)
> +     {
> +       c >>= c16lsb;
> +       /* Make the value and shift canonical in the sense of
> +          selecting the smallest value.  For the example above
> +          -> val = 3, shift = 61.  */
> +       int trail_zeros = ctz_hwi (c);
> +       c >>= trail_zeros;
> +       c16lsb += trail_zeros;
> +       *val = c;
> +       *shift = c16lsb;
> +       *mask = HOST_WIDE_INT_M1U;
> +     }
> +      return true;
> +    }
> +  return false;
> +}
> +
> +/* Can C be formed by rotating a 16-bit negative value left by C16LSB?  */
> +
> +static inline bool
> +is_rotate_negative_constant (unsigned HOST_WIDE_INT c, int c16lsb,
> +                          HOST_WIDE_INT *val, int *shift,
> +                          unsigned HOST_WIDE_INT *mask)
> +{
> +  if ((c | (HOST_WIDE_INT_UC (0x7fff) << c16lsb)) == HOST_WIDE_INT_M1U)
> +    {
> +      if (val)
> +     {
> +       c >>= c16lsb;
> +       /* When there are no leading ones, C16LSB is 49 and we have
> +          an implicit 1 bit to make up our 16-bit negative value,
> +          which is why the shift here is 15 rather than 16.  */
> +       c |= HOST_WIDE_INT_M1U << 15;
> +       /* Canonicalize by removing trailing ones in the constant.
> +          eg. 0xbfff -> 0xfffe.  */
> +       int trail_ones = ctz_hwi (~c);
> +       c = (HOST_WIDE_INT) c >> trail_ones;
> +       c16lsb += trail_ones;
> +       *val = c;
> +       *shift = c16lsb;
> +       *mask = HOST_WIDE_INT_M1U;
> +     }
> +      return true;
> +    }
> +  return false;
> +}
> +
> +/* Detect cases where a constant can be formed by li; rldicl, li; rldicr,
> +   lis; rldicl, lis; rldicr, or li; rldic.  */
> +
> +static bool
> +rotate_and_mask_constant (unsigned HOST_WIDE_INT c,
> +                       HOST_WIDE_INT *val, int *shift,
> +                       unsigned HOST_WIDE_INT *mask)
> +{
> +  /* We know C is a DImode value properly sign extended to
> +     HOST_WIDE_INT that can't be formed by lis; addi.  So if C is
> +     positive there must be a 1 bit somewhere in bits 32 thru 62, and
> +     if C is negative there must be a 0 bit in the same range.  That
> +     puts constraints on the max leading zeros or ones.  Thus no need
> +     to check the C16LSB (lsb of 16-bit constant that fits in li)
> +     expressions below for negative values.  */
> +  int lead_zeros = clz_hwi (c);
> +
> +  /* First check for rotated constants.  */
> +  int c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_zeros;
> +  if (is_rotate_positive_constant (c, c16lsb, val, shift, mask))
> +    return true;
> +
> +  /* Handle wrap-around 16-bit constants too, by rotating the
> +     potential wrapped constant so that it no longer wraps.  This also
> +     has constraints on the max leading zeros or ones such that the
> +     C16LSB expression below will never be negative.  */
> +  unsigned HOST_WIDE_INT rc = rotate_di (c, 32);
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 15 - clz_hwi (rc);
> +  if (is_rotate_positive_constant (rc, c16lsb, val, shift, mask))
> +    {
> +      if (val)
> +     *shift ^= 32;
> +      return true;
> +    }
> +
> +  int lead_ones = clz_hwi (~c);
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_ones;
> +  if (is_rotate_negative_constant (c, c16lsb, val, shift, mask))
> +    return true;
> +
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 15 - clz_hwi (~rc);
> +  if (is_rotate_negative_constant (rc, c16lsb, val, shift, mask))
> +    {
> +      if (val)
> +     *shift ^= 32;
> +      return true;
> +    }
> +
> +  /* 00...01xxxxxxxxxxxxxxx1..11 (up to 15 x's, any number of leading
> +     0's and trailing 1's) can be implemented as a li, rldicl.  */
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 16 - lead_zeros;
> +  if ((c | (HOST_WIDE_INT_M1U << c16lsb)) == HOST_WIDE_INT_M1U)
> +    {
> +      /* eg. c = 0000 1011 1111 1111 ... 1111
> +      -> val = sext(0xbfff), shift = 44, mask = 0x0fffffffffffffff.  */
> +      if (val)
> +     {
> +       c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 16);
> +       *val = c;
> +       *shift = c == HOST_WIDE_INT_M1U ? 0 : c16lsb;
> +       *mask = HOST_WIDE_INT_M1U >> lead_zeros;
> +     }
> +      return true;
> +    }
> +  /* 00...01xxxxxxxxxxxxxxx00..01..11 (up to 15 x's followed by 16 0's,
> +     any number of leading 0's and trailing 1's) can be implemented as
> +     lis, rldicl.  */
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 32 - lead_zeros;
> +  if (c16lsb >= 0
> +      && (c & ((HOST_WIDE_INT_1U << (c16lsb + 16))
> +            - (HOST_WIDE_INT_1U << c16lsb))) == 0
> +      && (c | (HOST_WIDE_INT_M1U << c16lsb)) == HOST_WIDE_INT_M1U)
> +    {
> +      if (val)
> +     {
> +       c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 32);
> +       *val = c;
> +       *shift = c16lsb;
> +       *mask = HOST_WIDE_INT_M1U >> lead_zeros;
> +     }
> +      return true;
> +    }
> +  /* TRAIL_ZEROS is less than 49 here since 49 or more trailing zeros
> +     are handled above as a rotate of a positive 16-bit value.  */
> +  int trail_zeros = ctz_hwi (c);
> +  /* 1..1xxxxxxxxxxxxxxx1..10..0 (any number of leading 1's, up to 15
> +     x's followed by any number of 1's followed by any number of 0's)
> +     can be implemented as li, rldicr.  */
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_ones;
> +  if ((c | (HOST_WIDE_INT_M1U << c16lsb)
> +       | ~(HOST_WIDE_INT_M1U << trail_zeros)) == HOST_WIDE_INT_M1U)
> +    {
> +      if (val)
> +     {
> +       c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 15);
> +       *val = c;
> +       *shift = c == HOST_WIDE_INT_M1U ? 0 : c16lsb;
> +       *mask = HOST_WIDE_INT_M1U << trail_zeros;
> +     }
> +      return true;
> +    }
> +  /* 1..1xxxxxxxxxxxxxxx0..01..10..0 (any number of leading 1's, up to
> +     15 x's followed by 16 0's, any number of 1's followed by any
> +     number of 0's) can be implemented as lis, rldicr.  */
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 31 - lead_ones;
> +  if (c16lsb >= trail_zeros
> +      && (c & ((HOST_WIDE_INT_1U << (c16lsb + 16))
> +            - (HOST_WIDE_INT_1U << c16lsb))) == 0
> +      && (c | (HOST_WIDE_INT_M1U << c16lsb)
> +       | ~(HOST_WIDE_INT_M1U << trail_zeros)) == HOST_WIDE_INT_M1U)
> +    {
> +      if (val)
> +     {
> +       c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 31);
> +       *val = c;
> +       *shift = c16lsb;
> +       *mask = HOST_WIDE_INT_M1U << trail_zeros;
> +     }
> +      return true;
> +    }
> +  /* 00..011..1xxxxxxxxxxxxxx0..0 (up to 15 x's, any number of leading
> +     0's followed by any number of 1's and any number of trailing 0's)
> +     can be implemented as li, rldic.  */
> +  c16lsb = trail_zeros;
> +  if ((c | ~(HOST_WIDE_INT_M1U << (c16lsb + 15))
> +       | ~(HOST_WIDE_INT_M1U >> lead_zeros)) == HOST_WIDE_INT_M1U)
> +    {
> +      if (val)
> +     {
> +       c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 16);
> +       *val = c;
> +       *shift = c16lsb;
> +       *mask = ((HOST_WIDE_INT_M1U << c16lsb)
> +                & (HOST_WIDE_INT_M1U >> lead_zeros));
> +     }
> +      return true;
> +    }
> +  /* 11..1xxxxxxxxxxxxxx0..01..1 (up to 15 x's, any number of leading
> +     1's and any number of trailing 0's followed by any number of 1's)
> +     can be implemented as li, rldic.  */
> +  int trail_ones = ctz_hwi (~c);
> +  c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_ones;
> +  if ((c & (HOST_WIDE_INT_M1U << trail_ones)
> +       & ~(HOST_WIDE_INT_M1U << c16lsb)) == 0)
> +    {
> +      if (val)
> +     {
> +       c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 15);
> +       /* Canonicalize by removing trailing zeros in the constant.
> +          eg. 0x8000 -> 0xffff.  */
> +       trail_zeros = ctz_hwi (c);
> +       c = (HOST_WIDE_INT) c >> trail_zeros;
> +       c16lsb += trail_zeros;
> +       *val = c;
> +       *shift = c16lsb;
> +       *mask = ((HOST_WIDE_INT_M1U << c16lsb)
> +                | ~(HOST_WIDE_INT_M1U << trail_ones));
> +     }
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>     Output insns to set DEST equal to the constant C as a series of
>     lis, ori and shl instructions.  */
> @@ -9533,6 +9773,9 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  {
>    rtx temp;
>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
> +  HOST_WIDE_INT val = c;
> +  int shift;
> +  unsigned HOST_WIDE_INT mask;
> 
>    ud1 = c & 0xffff;
>    c = c >> 16;
> @@ -9558,6 +9801,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>                       gen_rtx_IOR (DImode, copy_rtx (temp),
>                                    GEN_INT (ud1)));
>      }
> +  else if (rotate_and_mask_constant (val, &val, &shift, &mask))
> +    {
> +      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      emit_move_insn (temp, GEN_INT (val));
> +      rtx x = copy_rtx (temp);
> +      if (shift != 0)
> +     x = gen_rtx_ROTATE (DImode, x, GEN_INT (shift));
> +      if (mask != HOST_WIDE_INT_M1U)
> +     x = gen_rtx_AND (DImode, x, GEN_INT (mask));
> +      emit_move_insn (dest, x);
> +    }
>    else if (ud3 == 0 && ud4 == 0)
>      {
>        temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 779bfd11237..2bf5df41a9b 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -9081,24 +9081,8 @@
>          *,          *,          *,
>          p8v,        p8v")])
> 
> -; Some DImode loads are best done as a load of -1 followed by a mask
> -; instruction.
> -(define_split
> -  [(set (match_operand:DI 0 "int_reg_operand_not_pseudo")
> -     (match_operand:DI 1 "const_int_operand"))]
> -  "TARGET_POWERPC64
> -   && num_insns_constant (operands[1], DImode) > 1
> -   && !IN_RANGE (INTVAL (operands[1]), -0x80000000, 0xffffffff)
> -   && rs6000_is_valid_and_mask (operands[1], DImode)"
> -  [(set (match_dup 0)
> -     (const_int -1))
> -   (set (match_dup 0)
> -     (and:DI (match_dup 0)
> -             (match_dup 1)))]
> -  "")
> -
> -;; Split a load of a large constant into the appropriate five-instruction
> -;; sequence.  Handle anything in a constant number of insns.
> +;; Split a load of a large constant into a sequence of two to five
> +;; instructions.
>  ;; When non-easy constants can go in the TOC, this should use
>  ;; easy_fp_constant predicate.
>  (define_split
> diff --git a/gcc/testsuite/gcc.target/powerpc/rot_cst.h 
> b/gcc/testsuite/gcc.target/powerpc/rot_cst.h
> new file mode 100644
> index 00000000000..a57b5aca6cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rot_cst.h
> @@ -0,0 +1,5270 @@
> +unsigned long long __attribute__ ((__noinline__, __noclone__))
> +c1 (void)
> +{
> +  return 0xc000000000000000ULL;
> +}


<snip>

> diff --git a/gcc/testsuite/gcc.target/powerpc/rot_cst1.c 
> b/gcc/testsuite/gcc.target/powerpc/rot_cst1.c
> new file mode 100644
> index 00000000000..d7781cf8ac7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rot_cst1.c
> @@ -0,0 +1,1077 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "rot_cst.h"
> +
> +struct fun {
> +  unsigned long long (*f) (void);
> +  unsigned long long val;
> +};
> +
> +volatile struct fun values[] = {
> +  { c1,  0xc000000000000000ULL },

Impressive list containing oodles of fun. :-) 

lgtm, 
thanks, 
-Will



Reply via email to