On Thu, 2020-10-08 at 09:36 +1030, Alan Modra via Gcc-patches wrote: > Implement more two insn constants. rotate_and_mask_constant covers > 64-bit constants that can be formed by rotating a 16-bit signed > constant, rotating a 16-bit signed constant masked on left or right > (rldicl and rldicr), rotating a 16-bit signed constant masked by > rldic, and unusual "lis; rldicl" and "lis; rldicr" patterns. All the > values possible for DImode rs6000_is_valid_and_mask are covered.
LGTM, just a couple of cosmetic nits, since I was reading through... :-) > > Bootstrapped and regression tested powerpc64le-linux. > > PR 94393 PR target/94393 (unless the hooks currently handle that for us?) > gcc/ gcc/ChangeLog: > * config/rs6000/rs6000.c (rotate_and_mask_constant): New function. > (num_insns_constant_multi, rs6000_emit_set_long_const): Use it here. > * config/rs6000/rs6000.md (*movdi_internal64+1 splitter): Delete. > gcc/testsuite/ > * gcc.target/powerpc/rot_cst.h, > * gcc.target/powerpc/rot_cst1.c, > * gcc.target/powerpc/rot_cst2.c: New tests. > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 14ecbad5df4..9809d11f47a 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -1129,6 +1129,8 @@ static tree rs6000_handle_altivec_attribute (tree *, > tree, tree, int, bool *); > static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); > static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); > static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); > +static bool rotate_and_mask_constant (unsigned HOST_WIDE_INT, HOST_WIDE_INT > *, > + int *, unsigned HOST_WIDE_INT *); > static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); > static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, > bool); > static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, > @@ -5877,7 +5879,7 @@ num_insns_constant_gpr (HOST_WIDE_INT value) > } > > /* Helper for num_insns_constant.  Allow constants formed by the > - num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm, > + num_insns_constant_gpr sequences, and li/lis+rldicl/rldicr/rldic/rlwinm, > and handle modes that require multiple gprs. 
*/ > > static int > @@ -5892,8 +5894,8 @@ num_insns_constant_multi (HOST_WIDE_INT value, > machine_mode mode) > if (insns > 2 > /* We won't get more than 2 from num_insns_constant_gpr > except when TARGET_POWERPC64 and mode is DImode or > - wider, so the register mode must be DImode. */ > - && rs6000_is_valid_and_mask (GEN_INT (low), DImode)) > + wider. */ > + && rotate_and_mask_constant (low, NULL, NULL, NULL)) > insns = 2; > total += insns; > /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing > @@ -9524,6 +9526,244 @@ rs6000_emit_set_const (rtx dest, rtx source) > return true; > } > > +/* Rotate DImode word, being careful to handle the case where > + HOST_WIDE_INT is larger than DImode. */ > + > +static inline unsigned HOST_WIDE_INT > +rotate_di (unsigned HOST_WIDE_INT x, unsigned int shift) > +{ > + unsigned HOST_WIDE_INT mask_hi, mask_lo; > + > + mask_hi = (HOST_WIDE_INT_1U << 63 << 1) - (HOST_WIDE_INT_1U << shift); > + mask_lo = (HOST_WIDE_INT_1U << shift) - 1; > + x = ((x << shift) & mask_hi) | ((x >> (64 - shift)) & mask_lo); > + x = (x ^ (HOST_WIDE_INT_1U << 63)) - (HOST_WIDE_INT_1U << 63); > + return x; > +} > + > +/* Can C be formed by rotating a 16-bit positive value left by C16LSB? */ > + > +static inline bool > +is_rotate_positive_constant (unsigned HOST_WIDE_INT c, int c16lsb, > + HOST_WIDE_INT *val, int *shift, > + unsigned HOST_WIDE_INT *mask) > +{ > + if ((c & ~(HOST_WIDE_INT_UC (0x7fff) << c16lsb)) == 0) > + { > + /* eg. c = 1100 0000 0000 ... 0000 > + -> val = 0x3000, shift = 49, mask = -1ull. */ > + if (val) > + { > + c >>= c16lsb; > + /* Make the value and shift canonical in the sense of > + selecting the smallest value. For the example above > + -> val = 3, shift = 61. 
*/ > + int trail_zeros = ctz_hwi (c); > + c >>= trail_zeros; > + c16lsb += trail_zeros; > + *val = c; > + *shift = c16lsb; > + *mask = HOST_WIDE_INT_M1U; > + } > + return true; > + } > + return false; > +} > + > +/* Can C be formed by rotating a 16-bit negative value left by C16LSB? */ > + > +static inline bool > +is_rotate_negative_constant (unsigned HOST_WIDE_INT c, int c16lsb, > + HOST_WIDE_INT *val, int *shift, > + unsigned HOST_WIDE_INT *mask) > +{ > + if ((c | (HOST_WIDE_INT_UC (0x7fff) << c16lsb)) == HOST_WIDE_INT_M1U) > + { > + if (val) > + { > + c >>= c16lsb; > + /* When there are no leading ones, C16LSB is 49 and we have > + an implicit 1 bit to make up our 16-bit negative value, > + which is why the shift here is 15 rather than 16. */ > + c |= HOST_WIDE_INT_M1U << 15; > + /* Canonicalize by removing trailing ones in the constant. > + eg. 0xbfff -> 0xfffe. */ > + int trail_ones = ctz_hwi (~c); > + c = (HOST_WIDE_INT) c >> trail_ones; > + c16lsb += trail_ones; > + *val = c; > + *shift = c16lsb; > + *mask = HOST_WIDE_INT_M1U; > + } > + return true; > + } > + return false; > +} > + > +/* Detect cases where a constant can be formed by li; rldicl, li; rldicr, > + lis; rldicl, lis; rldicr, or li; rldic. */ > + > +static bool > +rotate_and_mask_constant (unsigned HOST_WIDE_INT c, > + HOST_WIDE_INT *val, int *shift, > + unsigned HOST_WIDE_INT *mask) > +{ > + /* We know C is a DImode value properly sign extended to > + HOST_WIDE_INT that can't be formed by lis; addi. So if C is > + positive there must be a 1 bit somewhere in bits 32 thru 62, and > + if C is negative there must be a 0 bit in the same range. That > + puts constraints on the max leading zeros or ones. Thus no need > + to check the C16LSB (lsb of 16-bit constant that fits in li) > + expressions below for negative values. */ > + int lead_zeros = clz_hwi (c); > + > + /* First check for rotated constants. 
*/ > + int c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_zeros; > + if (is_rotate_positive_constant (c, c16lsb, val, shift, mask)) > + return true; > + > + /* Handle wrap-around 16-bit constants too, by rotating the > + potential wrapped constant so that it no longer wraps. This also > + has constraints on the max leading zeros or ones such that the > + C16LSB expression below will never be negative. */ > + unsigned HOST_WIDE_INT rc = rotate_di (c, 32); > + c16lsb = HOST_BITS_PER_WIDE_INT - 15 - clz_hwi (rc); > + if (is_rotate_positive_constant (rc, c16lsb, val, shift, mask)) > + { > + if (val) > + *shift ^= 32; > + return true; > + } > + > + int lead_ones = clz_hwi (~c); > + c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_ones; > + if (is_rotate_negative_constant (c, c16lsb, val, shift, mask)) > + return true; > + > + c16lsb = HOST_BITS_PER_WIDE_INT - 15 - clz_hwi (~rc); > + if (is_rotate_negative_constant (rc, c16lsb, val, shift, mask)) > + { > + if (val) > + *shift ^= 32; > + return true; > + } > + > + /* 00...01xxxxxxxxxxxxxxx1..11 (up to 15 x's, any number of leading > + 0's and trailing 1's) can be implemented as a li, rldicl. */ > + c16lsb = HOST_BITS_PER_WIDE_INT - 16 - lead_zeros; > + if ((c | (HOST_WIDE_INT_M1U << c16lsb)) == HOST_WIDE_INT_M1U) > + { > + /* eg. c = 0000 1011 1111 1111 ... 1111 > + -> val = sext(0xbfff), shift = 44, mask = 0x0fffffffffffffff. */ > + if (val) > + { > + c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 16); > + *val = c; > + *shift = c == HOST_WIDE_INT_M1U ? 0 : c16lsb; > + *mask = HOST_WIDE_INT_M1U >> lead_zeros; > + } > + return true; > + } > + /* 00...01xxxxxxxxxxxxxxx00..01..11 (up to 15 x's followed by 16 0's, > + any number of leading 0's and trailing 1's) can be implemented as > + lis, rldicl. 
*/ > + c16lsb = HOST_BITS_PER_WIDE_INT - 32 - lead_zeros; > + if (c16lsb >= 0 > + && (c & ((HOST_WIDE_INT_1U << (c16lsb + 16)) > + - (HOST_WIDE_INT_1U << c16lsb))) == 0 > + && (c | (HOST_WIDE_INT_M1U << c16lsb)) == HOST_WIDE_INT_M1U) > + { > + if (val) > + { > + c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 32); > + *val = c; > + *shift = c16lsb; > + *mask = HOST_WIDE_INT_M1U >> lead_zeros; > + } > + return true; > + } > + /* TRAIL_ZEROS is less than 49 here since 49 or more trailing zeros > + are handled above as a rotate of a positive 16-bit value. */ > + int trail_zeros = ctz_hwi (c); > + /* 1..1xxxxxxxxxxxxxxx1..10..0 (any number of leading 1's, up to 15 > + x's followed by any number of 1's followed by any number of 0's) > + can be implemented as li, rldicr. */ > + c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_ones; > + if ((c | (HOST_WIDE_INT_M1U << c16lsb) > + | ~(HOST_WIDE_INT_M1U << trail_zeros)) == HOST_WIDE_INT_M1U) > + { > + if (val) > + { > + c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 15); > + *val = c; > + *shift = c == HOST_WIDE_INT_M1U ? 0 : c16lsb; > + *mask = HOST_WIDE_INT_M1U << trail_zeros; > + } > + return true; > + } > + /* 1..1xxxxxxxxxxxxxxx0..01..10..0 (any number of leading 1's, up to > + 15 x's followed by 16 0's, any number of 1's followed by any > + number of 0's) can be implemented as lis, rldicr. */ > + c16lsb = HOST_BITS_PER_WIDE_INT - 31 - lead_ones; > + if (c16lsb >= trail_zeros > + && (c & ((HOST_WIDE_INT_1U << (c16lsb + 16)) > + - (HOST_WIDE_INT_1U << c16lsb))) == 0 > + && (c | (HOST_WIDE_INT_M1U << c16lsb) > + | ~(HOST_WIDE_INT_M1U << trail_zeros)) == HOST_WIDE_INT_M1U) > + { > + if (val) > + { > + c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 31); > + *val = c; > + *shift = c16lsb; > + *mask = HOST_WIDE_INT_M1U << trail_zeros; > + } > + return true; > + } > + /* 00..011..1xxxxxxxxxxxxxx0..0 (up to 15 x's, any number of leading > + 0's followed by any number of 1's and any number of trailing 0's) > + can be implemented as li, rldic. 
*/ > + c16lsb = trail_zeros; > + if ((c | ~(HOST_WIDE_INT_M1U << (c16lsb + 15)) > + | ~(HOST_WIDE_INT_M1U >> lead_zeros)) == HOST_WIDE_INT_M1U) > + { > + if (val) > + { > + c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 16); > + *val = c; > + *shift = c16lsb; > + *mask = ((HOST_WIDE_INT_M1U << c16lsb) > + & (HOST_WIDE_INT_M1U >> lead_zeros)); > + } > + return true; > + } > + /* 11..1xxxxxxxxxxxxxx0..01..1 (up to 15 x's, any number of leading > + 1's and any number of trailing 0's followed by any number of 1's) > + can be implemented as li, rldic. */ > + int trail_ones = ctz_hwi (~c); > + c16lsb = HOST_BITS_PER_WIDE_INT - 15 - lead_ones; > + if ((c & (HOST_WIDE_INT_M1U << trail_ones) > + & ~(HOST_WIDE_INT_M1U << c16lsb)) == 0) > + { > + if (val) > + { > + c = (c >> c16lsb) | (HOST_WIDE_INT_M1U << 15); > + /* Canonicalize by removing trailing zeros in the constant. > + eg. 0x8000 -> 0xffff. */ > + trail_zeros = ctz_hwi (c); > + c = (HOST_WIDE_INT) c >> trail_zeros; > + c16lsb += trail_zeros; > + *val = c; > + *shift = c16lsb; > + *mask = ((HOST_WIDE_INT_M1U << c16lsb) > + | ~(HOST_WIDE_INT_M1U << trail_ones)); > + } > + return true; > + } > + > + return false; > +} > + > /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. > Output insns to set DEST equal to the constant C as a series of > lis, ori and shl instructions. */ > @@ -9533,6 +9773,9 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > { > rtx temp; > HOST_WIDE_INT ud1, ud2, ud3, ud4; > + HOST_WIDE_INT val = c; > + int shift; > + unsigned HOST_WIDE_INT mask; > > ud1 = c & 0xffff; > c = c >> 16; > @@ -9558,6 +9801,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > gen_rtx_IOR (DImode, copy_rtx (temp), > GEN_INT (ud1))); > } > + else if (rotate_and_mask_constant (val, &val, &shift, &mask)) > + { > + temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); > + emit_move_insn (temp, GEN_INT (val)); > + rtx x = copy_rtx (temp); > + if (shift != 0) > + x = gen_rtx_ROTATE (DImode, x, GEN_INT (shift)); > + if (mask != HOST_WIDE_INT_M1U) > + x = gen_rtx_AND (DImode, x, GEN_INT (mask)); > + emit_move_insn (dest, x); > + } > else if (ud3 == 0 && ud4 == 0) > { > temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 779bfd11237..2bf5df41a9b 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -9081,24 +9081,8 @@ > *, *, *, > p8v, p8v")]) > > -; Some DImode loads are best done as a load of -1 followed by a mask > -; instruction. > -(define_split > - [(set (match_operand:DI 0 "int_reg_operand_not_pseudo") > - (match_operand:DI 1 "const_int_operand"))] > - "TARGET_POWERPC64 > - && num_insns_constant (operands[1], DImode) > 1 > - && !IN_RANGE (INTVAL (operands[1]), -0x80000000, 0xffffffff) > - && rs6000_is_valid_and_mask (operands[1], DImode)" > - [(set (match_dup 0) > - (const_int -1)) > - (set (match_dup 0) > - (and:DI (match_dup 0) > - (match_dup 1)))] > - "") > - > -;; Split a load of a large constant into the appropriate five-instruction > -;; sequence. Handle anything in a constant number of insns. > +;; Split a load of a large constant into a sequence of two to five > +;; instructions. > ;; When non-easy constants can go in the TOC, this should use > ;; easy_fp_constant predicate. 
> (define_split > diff --git a/gcc/testsuite/gcc.target/powerpc/rot_cst.h > b/gcc/testsuite/gcc.target/powerpc/rot_cst.h > new file mode 100644 > index 00000000000..a57b5aca6cf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/rot_cst.h > @@ -0,0 +1,5270 @@ > +unsigned long long __attribute__ ((__noinline__, __noclone__)) > +c1 (void) > +{ > + return 0xc000000000000000ULL; > +} <snip> > diff --git a/gcc/testsuite/gcc.target/powerpc/rot_cst1.c > b/gcc/testsuite/gcc.target/powerpc/rot_cst1.c > new file mode 100644 > index 00000000000..d7781cf8ac7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/rot_cst1.c > @@ -0,0 +1,1077 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +#include "rot_cst.h" > + > +struct fun { > + unsigned long long (*f) (void); > + unsigned long long val; > +}; > + > +volatile struct fun values[] = { > + { c1, 0xc000000000000000ULL }, Impressive list containing oodles of fun. :-) LGTM, thanks, -Will