Hi, We may use two instructions (li/lis followed by rlwinm with a mask) to build a 64-bit constant. For example: 'li 9,16383 + rlwinm 9,9,0,29,25' builds 0x00003fff00003fc7LL.
This updates rs6000_emit_set_long_const to build constants through rlwinm. Bootstrap & regtest pass on ppc64 and ppc64le. Is this ok for trunk? BR, Jeff(Jiufu) PR target/94395 gcc/ChangeLog: * config/rs6000/rs6000.cc (from_rotate32): New function to check a 32 bit value is rotate32 from li/lis. (check_rotate32_mask): New function to check sh/mb/me for rlwinm. (rs6000_emit_set_long_const): Use rlwinm to build constant. * config/rs6000/rs6000.md (rlwinm<mode>3): New define_insn. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr93012.c: Update insn count. * gcc.target/powerpc/pr94395_rlwinm.c: New test. * gcc.target/powerpc/pr94395_rlwinm.h: New file. * gcc.target/powerpc/pr94395_rlwinm_1.c: New test. --- gcc/config/rs6000/rs6000.cc | 83 ++++++++++++++++++- gcc/config/rs6000/rs6000.md | 11 +++ gcc/testsuite/gcc.target/powerpc/pr93012.c | 3 +- .../gcc.target/powerpc/pr94395_rlwinm.c | 6 ++ .../gcc.target/powerpc/pr94395_rlwinm.h | 8 ++ .../gcc.target/powerpc/pr94395_rlwinm_1.c | 16 ++++ 6 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 93438b4da07..3b5a2f5a16e 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10110,7 +10110,8 @@ rs6000_emit_set_const (rtx dest, rtx source) Return -1 if C can not be rotated as from. */ static int -rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT c, int clz) +rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT c, int clz, + bool rotl32 = false) { /* case a. 0..0xxx: already at least clz zeros. */ int lz = clz_hwi (c); @@ -10126,7 +10127,9 @@ rotate_from_leading_zeros_const (unsigned HOST_WIDE_INT c, int clz) ^bit -> Vbit, then zeros are at head or tail. 00...00xxx100, 'clz + 1' >= 'bits of xxxx'. 
*/ const int rot_bits = HOST_BITS_PER_WIDE_INT - clz + 1; - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); + unsigned HOST_WIDE_INT rc; + rc = rotl32 ? (((c >> rot_bits) | (c << (32 - rot_bits))) & 0xFFFFFFFFULL) + : (c >> rot_bits) | (c << (clz - 1)); lz = clz_hwi (rc); tz = ctz_hwi (rc); if (lz + tz >= clz) @@ -10319,6 +10322,71 @@ check_rotate_mask (unsigned HOST_WIDE_INT c, HOST_WIDE_INT *val, int *shift, return true; } +/* For low 32bits of C, check if it can be rotated from an constant value + which contains count of leading zeros at least CLZ. */ + +static int +from_rotate32 (unsigned HOST_WIDE_INT c) +{ + /* rotate32 from li possitive 17bits zeros (17 + 32 = 49). */ + int n = rotate_from_leading_zeros_const (c & 0xFFFFFFFFULL, 49, true); + + /* rotate32 from li negative. */ + if (n < 0) + n = rotate_from_leading_zeros_const ((~c) & 0xFFFFFFFFULL, 49, true); + + /* rotate32 from lis negative. */ + if (n < 0) + { + n = rotate_from_leading_zeros_const (c & 0xFFFFFFFFULL, 48, true); + if (n >= 0) + n += 16; + } + + return n < 0 ? -1 : (n % 32); +} + +/* Check if value C can be generated by 2 instructions, one instruction + is li/lis or pli, another instruction is rlwinm. */ + +static bool +check_rotate32_mask (unsigned HOST_WIDE_INT c, HOST_WIDE_INT *val, int *shift, + int *mb, int *me, bool for_pli) +{ + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; + unsigned HOST_WIDE_INT v; + int b, e; + + /* diff of high and low (high ^ low) should be the mask position. */ + unsigned HOST_WIDE_INT m = low ^ high; + int tz = ctz_hwi (m); + int lz = clz_hwi (m); + b = m == 0 ? 1 : (high != 0 ? 32 - tz : lz - 32); + e = m == 0 ? 0 : (high != 0 ? lz - 33 : 31 - tz); + if (m != 0) + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); + if (high != 0) + m = ~m; + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); + + if ((high != 0) && ((v & m) != low || e < 0 || b > 31)) + return false; + + int n = for_pli ? 
0 : from_rotate32 (v); + if (n < 0) + return false; + + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; + if (v & 0x80000000ULL) + v |= HOST_WIDE_INT_M1U << 32; + *me = e; + *mb = b; + *val = v; + *shift = n; + return true; +} + /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. Output insns to set DEST equal to the constant C as a series of lis, ori and shl instructions. */ @@ -10330,7 +10398,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) HOST_WIDE_INT ud1, ud2, ud3, ud4; HOST_WIDE_INT orig_c = c; HOST_WIDE_INT val = c; - int shift; + int shift, mb, me; unsigned HOST_WIDE_INT mask; ud1 = c & 0xffff; @@ -10391,6 +10459,15 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) x = gen_rtx_AND (DImode, x, GEN_INT (mask)); emit_move_insn (dest, x); } + else if (check_rotate32_mask (orig_c, &val, &shift, &mb, &me, false) + || (TARGET_PREFIXED + && check_rotate32_mask (orig_c, &val, &shift, &mb, &me, true))) + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + emit_move_insn (temp, GEN_INT (val)); + emit_insn (gen_rlwinmdi3 (dest, copy_rtx (temp), GEN_INT (shift), + GEN_INT (mb), GEN_INT (me))); + } else if (ud3 == 0 && ud4 == 0) { temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e9e5cd1e54d..ae60d2c958f 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -4101,6 +4101,17 @@ (define_insn "*rotl<mode>3_mask" [(set_attr "type" "shift") (set_attr "maybe_var_shift" "yes")]) +(define_insn "rlwinm<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (and:GPR (rotate:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "immediate_operand" "n")) + (unspec:DI [(match_operand:SI 3 "immediate_operand" "n") + (match_operand:SI 4 "immediate_operand" "n")] + UNSPEC_AND)))] + "UINTVAL (operands[3]) < 32 && UINTVAL (operands[4]) < 32" + "rlwinm %0,%1,%2,%3,%4" + [(set_attr "type" "shift")]) + (define_insn_and_split "*rotl<mode>3_mask_dot" [(set (match_operand:CC 5 "cc_reg_operand" "=x,?y") (compare:CC diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c index 4f764d0576f..aaad9ede831 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c @@ -10,4 +10,5 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } unsigned long long mskse() { return 0xffff1234ffff1234ULL; } -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ +/* { dg-final { scan-assembler-times {\mrldimi\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mrlwinm\M} 4 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c new file mode 100644 index 00000000000..80b0c4ebd64 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.c @@ -0,0 +1,6 @@ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ +/* { dg-do compile { target has_arch_ppc64 } } */ +#include "pr94395_rlwinm.h" + +/* { dg-final { scan-assembler-times {\mli\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mrlwinm\M} 3 } } */ 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h new file mode 100644 index 00000000000..6edadd261ad --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm.h @@ -0,0 +1,8 @@ +/* using 2 instructions(rlwinm) to build constants. */ +void __attribute__ ((__noinline__, __noclone__)) +foo (long long *arg) +{ + *arg++ = 0x00000000faaabf80ULL; + *arg++ = 0x0002aaa80002aaa8ULL; + *arg++ = 0x00003fff00003fc7ULL; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c new file mode 100644 index 00000000000..f8a5f69bf3e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr94395_rlwinm_1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include "pr94395_rlwinm.h" + +long long arr1[] = {0xfaaabf80ULL, 0x2aaa80002aaa8ULL, 0x3fff00003fc7ULL}; +int +main () +{ + long long a[3]; + + foo (a); + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0) + __builtin_abort (); + return 0; +} -- 2.17.1