Hi,
Gentle ping... BR, Jeff(Jiufu) Guo Jiufu Guo <guoji...@linux.ibm.com> writes: > Hi, > > Gentle ping... > > BR, > Jeff(Jiufu) Guo > > Jiufu Guo <guoji...@linux.ibm.com> writes: > >> Hi, >> >> Gentle ping. >> >> BR, >> Jeff(Jiufu) Guo >> >> Jiufu Guo <guoji...@linux.ibm.com> writes: >> >>> Hi, >>> >>> Gentle ping ... >>> >>> Jiufu Guo <guoji...@linux.ibm.com> writes: >>> >>>> Hi, >>>> >>>> Gentle ping ... >>>> >>>> BR, >>>> Jeff(Jiufu) Guo >>>> >>>> Jiufu Guo <guoji...@linux.ibm.com> writes: >>>> >>>>> Hi, >>>>> >>>>> The 'rlwinm' pattern is already widely used for SImode. Since this instruction >>>>> can affect the whole 64-bit register, some 64-bit (DImode) constants >>>>> can be built via 'lis/li+rlwinm'. To achieve this, a new pattern for >>>>> 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check >>>>> if a constant is able to be built by 'lis/li; rlwinm'. >>>>> >>>>> Bootstrap and regtest pass on ppc64{,le}. >>>>> >>>>> Is this patch ok for trunk (when stage1 is open)? >>> >>> Is this patch ok for trunk? >>> >>> BR, >>> Jeff(Jiufu) Guo >>> >>>>> >>>>> Jeff (Jiufu Guo). >>>>> >>>>> gcc/ChangeLog: >>>>> >>>>> * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new >>>>> parameter. >>>>> * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New >>>>> function. >>>>> (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'. >>>>> (can_be_rotated_to_lowbits): Add new parameter. >>>>> * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern. >>>>> >>>>> gcc/testsuite/ChangeLog: >>>>> >>>>> * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'. >>>>> * gcc.target/powerpc/rlwinm4di-1.c: New test. >>>>> * gcc.target/powerpc/rlwinm4di-2.c: New test. >>>>> * gcc.target/powerpc/rlwinm4di.c: New test. >>>>> * gcc.target/powerpc/rlwinm4di.h: New test. 
>>>>> >>>>> --- >>>>> gcc/config/rs6000/rs6000-protos.h | 2 +- >>>>> gcc/config/rs6000/rs6000.cc | 65 ++++++++++++++++++- >>>>> gcc/config/rs6000/rs6000.md | 18 +++++ >>>>> gcc/testsuite/gcc.target/powerpc/pr93012.c | 2 +- >>>>> .../gcc.target/powerpc/rlwinm4di-1.c | 25 +++++++ >>>>> .../gcc.target/powerpc/rlwinm4di-2.c | 19 ++++++ >>>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.c | 6 ++ >>>>> gcc/testsuite/gcc.target/powerpc/rlwinm4di.h | 25 +++++++ >>>>> 8 files changed, 158 insertions(+), 4 deletions(-) >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>>> >>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h >>>>> b/gcc/config/rs6000/rs6000-protos.h >>>>> index 09a57a806fa..10505a8061a 100644 >>>>> --- a/gcc/config/rs6000/rs6000-protos.h >>>>> +++ b/gcc/config/rs6000/rs6000-protos.h >>>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, >>>>> machine_mode, int * = nullptr); >>>>> extern int vspltis_shifted (rtx); >>>>> extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); >>>>> extern bool macho_lo_sum_memory_operand (rtx, machine_mode); >>>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int >>>>> *); >>>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int >>>>> *, bool = false); >>>>> extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT); >>>>> extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT); >>>>> extern int num_insns_constant (rtx, machine_mode); >>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >>>>> index 6ba9df4f02e..853eaede673 100644 >>>>> --- a/gcc/config/rs6000/rs6000.cc >>>>> +++ b/gcc/config/rs6000/rs6000.cc >>>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, >>>>> int 
*shift, HOST_WIDE_INT *mask) >>>>> return false; >>>>> } >>>>> >>>>> +/* Check if value C can be generated by 2 instructions, one instruction >>>>> + is li/lis, another instruction is rlwinm. */ >>>>> + >>>>> +static bool >>>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val, >>>>> + int *shift, HOST_WIDE_INT *mask) >>>>> +{ >>>>> + unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL; >>>>> + unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL; >>>>> + unsigned HOST_WIDE_INT v; >>>>> + >>>>> + /* diff of high and low (high ^ low) should be the mask position. */ >>>>> + unsigned HOST_WIDE_INT m = low ^ high; >>>>> + int tz = ctz_hwi (m); >>>>> + int lz = clz_hwi (m); >>>>> + if (m != 0) >>>>> + m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz); >>>>> + if (high != 0) >>>>> + m = ~m; >>>>> + v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF); >>>>> + >>>>> + if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1)) >>>>> + return false; >>>>> + >>>>> + /* rotl32 on positive/negative value of 'li' 15/16bits. */ >>>>> + int n; >>>>> + if (!can_be_rotated_to_lowbits (v, 15, &n, true) >>>>> + && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true)) >>>>> + { >>>>> + /* rotate32 from a negative value of 'lis'. */ >>>>> + if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true)) >>>>> + return false; >>>>> + n += 16; >>>>> + } >>>>> + n = 32 - (n % 32); >>>>> + n %= 32; >>>>> + v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF; >>>>> + if (v & 0x80000000ULL) >>>>> + v |= HOST_WIDE_INT_M1U << 32; >>>>> + *mask = m; >>>>> + *val = v; >>>>> + *shift = n; >>>>> + return true; >>>>> +} >>>>> + >>>>> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. >>>>> Output insns to set DEST equal to the constant C as a series of >>>>> lis, ori and shl instructions. 
If NUM_INSNS is not NULL, then >>>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, >>>>> HOST_WIDE_INT c, int *num_insns) >>>>> return; >>>>> } >>>>> >>>>> + HOST_WIDE_INT val; >>>>> + if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask)) >>>>> + { >>>>> + /* li/lis; rlwinm */ >>>>> + count_or_emit_insn (temp, GEN_INT (val)); >>>>> + rtx low = temp ? gen_lowpart (SImode, temp) : nullptr; >>>>> + rtx m = GEN_INT (mask); >>>>> + rtx n = GEN_INT (shift); >>>>> + count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m)); >>>>> + return; >>>>> + } >>>>> + >>>>> if (ud3 == 0 && ud4 == 0) >>>>> { >>>>> gcc_assert ((ud2 & 0x8000) && ud1 != 0); >>>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum >>>>> rtx_code code) >>>>> Return false otherwise. */ >>>>> >>>>> bool >>>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int >>>>> *rot) >>>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int >>>>> *rot, >>>>> + bool rotl32) >>>>> { >>>>> int clz = HOST_BITS_PER_WIDE_INT - lowbits; >>>>> >>>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned >>>>> HOST_WIDE_INT c, int lowbits, int *rot) >>>>> ^bit -> Vbit, , then zeros are at head or tail. >>>>> 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */ >>>>> const int rot_bits = lowbits + 1; >>>>> - unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); >>>>> + unsigned HOST_WIDE_INT rc; >>>>> + rc = rotl32 ? 
((((c & 0xFFFFFFFFULL) >> rot_bits) >>>>> + | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL))) >>>>> + : (c >> rot_bits) | (c << (clz - 1)); >>>>> tz = ctz_hwi (rc); >>>>> if (clz_hwi (rc) + tz >= clz) >>>>> { >>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>>>> index bc8bc6ab060..8a82ba3e26c 100644 >>>>> --- a/gcc/config/rs6000/rs6000.md >>>>> +++ b/gcc/config/rs6000/rs6000.md >>>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2" >>>>> (set_attr "dot" "yes") >>>>> (set_attr "length" "4,8")]) >>>>> >>>>> +; define an insn about rlwinm for DI mode (with high part content) >>>>> +(define_insn "rlwinm_di_mask" >>>>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>>>> + (and:DI (plus:DI >>>>> + (ashift:DI (subreg:DI >>>>> + (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r") >>>>> + (match_operand:SI 2 "const_int_operand" >>>>> "n")) 0) >>>>> + (const_int 32)) >>>>> + (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2)))) >>>>> + (match_operand:DI 3 "const_int_operand" "n")))] >>>>> + "rs6000_is_valid_and_mask (operands[3], SImode)" >>>>> +{ >>>>> + return UINTVAL (operands[3]) == -1ULL ? >>>>> + "rlwinm %0,%1,%h2,1,0" : "rlwinm %0,%1,%h2,%3"; >>>>> +} >>>>> + [(set_attr "type" "shift") >>>>> + (set_attr "maybe_var_shift" "yes")]) >>>>> + >>>>> ; Special case for less-than-0. We can do it with just one machine >>>>> ; instruction, but the generic optimizers do not realise it is cheap. 
>>>>> (define_insn "*lt0_<mode>di" >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>>> index 4f764d0576f..70ddfaa21da 100644 >>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c >>>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return >>>>> 0xffff9234ffff9234ULL; } >>>>> unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } >>>>> unsigned long long mskse() { return 0xffff1234ffff1234ULL; } >>>>> >>>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ >>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>>> new file mode 100644 >>>>> index 00000000000..8959578143b >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c >>>>> @@ -0,0 +1,25 @@ >>>>> +/* { dg-do run } */ >>>>> +/* { dg-options "-O2" } */ >>>>> + >>>>> +#include "rlwinm4di.h" >>>>> + >>>>> +long long arr1[] = { >>>>> + 0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL, >>>>> + 0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL, >>>>> + 0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL, >>>>> + 0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL, >>>>> + 0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL, >>>>> + 0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL, >>>>> + 0x0002000100000001ULL, 0x0002000100020001ULL, >>>>> +}; >>>>> + >>>>> +int >>>>> +main () >>>>> +{ >>>>> + long long a[sizeof (arr1) / sizeof (arr1[0])]; >>>>> + >>>>> + foo (a); >>>>> + if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0) >>>>> + __builtin_abort (); >>>>> + return 0; >>>>> +} >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>>> new file 
mode 100644 >>>>> index 00000000000..9494d0327b4 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c >>>>> @@ -0,0 +1,19 @@ >>>>> +/* { dg-options "-O2 -mno-prefixed" } */ >>>>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>>>> + >>>>> +#define N 5 >>>>> +#define MASK 0xffffffffe0000003ULL >>>>> + >>>>> +typedef unsigned long long int64; >>>>> + >>>>> +int64 >>>>> +foo (int64 v) >>>>> +{ >>>>> + unsigned int v1 = v; >>>>> + unsigned int v2 = ((v1 << N) | (v1 >> (32 - N))); >>>>> + return ((int64) v2 | ((int64) v2 << 32)) & MASK; >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */ >>>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */ >>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>>> new file mode 100644 >>>>> index 00000000000..fcbc8f8d742 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c >>>>> @@ -0,0 +1,6 @@ >>>>> +/* { dg-options "-O2 -mno-prefixed" } */ >>>>> +/* { dg-do compile { target has_arch_ppc64 } } */ >>>>> +#include "rlwinm4di.h" >>>>> + >>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */ >>>>> + >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>>> new file mode 100644 >>>>> index 00000000000..59fe739ca85 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h >>>>> @@ -0,0 +1,25 @@ >>>>> +/* using 2 instructions(rlwinm) to build constants. 
*/ >>>>> +void __attribute__ ((__noinline__, __noclone__)) >>>>> +foo (long long *arg) >>>>> +{ >>>>> + *arg++ = 0x0000400100000001ULL; >>>>> + *arg++ = 0x0000000200000002ULL; >>>>> + *arg++ = 0xffff8000bfff8000ULL; >>>>> + *arg++ = 0xffff8001ffff8001ULL; >>>>> + *arg++ = 0x0000800100000001ULL; >>>>> + *arg++ = 0x0000800100008001ULL; >>>>> + *arg++ = 0x0000800200000002ULL; >>>>> + *arg++ = 0x0000800000008000ULL; >>>>> + *arg++ = 0x0000000080008000ULL; >>>>> + *arg++ = 0xffff0001bfff0001ULL; >>>>> + *arg++ = 0xffff0001ffff0001ULL; >>>>> + *arg++ = 0x0001000200000002ULL; >>>>> + *arg++ = 0x8001000080010000ULL; >>>>> + *arg++ = 0x0004000100000001ULL; >>>>> + *arg++ = 0x0004000100040001ULL; >>>>> + *arg++ = 0x00000000bfffe001ULL; >>>>> + *arg++ = 0x0003fffe0001fffeULL; >>>>> + *arg++ = 0x0003fffe0003fffeULL; >>>>> + *arg++ = 0x0002000100000001ULL; >>>>> + *arg++ = 0x0002000100020001ULL; >>>>> +}