Hi,

Gentle ping...

BR,
Jeff(Jiufu) Guo

Jiufu Guo <guoji...@linux.ibm.com> writes:

> Hi,
>
> Gentle ping...
>
> BR,
> Jeff(Jiufu) Guo
>
> Jiufu Guo <guoji...@linux.ibm.com> writes:
>
>> Hi,
>>
>> Gentle ping.
>>
>> BR,
>> Jeff(Jiufu) Guo
>>
>> Jiufu Guo <guoji...@linux.ibm.com> writes:
>>
>>> Hi,
>>>
>>> Gentle ping ...
>>>
>>> Jiufu Guo <guoji...@linux.ibm.com> writes:
>>>
>>>> Hi,
>>>>
>>>> Gentle ping ...
>>>>
>>>> BR,
>>>> Jeff(Jiufu) Guo
>>>>
>>>> Jiufu Guo <guoji...@linux.ibm.com> writes:
>>>>
>>>>> Hi,
>>>>>
>>>>> The 'rlwinm' pattern is already well used for SImode.  Because this
>>>>> instruction can affect the whole 64-bit register, some 64-bit (DImode)
>>>>> constants can be built via 'lis/li+rlwinm'.  To achieve this, a new
>>>>> pattern for 'rlwinm' is added, and 'rs6000_emit_set_long_const' is
>>>>> updated to check whether a constant can be built by 'lis/li; rlwinm'.
>>>>>
>>>>> Bootstrap and regtest pass on ppc64{,le}.
>>>>>
>>>>> Is this patch ok for trunk (when stage1 is open)?
>>>
>>> Is this patch ok for trunk?
>>>
>>> BR,
>>> Jeff(Jiufu) Guo
>>>
>>>>>
>>>>> Jeff (Jiufu Guo).
>>>>>
>>>>> gcc/ChangeLog:
>>>>>
>>>>>   * config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
>>>>>   parameter.
>>>>>   * config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New 
>>>>> function.
>>>>>   (rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
>>>>>   (can_be_rotated_to_lowbits): Add new parameter.
>>>>>   * config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.
>>>>>
>>>>> gcc/testsuite/ChangeLog:
>>>>>
>>>>>   * gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
>>>>>   * gcc.target/powerpc/rlwinm4di-1.c: New test.
>>>>>   * gcc.target/powerpc/rlwinm4di-2.c: New test.
>>>>>   * gcc.target/powerpc/rlwinm4di.c: New test.
>>>>>   * gcc.target/powerpc/rlwinm4di.h: New test.
>>>>>
>>>>> ---
>>>>>  gcc/config/rs6000/rs6000-protos.h             |  2 +-
>>>>>  gcc/config/rs6000/rs6000.cc                   | 65 ++++++++++++++++++-
>>>>>  gcc/config/rs6000/rs6000.md                   | 18 +++++
>>>>>  gcc/testsuite/gcc.target/powerpc/pr93012.c    |  2 +-
>>>>>  .../gcc.target/powerpc/rlwinm4di-1.c          | 25 +++++++
>>>>>  .../gcc.target/powerpc/rlwinm4di-2.c          | 19 ++++++
>>>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
>>>>>  gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++++++
>>>>>  8 files changed, 158 insertions(+), 4 deletions(-)
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>>>
>>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h 
>>>>> b/gcc/config/rs6000/rs6000-protos.h
>>>>> index 09a57a806fa..10505a8061a 100644
>>>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>>>> @@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, 
>>>>> machine_mode, int * = nullptr);
>>>>>  extern int vspltis_shifted (rtx);
>>>>>  extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
>>>>>  extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
>>>>> -extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int 
>>>>> *);
>>>>> +extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int 
>>>>> *, bool = false);
>>>>>  extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
>>>>>  extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
>>>>>  extern int num_insns_constant (rtx, machine_mode);
>>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>>>> index 6ba9df4f02e..853eaede673 100644
>>>>> --- a/gcc/config/rs6000/rs6000.cc
>>>>> +++ b/gcc/config/rs6000/rs6000.cc
>>>>> @@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, 
>>>>> int *shift, HOST_WIDE_INT *mask)
>>>>>    return false;
>>>>>  }
>>>>>  
>>>>> +/* Check if value C can be generated by 2 instructions, one instruction
>>>>> +   is li/lis, another instruction is rlwinm.  */
>>>>> +
>>>>> +static bool
>>>>> +can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
>>>>> +                            int *shift, HOST_WIDE_INT *mask)
>>>>> +{
>>>>> +  unsigned HOST_WIDE_INT low = c & 0xFFFFFFFFULL;
>>>>> +  unsigned HOST_WIDE_INT high = (c >> 32) & 0xFFFFFFFFULL;
>>>>> +  unsigned HOST_WIDE_INT v;
>>>>> +
>>>>> +  /* diff of high and low (high ^ low) should be the mask position.  */
>>>>> +  unsigned HOST_WIDE_INT m = low ^ high;
>>>>> +  int tz = ctz_hwi (m);
>>>>> +  int lz = clz_hwi (m);
>>>>> +  if (m != 0)
>>>>> +    m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
>>>>> +  if (high != 0)
>>>>> +    m = ~m;
>>>>> +  v = high != 0 ? high : ((low | ~m) & 0xFFFFFFFF);
>>>>> +
>>>>> +  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
>>>>> +    return false;
>>>>> +
>>>>> +  /* rotl32 on positive/negative value of 'li' 15/16bits.  */
>>>>> +  int n;
>>>>> +  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
>>>>> +      && !can_be_rotated_to_lowbits ((~v) & 0xFFFFFFFFULL, 15, &n, true))
>>>>> +    {
>>>>> +      /* rotate32 from a negative value of 'lis'.  */
>>>>> +      if (!can_be_rotated_to_lowbits (v & 0xFFFFFFFFULL, 16, &n, true))
>>>>> + return false;
>>>>> +      n += 16;
>>>>> +    }
>>>>> +  n = 32 - (n % 32);
>>>>> +  n %= 32;
>>>>> +  v = ((v >> n) | (v << (32 - n))) & 0xFFFFFFFF;
>>>>> +  if (v & 0x80000000ULL)
>>>>> +    v |= HOST_WIDE_INT_M1U << 32;
>>>>> +  *mask = m;
>>>>> +  *val = v;
>>>>> +  *shift = n;
>>>>> +  return true;
>>>>> +}
>>>>> +
>>>>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>>>>     Output insns to set DEST equal to the constant C as a series of
>>>>>     lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>>>>> @@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, 
>>>>> HOST_WIDE_INT c, int *num_insns)
>>>>>        return;
>>>>>      }
>>>>>  
>>>>> +  HOST_WIDE_INT val;
>>>>> +  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
>>>>> +    {
>>>>> +      /* li/lis; rlwinm */
>>>>> +      count_or_emit_insn (temp, GEN_INT (val));
>>>>> +      rtx low = temp ? gen_lowpart (SImode, temp) : nullptr;
>>>>> +      rtx m = GEN_INT (mask);
>>>>> +      rtx n = GEN_INT (shift);
>>>>> +      count_or_emit_insn (gen_rlwinm_di_mask (dest, low, n, m));
>>>>> +      return;
>>>>> +    }
>>>>> +
>>>>>    if (ud3 == 0 && ud4 == 0)
>>>>>      {
>>>>>        gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>>>>> @@ -15220,7 +15277,8 @@ rs6000_reverse_condition (machine_mode mode, enum 
>>>>> rtx_code code)
>>>>>     Return false otherwise.  */
>>>>>  
>>>>>  bool
>>>>> -can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int 
>>>>> *rot)
>>>>> +can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int 
>>>>> *rot,
>>>>> +                    bool rotl32)
>>>>>  {
>>>>>    int clz = HOST_BITS_PER_WIDE_INT - lowbits;
>>>>>  
>>>>> @@ -15244,7 +15302,10 @@ can_be_rotated_to_lowbits (unsigned 
>>>>> HOST_WIDE_INT c, int lowbits, int *rot)
>>>>>          ^bit -> Vbit, , then zeros are at head or tail.
>>>>>        00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
>>>>>    const int rot_bits = lowbits + 1;
>>>>> -  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
>>>>> +  unsigned HOST_WIDE_INT rc;
>>>>> +  rc = rotl32 ? ((((c & 0xFFFFFFFFULL) >> rot_bits)
>>>>> +           | ((c << (32 - rot_bits)) & 0xFFFFFFFFULL)))
>>>>> +       : (c >> rot_bits) | (c << (clz - 1));
>>>>>    tz = ctz_hwi (rc);
>>>>>    if (clz_hwi (rc) + tz >= clz)
>>>>>      {
>>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>>>> index bc8bc6ab060..8a82ba3e26c 100644
>>>>> --- a/gcc/config/rs6000/rs6000.md
>>>>> +++ b/gcc/config/rs6000/rs6000.md
>>>>> @@ -4213,6 +4213,24 @@ (define_insn_and_split "*rotl<mode>3_mask_dot2"
>>>>>     (set_attr "dot" "yes")
>>>>>     (set_attr "length" "4,8")])
>>>>>  
>>>>> +; define an insn about rlwinm for DI mode (with high part content)
>>>>> +(define_insn "rlwinm_di_mask"
>>>>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>>>>> +    (and:DI (plus:DI
>>>>> +              (ashift:DI (subreg:DI
>>>>> +            (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
>>>>> +                              (match_operand:SI 2 "const_int_operand" 
>>>>> "n")) 0)
>>>>> +                   (const_int 32))
>>>>> +              (zero_extend:DI (rotate:SI (match_dup 1) (match_dup 2))))
>>>>> +            (match_operand:DI 3 "const_int_operand" "n")))]
>>>>> +  "rs6000_is_valid_and_mask (operands[3], SImode)"
>>>>> +{
>>>>> +  return UINTVAL (operands[3]) == -1ULL ?
>>>>> +    "rlwinm %0,%1,%h2,1,0" :  "rlwinm %0,%1,%h2,%3";
>>>>> +}
>>>>> +  [(set_attr "type" "shift")
>>>>> +   (set_attr "maybe_var_shift" "yes")])
>>>>> +
>>>>>  ; Special case for less-than-0.  We can do it with just one machine
>>>>>  ; instruction, but the generic optimizers do not realise it is cheap.
>>>>>  (define_insn "*lt0_<mode>di"
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c 
>>>>> b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>>> index 4f764d0576f..70ddfaa21da 100644
>>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>>>>> @@ -10,4 +10,4 @@ unsigned long long mskh1() { return 
>>>>> 0xffff9234ffff9234ULL; }
>>>>>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>>>>>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>>>>>  
>>>>> -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
>>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M|\mrldimi\M} 7 } } */
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c 
>>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>>> new file mode 100644
>>>>> index 00000000000..8959578143b
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
>>>>> @@ -0,0 +1,25 @@
>>>>> +/* { dg-do run } */
>>>>> +/* { dg-options "-O2" } */
>>>>> +
>>>>> +#include "rlwinm4di.h"
>>>>> +
>>>>> +long long arr1[] = {
>>>>> +  0x0000400100000001ULL, 0x0000000200000002ULL, 0xffff8000bfff8000ULL,
>>>>> +  0xffff8001ffff8001ULL, 0x0000800100000001ULL, 0x0000800100008001ULL,
>>>>> +  0x0000800200000002ULL, 0x0000800000008000ULL, 0x0000000080008000ULL,
>>>>> +  0xffff0001bfff0001ULL, 0xffff0001ffff0001ULL, 0x0001000200000002ULL,
>>>>> +  0x8001000080010000ULL, 0x0004000100000001ULL, 0x0004000100040001ULL,
>>>>> +  0x00000000bfffe001ULL, 0x0003fffe0001fffeULL, 0x0003fffe0003fffeULL,
>>>>> +  0x0002000100000001ULL, 0x0002000100020001ULL,
>>>>> +};
>>>>> +
>>>>> +int
>>>>> +main ()
>>>>> +{
>>>>> +  long long a[sizeof (arr1) / sizeof (arr1[0])];
>>>>> +
>>>>> +  foo (a);
>>>>> +  if (__builtin_memcmp (a, arr1, sizeof (arr1)) != 0)
>>>>> +    __builtin_abort ();
>>>>> +  return 0;
>>>>> +}
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c 
>>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>>> new file mode 100644
>>>>> index 00000000000..9494d0327b4
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
>>>>> @@ -0,0 +1,19 @@
>>>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>>>> +
>>>>> +#define N 5
>>>>> +#define MASK 0xffffffffe0000003ULL
>>>>> +
>>>>> +typedef unsigned long long int64;
>>>>> +
>>>>> +int64
>>>>> +foo (int64 v)
>>>>> +{
>>>>> +  unsigned int v1 = v;
>>>>> +  unsigned int v2 = ((v1 << N) | (v1 >> (32 - N)));
>>>>> +  return ((int64) v2 | ((int64) v2 << 32)) & MASK;
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-assembler-not {\mor\M} } } */
>>>>> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
>>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 1 } } */
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c 
>>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>>> new file mode 100644
>>>>> index 00000000000..fcbc8f8d742
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
>>>>> @@ -0,0 +1,6 @@
>>>>> +/* { dg-options "-O2 -mno-prefixed" } */
>>>>> +/* { dg-do compile { target has_arch_ppc64 } } */
>>>>> +#include "rlwinm4di.h"
>>>>> +
>>>>> +/* { dg-final { scan-assembler-times {\mrlwinm\M} 20 } } */
>>>>> +
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h 
>>>>> b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>>> new file mode 100644
>>>>> index 00000000000..59fe739ca85
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwinm4di.h
>>>>> @@ -0,0 +1,25 @@
>>>>> +/* using 2 instructions(rlwinm) to build constants.   */
>>>>> +void __attribute__ ((__noinline__, __noclone__))
>>>>> +foo (long long *arg)
>>>>> +{
>>>>> +  *arg++ = 0x0000400100000001ULL;
>>>>> +  *arg++ = 0x0000000200000002ULL;
>>>>> +  *arg++ = 0xffff8000bfff8000ULL;
>>>>> +  *arg++ = 0xffff8001ffff8001ULL;
>>>>> +  *arg++ = 0x0000800100000001ULL;
>>>>> +  *arg++ = 0x0000800100008001ULL;
>>>>> +  *arg++ = 0x0000800200000002ULL;
>>>>> +  *arg++ = 0x0000800000008000ULL;
>>>>> +  *arg++ = 0x0000000080008000ULL;
>>>>> +  *arg++ = 0xffff0001bfff0001ULL;
>>>>> +  *arg++ = 0xffff0001ffff0001ULL;
>>>>> +  *arg++ = 0x0001000200000002ULL;
>>>>> +  *arg++ = 0x8001000080010000ULL;
>>>>> +  *arg++ = 0x0004000100000001ULL;
>>>>> +  *arg++ = 0x0004000100040001ULL;
>>>>> +  *arg++ = 0x00000000bfffe001ULL;
>>>>> +  *arg++ = 0x0003fffe0001fffeULL;
>>>>> +  *arg++ = 0x0003fffe0003fffeULL;
>>>>> +  *arg++ = 0x0002000100000001ULL;
>>>>> +  *arg++ = 0x0002000100020001ULL;
>>>>> +}

Reply via email to