Enable direct moves between mask registers and GPRs in pass_reload, taking the cost model into account.
Changelog gcc/ * config/i386/i386.c (inline_secondary_memory_needed): No memory is needed between mask regs and gpr. (ix86_hard_regno_mode_ok): Add condition TARGET_AVX512F for mask regno. * config/i386/i386.h (enum reg_class): Add INT_MASK_REGS. (REG_CLASS_NAMES): Ditto. (REG_CLASS_CONTENTS): Ditto. * config/i386/i386.md: Exclude mask register in define_peephole2 which is available only for gpr. gcc/testsuite/ * gcc.target/i386/pr71453-1.c: New tests. * gcc.target/i386/pr71453-2.c: Ditto. * gcc.target/i386/pr71453-3.c: Ditto. * gcc.target/i386/pr71453-4.c: Ditto. -- BR, Hongtao
From 131388217cc5c52947fdad43216f77aa6ff090ab Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Thu, 6 Aug 2020 13:48:38 +0800 Subject: [PATCH 2/4] Enable direct movement between gpr and mask registers in pass_reload. Changelog gcc/ * config/i386/i386.c (inline_secondary_memory_needed): No memory is needed between mask regs and gpr. (ix86_hard_regno_mode_ok): Add condition TARGET_AVX512F for mask regno. * config/i386/i386.h (enum reg_class): Add INT_MASK_REGS. (REG_CLASS_NAMES): Ditto. (REG_CLASS_CONTENTS): Ditto. * config/i386/i386.md: Exclude mask register in define_peephole2 which is available only for gpr. gcc/testsuite/ * gcc.target/i386/pr71453-1.c: New tests. * gcc.target/i386/pr71453-2.c: Ditto. * gcc.target/i386/pr71453-3.c: Ditto. * gcc.target/i386/pr71453-4.c: Ditto. Delete mspill2mask option. Fix bugs. --- gcc/config/i386/i386.c | 6 +- gcc/config/i386/i386.h | 3 + gcc/config/i386/i386.md | 3 +- gcc/testsuite/gcc.target/i386/pr71453-1.c | 86 +++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr71453-2.c | 6 ++ gcc/testsuite/gcc.target/i386/pr71453-3.c | 7 ++ 6 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr71453-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr71453-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr71453-3.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 156df77166b..c9bd3ab06a2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18571,9 +18571,7 @@ inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) - || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) - || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) - || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) + || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) { gcc_assert 
(!strict || lra_in_progress); return true; @@ -18999,7 +18997,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) if ((mode == P2QImode || mode == P2HImode)) return MASK_PAIR_REGNO_P(regno); - return (VALID_MASK_REG_MODE (mode) + return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode)) || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode))); } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e0af87450b8..852dd017aa4 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1418,6 +1418,7 @@ enum reg_class FLOAT_INT_SSE_REGS, MASK_REGS, ALL_MASK_REGS, + INT_MASK_REGS, ALL_REGS, LIM_REG_CLASSES }; @@ -1477,6 +1478,7 @@ enum reg_class "FLOAT_INT_SSE_REGS", \ "MASK_REGS", \ "ALL_MASK_REGS", \ + "INT_MASK_REGS", \ "ALL_REGS" } /* Define which registers fit in which classes. This is an initializer @@ -1515,6 +1517,7 @@ enum reg_class { 0xff9ffff, 0xfffffff0, 0xf }, /* FLOAT_INT_SSE_REGS */ \ { 0x0, 0x0, 0xfe0 }, /* MASK_REGS */ \ { 0x0, 0x0, 0xff0 }, /* ALL_MASK_REGS */ \ + { 0x900ff, 0xff0, 0xff0 }, /* INT_MASK_REGS */ \ { 0xffffffff, 0xffffffff, 0xfff } /* ALL_REGS */ \ } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b24a4557871..74d207c3711 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -15051,7 +15051,7 @@ (parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 0)] UNSPEC_PARITY)) (clobber (match_dup 0))])] - "" + "!MASK_REGNO_P (REGNO (operands[0]))" [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1)] UNSPEC_PARITY))]) @@ -15072,6 +15072,7 @@ (label_ref (match_operand 5)) (pc)))] "REGNO (operands[2]) == REGNO (operands[3]) + && !MASK_REGNO_P (REGNO (operands[1])) && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[2]) && peep2_regno_dead_p (4, FLAGS_REG)" diff --git a/gcc/testsuite/gcc.target/i386/pr71453-1.c b/gcc/testsuite/gcc.target/i386/pr71453-1.c new file mode 100644 index 00000000000..a23cdc242e5 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/pr71453-1.c @@ -0,0 +1,86 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake" } */ + +typedef unsigned int u32; +typedef unsigned char u8; + +#define R(x,n) ( (x >> n) | (x << (32 - n))) + +#define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22)) +#define S1(x) (R(x, 6) ^ R(x,11) ^ R(x,25)) + +#define TT(a,b,c,d,e,f,g,h,x,K) \ +{ \ + tmp1 = h + S1(e) + (g ^ (e & (f ^ g))) + K + x; \ + tmp2 = S0(a) + ((a & b) | (c & (a | b))); \ + h = tmp1 + tmp2; \ + d += tmp1; \ +} + +static inline u32 byteswap(u32 x) +{ + x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16; + x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8; + return x; +} + +#define BE_LOAD32(n,b,i) (n) = byteswap(*(u32 *)(b + i)) + +void foo (u8 *in, u32 out[8], const u32 C[16]) +{ + u32 tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h; + u32 w0, w1, w2, w3, w4, w5, w6, w7, + w8, w9, w10, w11, w12, w13, w14, w15; + w0 = byteswap(*(u32 *)(in + 0)); + w1 = byteswap(*(u32 *)(in + 4)); + w2 = byteswap(*(u32 *)(in + 8)); + w3 = byteswap(*(u32 *)(in + 12)); + w4 = byteswap(*(u32 *)(in + 16)); + w5 = byteswap(*(u32 *)(in + 20)); + w6 = byteswap(*(u32 *)(in + 24)); + w7 = byteswap(*(u32 *)(in + 28)); + w8 = byteswap(*(u32 *)(in + 32)); + w9 = byteswap(*(u32 *)(in + 36)); + w10 = byteswap(*(u32 *)(in + 40)); + w11 = byteswap(*(u32 *)(in + 44)); + w12 = byteswap(*(u32 *)(in + 48)); + w13 = byteswap(*(u32 *)(in + 52)); + w14 = byteswap(*(u32 *)(in + 56)); + w15 = byteswap(*(u32 *)(in + 60)); + a = out[0]; + b = out[1]; + c = out[2]; + d = out[3]; + e = out[4]; + f = out[5]; + g = out[6]; + h = out[7]; + + TT(a, b, c, d, e, f, g, h, w0, C[0]); + TT(h, a, b, c, d, e, f, g, w1, C[1]); + TT(g, h, a, b, c, d, e, f, w2, C[2]); + TT(f, g, h, a, b, c, d, e, w3, C[3]); + TT(e, f, g, h, a, b, c, d, w4, C[4]); + TT(d, e, f, g, h, a, b, c, w5, C[5]); + TT(c, d, e, f, g, h, a, b, w6, C[6]); + TT(b, c, d, e, f, g, h, a, w7, C[7]); + TT(a, b, c, d, e, f, g, h, w8, C[8]); + TT(h, a, b, c, d, e, f, g, w9, 
C[9]); + TT(g, h, a, b, c, d, e, f, w10, C[10]); + TT(f, g, h, a, b, c, d, e, w11, C[11]); + TT(e, f, g, h, a, b, c, d, w12, C[12]); + TT(d, e, f, g, h, a, b, c, w13, C[13]); + TT(c, d, e, f, g, h, a, b, w14, C[14]); + TT(b, c, d, e, f, g, h, a, w15, C[15]); + + out[0] += a; + out[1] += b; + out[2] += c; + out[3] += d; + out[4] += e; + out[5] += f; + out[6] += g; + out[7] += h; +} + +/* { dg-final { scan-assembler-not "%xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr71453-2.c b/gcc/testsuite/gcc.target/i386/pr71453-2.c new file mode 100644 index 00000000000..7b5339d2e4b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr71453-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=haswell" } */ + +#include "pr71453-1.c" + +/* { dg-final { scan-assembler-not "%xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr71453-3.c b/gcc/testsuite/gcc.target/i386/pr71453-3.c new file mode 100644 index 00000000000..64ee1289664 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr71453-3.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +#include "pr71453-1.c" + +/* { dg-final { scan-assembler "kmovd" } } */ +/* { dg-final { scan-assembler-not "%xmm" } } */ -- 2.18.1