On Wed, May 13, 2020 at 6:35 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Wed, May 13, 2020 at 3:25 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Wed, May 13, 2020 at 6:17 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > On Wed, May 13, 2020 at 2:37 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > > On Wed, May 13, 2020 at 5:04 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > > > > > On Wed, May 13, 2020 at 1:05 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > > > > > > > On Tue, May 12, 2020 at 10:07 PM H.J. Lu <hjl.to...@gmail.com> > > > > > > wrote: > > > > > > > > > > > > > > Update STV pass to properly count cost of XMM register push. In > > > > > > > 32-bit > > > > > > > mode, to convert XMM register push in DImode, we do an XMM store > > > > > > > in > > > > > > > DImode, followed by 2 memory pushes in SImode, instead of 2 > > > > > > > integer > > > > > > > register pushes in SImode. To convert XM register push in > > > > > > > SImode, we > > > > > > > do an XMM register to integer register move in SImode, followed an > > > > > > > integer register push in SImode, instead of an integer register > > > > > > > push in > > > > > > > SImode. In 64-bit mode, we do an XMM register to integer > > > > > > > register move > > > > > > > in SImode or DImode, followed an integer register push in SImode > > > > > > > or > > > > > > > DImode, instead of an integer register push SImode or DImode. > > > > > > > > > > > > > > Tested on Linux/x86 and Linux/x86-64. > > > > > > > > > > > > I think it is better to implement XMM register pushes, and split > > > > > > them > > > > > > after reload to a sequence of: > > > > > > > > > > > > (set (reg:P SP_REG) (plus:P SP_REG) (const_int -8))) > > > > > > (set (match_dup 0) (match_dup 1)) > > > > > > > > > > > > This is definitely better than trips through memory to stack. > > > > > > > > > > Attached (untested patch) allows fake pushes from XMM registers, so > > > > > STV pass can allow pushes. > > > > > > > > The problem isn't STV pass. The IRA pass won't assign hard register for > > > > > > Please see the sequence before STV pass: > > > > > > (insn 24 23 25 3 (set (reg/v:DI 85 [ target ]) > > > (mem:DI (reg/f:SI 86 [ target_p ]) [2 *target_p.0_1+0 S8 > > > A32])) "pr95021.c":17:7 64 {*movdi_internal} > > > (expr_list:REG_DEAD (reg/f:SI 86 [ target_p ]) > > > (nil))) > > > > > > (insn 26 25 27 3 (set (mem:DI (reg/f:SI 87 [ c ]) [2 c.1_2->target+0 S8 > > > A32]) > > > (reg/v:DI 85 [ target ])) "pr95021.c":18:15 64 {*movdi_internal} > > > (expr_list:REG_DEAD (reg/f:SI 87 [ c ]) > > > (nil))) > > > > > > (insn 28 27 29 3 (set (mem:DI (pre_dec:SI (reg/f:SI 7 sp)) [2 S8 A64]) > > > (reg/v:DI 85 [ target ])) "pr95021.c":19:5 40 {*pushdi2} > > > (expr_list:REG_DEAD (reg/v:DI 85 [ target ]) > > > (expr_list:REG_ARGS_SIZE (const_int 16 [0x10]) > > > (nil)))) > > > > > > So, (reg 85) gets moved to a memory in (insn 26) and pushed to a stack > > > in (insn 28). > > > > > > STV pass does this: > > > > > > (insn 24 41 37 3 (set (subreg:V2DI (reg:DI 89) 0) > > > (subreg:V2DI (reg:DI 91) 0)) "pr95021.c":17:7 1342 > > > {movv2di_internal} > > > (expr_list:REG_DEAD (reg/f:SI 86 [ target_p ]) > > > (nil))) > > > > > > (insn 37 24 38 3 (set (reg:V2DI 90) > > > (subreg:V2DI (reg:DI 89) 0)) "pr95021.c":17:7 -1 > > > (nil)) > > > (insn 38 37 39 3 (set (subreg:SI (reg/v:DI 85 [ target ]) 0) > > > (subreg:SI (reg:V2DI 90) 0)) "pr95021.c":17:7 -1 > > > (nil)) > > > (insn 39 38 40 3 (set (reg:V2DI 90) > > > (lshiftrt:V2DI (reg:V2DI 90) > > > (const_int 32 [0x20]))) "pr95021.c":17:7 -1 > > > (nil)) > > > (insn 40 39 25 3 (set (subreg:SI (reg/v:DI 85 [ target ]) 4) > > > (subreg:SI (reg:V2DI 90) 0)) "pr95021.c":17:7 -1 > > > (nil)) > > > > > > (insn 26 25 27 3 (set (mem:DI (reg/f:SI 87 [ c ]) [2 c.1_2->target+0 S8 > > > A32]) > > > (reg:DI 89)) "pr95021.c":18:15 64 {*movdi_internal} > > > (expr_list:REG_DEAD (reg/f:SI 87 [ c ]) > > > (nil))) > > > > > > (insn 28 27 29 3 (set (mem:DI (pre_dec:SI (reg/f:SI 7 sp)) [2 S8 A64]) > > > (reg/v:DI 85 [ target ])) "pr95021.c":19:5 40 {*pushdi2} > > > (expr_list:REG_DEAD (reg/v:DI 85 [ target ]) > > > (expr_list:REG_ARGS_SIZE (const_int 16 [0x10]) > > > (nil)))) > > > > > > For some reason (reg 89) moves to (reg 85) via sequence (insn 37) to > > > (insn 40), splitting to SImode and reassembling back to (reg 85). > > > > > > > > > > > (insn 28 27 29 3 (set (mem:DI (pre_dec:SI (reg/f:SI 7 sp)) [2 S8 A64]) > > > > (reg/v:DI 85 [ target ])) "x.i":19:5 40 {*pushdi2} > > > > (expr_list:REG_DEAD (reg/v:DI 85 [ target ]) > > > > (expr_list:REG_ARGS_SIZE (const_int 16 [0x10]) > > > > (nil)))) > > > > > > > > and the reload pass turns into > > > > > > > > .... > > > > (insn 28 27 29 3 (set (mem:DI (pre_dec:SI (reg/f:SI 7 sp)) [2 S8 A64]) > > > > (mem/c:DI (plus:SI (reg/f:SI 7 sp) > > > > (const_int 16 [0x10])) [8 %sfp+-8 S8 A64])) "x.i":19:5 > > > > 40 {*pushdi2} > > > > (expr_list:REG_ARGS_SIZE (const_int 16 [0x10]) > > > > (nil))) > > > > > > This is an optimization of the reload pass, it figures out where the > > > values live and tries its best to assign hard regs and stack slots to > > > the above convoluted sequence. Note, that DImode push from integer > > > regs splits to SImode pushes after reload. This has nothing with STV > > > pass. > > > > > > The question is, why STV pass creates its funny sequence? The original > > > sequence should be easily solved by storing DImode from XMM register > > > and (with patched gcc) pushing DImode value from the same XMM > > > register. > > > > STV pass is mostly OK since it has to use XMM to move upper and lower > > 32 bits of a 64-bit integer. The problem is that push XMM becomes very > > expensive later. > > As shown in the patch, XMM push should be just decrement of SP reg and > move to the created stack slot.
OK for master if there are no regressions? Thanks. -- H.J.
From 08d2f763bf16e59e4788212298a47366b8179474 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Tue, 12 May 2020 11:30:29 -0700 Subject: [PATCH] x86: Allow vector register pushes Add vector register pushes and split them after reload to a sequence of: (set (reg:P SP_REG) (plus:P SP_REG) (const_int -8))) (set (match_dup 0) (match_dup 1)) so that STV pass can convert DI/TI mode integer pushes to vector register pushes. Also update has_non_address_hard_reg to allow pushes. gcc/ 2020-05-XX Uros Bizjak <ubiz...@gmail.com> H.J. Lu <hongjiu...@intel.com> PR target/95021 * config/i386/i386-features.c (has_non_address_hard_reg): Allow SP_REG. * config/i386/i386.md (SWIDWI): New mode iterator. (*push<mode>2): Allow XMM register. (*pushdi2_rex64): Likewise. (*pushsi2): Likewise. (*push<mode>2_rex64): Likewise. Add XMM register push splitters. gcc/testsuite/ 2020-05-XX H.J. Lu <hongjiu...@intel.com> PR target/95021 * gcc.target/i386/pr95021-1.c: New test. * gcc.target/i386/pr95021-2.c: Likewise. * gcc.target/i386/pr95021-3.c: Likewise. --- gcc/config/i386/i386-features.c | 3 +- gcc/config/i386/i386.md | 66 ++++++++++++++++++----- gcc/testsuite/gcc.target/i386/pr95021-1.c | 25 +++++++++ gcc/testsuite/gcc.target/i386/pr95021-2.c | 25 +++++++++ gcc/testsuite/gcc.target/i386/pr95021-3.c | 25 +++++++++ 5 files changed, 130 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-3.c diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index 78fb373db6e..6cad125fa83 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -1264,7 +1264,8 @@ has_non_address_hard_reg (rtx_insn *insn) FOR_EACH_INSN_DEF (ref, insn) if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) - && DF_REF_REGNO (ref) != FLAGS_REG) + && DF_REF_REGNO (ref) != FLAGS_REG + && DF_REF_REGNO (ref) != SP_REG) return true; FOR_EACH_INSN_USE (ref, insn) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 722eb9b5ec8..226af7b1773 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1049,6 +1049,9 @@ (define_mode_iterator DWI [(DI "!TARGET_64BIT") ;; SWI and DWI together. (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")]) +;; SWI48 and DWI together. +(define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")]) + ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not ;; compile time constant, it is faster to use <MODE_SIZE> than ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on @@ -1670,8 +1673,8 @@ (define_insn "*cmpi<unord><MODEF:mode>" ;; Push/pop instructions. (define_insn "*push<mode>2" - [(set (match_operand:DWI 0 "push_operand" "=<") - (match_operand:DWI 1 "general_no_elim_operand" "riF*o"))] + [(set (match_operand:DWI 0 "push_operand" "=<,<") + (match_operand:DWI 1 "general_no_elim_operand" "riF*o,v"))] "" "#" [(set_attr "type" "multi") @@ -1685,13 +1688,14 @@ (define_split "ix86_split_long_move (operands); DONE;") (define_insn "*pushdi2_rex64" - [(set (match_operand:DI 0 "push_operand" "=<,!<") - (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] + [(set (match_operand:DI 0 "push_operand" "=<,<,!<") + (match_operand:DI 1 "general_no_elim_operand" "re*m,v,n"))] "TARGET_64BIT" "@ push{q}\t%1 + # #" - [(set_attr "type" "push,multi") + [(set_attr "type" "push,multi,multi") (set_attr "mode" "DI")]) ;; Convert impossible pushes of immediate to existing instructions. @@ -1726,11 +1730,13 @@ (define_split }) (define_insn "*pushsi2" - [(set (match_operand:SI 0 "push_operand" "=<") - (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] + [(set (match_operand:SI 0 "push_operand" "=<,<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m,v"))] "!TARGET_64BIT" - "push{l}\t%1" - [(set_attr "type" "push") + "@ + push{l}\t%1 + #" + [(set_attr "type" "push,multi") (set_attr "mode" "SI")]) ;; emit_push_insn when it calls move_by_pieces requires an insn to @@ -1739,11 +1745,13 @@ (define_insn "*pushsi2" ;; For TARGET_64BIT we always round up to 8 bytes. (define_insn "*push<mode>2_rex64" - [(set (match_operand:SWI124 0 "push_operand" "=X") - (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))] + [(set (match_operand:SWI124 0 "push_operand" "=X,X") + (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>,v"))] "TARGET_64BIT" - "push{q}\t%q1" - [(set_attr "type" "push") + "@ + push{q}\t%q1 + #" + [(set_attr "type" "push,multi") (set_attr "mode" "DI")]) (define_insn "*push<mode>2" @@ -1754,6 +1762,38 @@ (define_insn "*push<mode>2" [(set_attr "type" "push") (set_attr "mode" "SI")]) +(define_split + [(set (match_operand:SWI48DWI 0 "push_operand") + (match_operand:SWI48DWI 1 "sse_reg_operand"))] + "TARGET_SSE && reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode))); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + +(define_insn "*pushv1ti2" + [(set (match_operand:V1TI 0 "push_operand" "=<") + (match_operand:V1TI 1 "general_no_elim_operand" "v"))] + "" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "TI")]) + +(define_split + [(set (match_operand:V1TI 0 "push_operand" "") + (match_operand:V1TI 1 "sse_reg_operand" ""))] + "TARGET_64BIT && reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-PUSH_ROUNDING (16)); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + (define_insn "*push<mode>2_prologue" [(set (match_operand:W 0 "push_operand" "=<") (match_operand:W 1 "general_no_elim_operand" "r<i>*m")) diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c b/gcc/testsuite/gcc.target/i386/pr95021-1.c new file mode 100644 index 00000000000..218776240ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -mregparm=0 -W" } */ +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */ + +typedef long long a; +struct __jmp_buf_tag { +}; +typedef struct __jmp_buf_tag sigjmp_buf[1]; +sigjmp_buf jmp_buf; +int __sigsetjmp (sigjmp_buf, int); +typedef struct { + a target; +} b; +extern a *target_p; +b *c; +extern void foo (a); +void +d(void) +{ + if (__sigsetjmp(jmp_buf, 1)) { + a target = *target_p; + c->target = target; + foo(target); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c b/gcc/testsuite/gcc.target/i386/pr95021-2.c new file mode 100644 index 00000000000..b213b5db072 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */ +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */ + +typedef long long a; +struct __jmp_buf_tag { +}; +typedef struct __jmp_buf_tag sigjmp_buf[1]; +sigjmp_buf jmp_buf; +int __sigsetjmp (sigjmp_buf, int); +typedef struct { + a target; +} b; +extern a *target_p; +b *c; +extern void foo (a); +void +d(void) +{ + if (__sigsetjmp(jmp_buf, 1)) { + a target = *target_p; + c->target = target; + foo(target); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-3.c b/gcc/testsuite/gcc.target/i386/pr95021-3.c new file mode 100644 index 00000000000..a080d7b289f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-3.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ +/* { dg-final { scan-assembler "movdqa\[ \t\]+\[^\n\]*, %xmm" } } */ + +typedef __int128 a; +struct __jmp_buf_tag { +}; +typedef struct __jmp_buf_tag sigjmp_buf[1]; +sigjmp_buf jmp_buf; +int __sigsetjmp (sigjmp_buf, int); +typedef struct { + a target; +} b; +extern a *target_p; +b *c; +extern void foo (int, int, int, int, int, int, a); +void +d(void) +{ + if (__sigsetjmp(jmp_buf, 1)) { + a target = *target_p; + c->target = target; + foo(1, 2, 3, 4, 5, 6, target); + } +} -- 2.26.2