On Tue, Feb 4, 2020 at 1:06 PM Richard Sandiford <richard.sandif...@arm.com> wrote: > > Uros Bizjak <ubiz...@gmail.com> writes: > > On Tue, Feb 4, 2020 at 12:13 PM Uros Bizjak <ubiz...@gmail.com> wrote: > >> > >> On Tue, Feb 4, 2020 at 12:05 PM Jakub Jelinek <ja...@redhat.com> wrote: > >> > > >> > On Tue, Feb 04, 2020 at 11:16:06AM +0100, Uros Bizjak wrote: > >> > > If it works OK, I'd rather see this functionality implemented as an > >> > > epilogue_completed guarded splitter. In the .md files, there are > >> > > already cases where we split at this point, and where it is assumed > >> > > that allocated registers won't change anymore. Also, please don't make > >> > > the functionality conditional on flag_split_ra. This way, we would > >> > > always get new patterns in the debug dumps, so in case something goes > >> > > wrong, one could at least clearly see the full pattern. > >> > > >> > The following seems to work on the testcase, will bootstrap/regtest it > >> > soon. > >> > > >> > 2020-02-04 Jakub Jelinek <ja...@redhat.com> > >> > > >> > PR target/92190 > >> > * config/i386/i386-features.c > >> > (ix86_add_reg_usage_to_vzeroupper): Only > >> > include sets and not clobbers in the vzeroupper pattern. > >> > * config/i386/sse.md (*avx_vzeroupper): Change from define_insn > >> > to > >> > define_insn_and_split, split if epilogue_completed and not all > >> > xmm0-xmm15 > >> > registers are mentioned in the pattern and add clobbers for the > >> > missing > >> > registers at that point. > >> > > >> > * gcc.target/i386/pr92190.c: New test. > >> > >> A && is missing in the split condition to inherit TARGET_AVX. > > > > Also, you don't need to emit "#" in output template. This is just for > > extra safety, we can live without. Please see e.g. "*tzcnt<mode>_1". > > IMO it's better to keep it here, where we're relying on the split > happening for correctness. There's no theoretical guarantee that > a split ever happens otherwise.
In this case, I think it would be better to have two patterns, one (define_insn_and_split) with "#" and the other "real" (define_insn). Uros. > Thanks, > Richard > >> OK with this change. > >> > >> Thanks, > >> Uros. > >> > >> > > >> > --- gcc/config/i386/i386-features.c.jj 2020-02-04 11:40:58.755611428 > >> > +0100 > >> > +++ gcc/config/i386/i386-features.c 2020-02-04 11:51:33.602148491 > >> > +0100 > >> > @@ -1764,29 +1764,32 @@ convert_scalars_to_vector (bool timode_p > >> > > >> > (set (reg:V2DF R) (reg:V2DF R)) > >> > > >> > - which preserves the low 128 bits but clobbers the upper bits. > >> > - For a dead register we just use: > >> > - > >> > - (clobber (reg:V2DF R)) > >> > - > >> > - which invalidates any previous contents of R and stops R from > >> > becoming > >> > - live across the vzeroupper in future. */ > >> > + which preserves the low 128 bits but clobbers the upper bits. */ > >> > > >> > static void > >> > ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs) > >> > { > >> > rtx pattern = PATTERN (insn); > >> > unsigned int nregs = TARGET_64BIT ? 16 : 8; > >> > - rtvec vec = rtvec_alloc (nregs + 1); > >> > - RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0); > >> > + unsigned int npats = nregs; > >> > for (unsigned int i = 0; i < nregs; ++i) > >> > { > >> > unsigned int regno = GET_SSE_REGNO (i); > >> > + if (!bitmap_bit_p (live_regs, regno)) > >> > + npats--; > >> > + } > >> > + if (npats == 0) > >> > + return; > >> > + rtvec vec = rtvec_alloc (npats + 1); > >> > + RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0); > >> > + for (unsigned int i = 0, j = 0; i < nregs; ++i) > >> > + { > >> > + unsigned int regno = GET_SSE_REGNO (i); > >> > + if (!bitmap_bit_p (live_regs, regno)) > >> > + continue; > >> > rtx reg = gen_rtx_REG (V2DImode, regno); > >> > - if (bitmap_bit_p (live_regs, regno)) > >> > - RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg); > >> > - else > >> > - RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg); > >> > + ++j; > >> > + RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg); > >> > } > >> > XVEC (pattern, 0) = vec; > >> > df_insn_rescan (insn); > >> > --- gcc/config/i386/sse.md.jj 2020-02-04 11:40:58.813610563 +0100 > >> > +++ gcc/config/i386/sse.md 2020-02-04 11:58:31.544909659 +0100 > >> > @@ -19815,11 +19815,43 @@ (define_expand "avx_vzeroupper" > >> > [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])] > >> > "TARGET_AVX") > >> > > >> > -(define_insn "*avx_vzeroupper" > >> > +(define_insn_and_split "*avx_vzeroupper" > >> > [(match_parallel 0 "vzeroupper_pattern" > >> > [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])] > >> > "TARGET_AVX" > >> > - "vzeroupper" > >> > +{ > >> > + if (XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1) > >> > + return "#"; > >> > + else > >> > + return "vzeroupper"; > >> > +} > >> > + "epilogue_completed > >> > >> && epilogue_completed > >> > >> > + && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1" > >> > + [(match_dup 0)] > >> > +{ > >> > + /* For IPA-RA purposes, make it clear the instruction clobbers > >> > + even XMM registers not mentioned explicitly in the pattern. */ > >> > + unsigned int nregs = TARGET_64BIT ? 16 : 8; > >> > + unsigned int npats = XVECLEN (operands[0], 0); > >> > + rtvec vec = rtvec_alloc (nregs + 1); > >> > + RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0); > >> > + for (unsigned int i = 0, j = 1; i < nregs; ++i) > >> > + { > >> > + unsigned int regno = GET_SSE_REGNO (i); > >> > + if (j < npats > >> > + && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno) > >> > + { > >> > + RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j); > >> > + j++; > >> > + } > >> > + else > >> > + { > >> > + rtx reg = gen_rtx_REG (V2DImode, regno); > >> > + RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg); > >> > + } > >> > + } > >> > + XVEC (operands[0], 0) = vec; > >> > +} > >> > [(set_attr "type" "sse") > >> > (set_attr "modrm" "0") > >> > (set_attr "memory" "none") > >> > --- gcc/testsuite/gcc.target/i386/pr92190.c.jj 2020-02-04 > >> > 11:51:33.608148402 +0100 > >> > +++ gcc/testsuite/gcc.target/i386/pr92190.c 2020-02-04 > >> > 11:51:33.608148402 +0100 > >> > @@ -0,0 +1,19 @@ > >> > +/* PR target/92190 */ > >> > +/* { dg-do compile { target { *-*-linux* && lp64 } } } */ > >> > +/* { dg-options "-mabi=ms -O2 -mavx512f" } */ > >> > + > >> > +typedef char VC __attribute__((vector_size (16))); > >> > +typedef int VI __attribute__((vector_size (16 * sizeof 0))); > >> > +VC a; > >> > +VI b; > >> > +void bar (VI); > >> > +void baz (VC); > >> > + > >> > +void > >> > +foo (void) > >> > +{ > >> > + VC k = a; > >> > + VI n = b; > >> > + bar (n); > >> > + baz (k); > >> > +} > >> > > >> > > >> > Jakub > >> >