Re: [PATCH] Fix regression introduced by r12-5536.
On Tue, Nov 30, 2021 at 5:21 AM Uros Bizjak wrote: > > On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu wrote: > > > > On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak wrote: > > > > > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt wrote: > > > > > > > > There're several failures reported in [1]: > > > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > > > > %vpextrw should be used in output templates. > > > > 2. ICE in get_attr_memory for movhi_internal since some alternatives > > > > are marked as TYPE_SSELOG. > > > > Explicitly set memory_attr for those alternatives. > > > > > > > > Also this patch fixs a typo and some latent bugs which are related to > > > > moving HImode from/to sse register w/o TARGET_AVX512FP16. > > Here are some more fixes: > thanks. > i386: Fix and improve movhi_internal and movhf_internal some more. > > An (*v,C) alternative can be added to movhi_internal to directly load > HImode constant 0 to xmm register. Also, V4SFmode moves can be used > for xmm->xmm moves instead of TImode moves when optimizing for size. > Fix invalid %vpinsrw insn template, which needs to duplicate %xmm > register for AVX targets. > > Optimize GPR moves in movhf_internal in the same way as in movhi_internal. > Fix pinsrw and pextrw templates for AVX targets. Use sselog1 > instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY > and TARGET_SSE_SPLIT_REGS targets. > > 2021-11-29 Uroš Bizjak > > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative. > Do not allocate non-GPR registers. Optimize xmm->xmm moves when > optimizing for size. Fix vpinsrw insn template. > (*movhf_internal): Fix pinsrw and pextrw insn templates for > AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves. > Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY > and TARGET_SSE_SPLIT_REGS targets. > > Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and > w/o -mf16c. > > Pushed to master. > > Uros. -- BR, Hongtao
Re: [PATCH] Fix regression introduced by r12-5536.
On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu wrote: > > On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak wrote: > > > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt wrote: > > > > > > There're several failures reported in [1]: > > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > > > %vpextrw should be used in output templates. > > > 2. ICE in get_attr_memory for movhi_internal since some alternatives > > > are marked as TYPE_SSELOG. > > > Explicitly set memory_attr for those alternatives. > > > > > > Also this patch fixs a typo and some latent bugs which are related to > > > moving HImode from/to sse register w/o TARGET_AVX512FP16. Here are some more fixes: i386: Fix and improve movhi_internal and movhf_internal some more. An (*v,C) alternative can be added to movhi_internal to directly load HImode constant 0 to xmm register. Also, V4SFmode moves can be used for xmm->xmm moves instead of TImode moves when optimizing for size. Fix invalid %vpinsrw insn template, which needs to duplicate %xmm register for AVX targets. Optimize GPR moves in movhf_internal in the same way as in movhi_internal. Fix pinsrw and pextrw templates for AVX targets. Use sselog1 instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY and TARGET_SSE_SPLIT_REGS targets. 2021-11-29 Uroš Bizjak gcc/ChangeLog: PR target/102811 * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative. Do not allocate non-GPR registers. Optimize xmm->xmm moves when optimizing for size. Fix vpinsrw insn template. (*movhf_internal): Fix pinsrw and pextrw insn templates for AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves. Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY and TARGET_SSE_SPLIT_REGS targets. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and w/o -mf16c. Pushed to master. Uros. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a384dae23e2..c88374c9d2b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2495,11 +2495,12 @@ (symbol_ref "true")))]) (define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m") - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))] + [(set (match_operand:HI 0 "nonimmediate_operand" +"=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m") + (match_operand:HI 1 "general_operand" +"r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_hardreg_mov_ok (operands[0], operands[1])" - { switch (get_attr_type (insn)) { @@ -2526,10 +2527,13 @@ return ix86_output_ssemov (insn, operands); case TYPE_SSELOG1: + if (satisfies_constraint_C (operands[1])) + return standard_sse_constant_opcode (insn, operands); + if (SSE_REG_P (operands[0])) return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" + : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; else return MEM_P (operands[0]) ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" @@ -2550,23 +2554,25 @@ } } [(set (attr "isa") - (cond [(eq_attr "alternative" "9,10,11,12") + (cond [(eq_attr "alternative" "9,10,11,12,13") (const_string "sse2") - (eq_attr "alternative" "13") + (eq_attr "alternative" "14") (const_string "sse4") ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "9,10,12,13") + (cond [(eq_attr "alternative" "4,5,6,7") + (const_string "mskmov") + (eq_attr "alternative" "8") + (const_string "msklog") + (eq_attr "alternative" "9,10,13,14") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog1")) - (eq_attr "alternative" "4,5,6,7") - (const_string "mskmov") (eq_attr "alternative" "11") + (const_string "sselog1") + (eq_attr "alternative" "12") (const_string "ssemov") - (eq_attr "alternative" "8") - (const_string "msklog") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") (and (eq_attr "alternative" "0") @@ -2581,33 +2587,54 @@ (const_string "imovx") ] (const_string "imov"))) -(set (attr "prefix") -(cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "maybe_evex") - (eq_attr "alternative" "4,5,6,7,8") - (const_string "vex") - ] - (const_string "orig"))) -(set (attr "mode") - (cond [
Re: [PATCH] Fix regression introduced by r12-5536.
On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak wrote: > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt wrote: > > > > There're several failures reported in [1]: > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > > %vpextrw should be used in output templates. > > 2. ICE in get_attr_memory for movhi_internal since some alternatives > > are marked as TYPE_SSELOG. > > Explicitly set memory_attr for those alternatives. > > > > Also this patch fixs a typo and some latent bugs which are related to > > moving HImode from/to sse register w/o TARGET_AVX512FP16. > > > > For optimization issues discussed in PR102811, I'll create another patch for > > it. > > [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and > > x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake} > > Ok for trunk? > > > > gcc/ChangeLog: > > > > * config/i386/i386.c (ix86_secondary_reload): Without > > TARGET_SSE4_1, General register is needed to move HImode from > > sse register to memory. > > * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of > > pextrw in output templates. > > * config/i386/i386.md (movhi_internal): Ditto, also fix typo of > > MEM_P (operands[1]) and adjust memory/mode/prefix/type > > attribute for alternatives related to sse register. > > OK, but please use sselog1 type instead so you don't need to introduce > the memory attribute. Changed, committed as r12-5573 thanks for the review. > > Thanks, > Uros. > > > --- > > gcc/config/i386/i386.c | 2 +- > > gcc/config/i386/i386.md | 44 ++--- > > gcc/config/i386/sse.md | 6 +++--- > > 3 files changed, 36 insertions(+), 16 deletions(-) > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > > index 3dedf522c42..7cf599f57f7 100644 > > --- a/gcc/config/i386/i386.c > > +++ b/gcc/config/i386/i386.c > > @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, > > reg_class_t rclass, > > } > > > >/* Require movement to gpr, and then store to memory. */ > > - if (mode == HFmode > > + if ((mode == HFmode || mode == HImode) > >&& !TARGET_SSE4_1 > >&& SSE_CLASS_P (rclass) > >&& !in_p && MEM_P (x)) > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index 68606e57e60..2cb3e727588 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal" > > case TYPE_SSELOG: > >if (SSE_REG_P (operands[0])) > > return MEM_P (operands[1]) > > - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > > - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > > + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" > > + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > >else > > - return MEM_P (operands[1]) > > - ? "pextrw\t{$0, %1, %0|%0, %1, 0}" > > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > > + return MEM_P (operands[0]) > > + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" > > + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; > > > > case TYPE_MSKLOG: > >if (operands[1] == const0_rtx) > > @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal" > >] > >(const_string "*"))) > > (set (attr "type") > > - (cond [(eq_attr "alternative" "9,10,11,12,13") > > + (cond [(eq_attr "alternative" "9,10,12,13") > > (if_then_else (match_test "TARGET_AVX512FP16") > > (const_string "ssemov") > > (const_string "sselog")) > > (eq_attr "alternative" "4,5,6,7") > > (const_string "mskmov") > > + (eq_attr "alternative" "11") > > + (const_string "ssemov") > > (eq_attr "alternative" "8") > > (const_string "msklog") > > (match_test "optimize_function_for_size_p (cfun)") > > @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal" > > (const_string "imovx") > >] > >(const_string "imov"))) > > +(set (attr "memory") > > +(cond [(eq_attr "alternative" "9,10") > > + (const_string "none") > > + (eq_attr "alternative" "12") > > + (const_string "load") > > + (eq_attr "alternative" "13") > > + (const_string "store") > > + ] > > + (const_string "*"))) > > Please use sselog1 type instead, and the memory attribute will be > calculated correctly. > > > (set (attr "prefix") > > - (if_then_else (eq_attr "alternative" "4,5,6,7,8") > > - (const_string "vex") > > - (const_string "orig"))) > > +(cond [(eq_attr "alternative" "9,10,11,12,13") > > + (const_string "maybe_evex") > > + (eq_attr "alternative" "4,5,6,7,8") > > + (const_
Re: [PATCH] Fix regression introduced by r12-5536.
On Mon, Nov 29, 2021 at 2:32 AM liuhongt wrote: > > There're several failures reported in [1]: > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > %vpextrw should be used in output templates. > 2. ICE in get_attr_memory for movhi_internal since some alternatives > are marked as TYPE_SSELOG. > Explicitly set memory_attr for those alternatives. > > Also this patch fixs a typo and some latent bugs which are related to > moving HImode from/to sse register w/o TARGET_AVX512FP16. > > For optimization issues discussed in PR102811, I'll create another patch for > it. > [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and > x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake} > Ok for trunk? > > gcc/ChangeLog: > > * config/i386/i386.c (ix86_secondary_reload): Without > TARGET_SSE4_1, General register is needed to move HImode from > sse register to memory. > * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of > pextrw in output templates. > * config/i386/i386.md (movhi_internal): Ditto, also fix typo of > MEM_P (operands[1]) and adjust memory/mode/prefix/type > attribute for alternatives related to sse register. OK, but please use sselog1 type instead so you don't need to introduce the memory attribute. Thanks, Uros. > --- > gcc/config/i386/i386.c | 2 +- > gcc/config/i386/i386.md | 44 ++--- > gcc/config/i386/sse.md | 6 +++--- > 3 files changed, 36 insertions(+), 16 deletions(-) > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 3dedf522c42..7cf599f57f7 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t > rclass, > } > >/* Require movement to gpr, and then store to memory. */ > - if (mode == HFmode > + if ((mode == HFmode || mode == HImode) >&& !TARGET_SSE4_1 >&& SSE_CLASS_P (rclass) >&& !in_p && MEM_P (x)) > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 68606e57e60..2cb3e727588 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal" > case TYPE_SSELOG: >if (SSE_REG_P (operands[0])) > return MEM_P (operands[1]) > - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" > + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; >else > - return MEM_P (operands[1]) > - ? "pextrw\t{$0, %1, %0|%0, %1, 0}" > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + return MEM_P (operands[0]) > + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" > + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; > > case TYPE_MSKLOG: >if (operands[1] == const0_rtx) > @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal" >] >(const_string "*"))) > (set (attr "type") > - (cond [(eq_attr "alternative" "9,10,11,12,13") > + (cond [(eq_attr "alternative" "9,10,12,13") > (if_then_else (match_test "TARGET_AVX512FP16") > (const_string "ssemov") > (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > + (eq_attr "alternative" "11") > + (const_string "ssemov") > (eq_attr "alternative" "8") > (const_string "msklog") > (match_test "optimize_function_for_size_p (cfun)") > @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal" > (const_string "imovx") >] >(const_string "imov"))) > +(set (attr "memory") > +(cond [(eq_attr "alternative" "9,10") > + (const_string "none") > + (eq_attr "alternative" "12") > + (const_string "load") > + (eq_attr "alternative" "13") > + (const_string "store") > + ] > + (const_string "*"))) Please use sselog1 type instead, and the memory attribute will be calculated correctly. > (set (attr "prefix") > - (if_then_else (eq_attr "alternative" "4,5,6,7,8") > - (const_string "vex") > - (const_string "orig"))) > +(cond [(eq_attr "alternative" "9,10,11,12,13") > + (const_string "maybe_evex") > + (eq_attr "alternative" "4,5,6,7,8") > + (const_string "vex") > + ] > + (const_string "orig"))) > (set (attr "mode") >(cond [(eq_attr "type" "imovx") >(const_string "SI") > +(eq_attr "alternative" "9,10,12,13") > + (if_then_else (match_test "TARGET_AVX512FP16") > +(const_string "HI") > +