RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
OK, This is the patch I prepare to check in. -Original Message- From: Uros Bizjak Sent: Wednesday, November 24, 2021 4:49 PM To: Kong, Lingling Cc: Liu, Hongtao ; gcc-patches@gcc.gnu.org Subject: Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling wrote: > > Hi, > > vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with > -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. > Cleared before conversion, updated movhi_internal and > ix86_can_change_mode_class. And fixed some commit message. > > OK for master? OK, with a small adjustment to ChangeLog. Thanks, Uros. > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data > in XMM register > for TARGET_SSE2. > * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C. > (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only. > (*extendhf2): Rename from extendhf2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > (*trunc2): Likewise. > > gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/pr90773-21.c: Optimize movhi_internal, > also allow pextrw replace vmovd + movw. Just write: * gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw. > * gcc.target/i386/pr90773-23.c: Ditto. > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. 
> --- > gcc/config/i386/i386.c| 5 +- > gcc/config/i386/i386.md | 74 +-- > .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++ > gcc/testsuite/gcc.target/i386/pr90773-21.c| 2 +- > gcc/testsuite/gcc.target/i386/pr90773-23.c| 2 +- > 5 files changed, 83 insertions(+), 11 deletions(-) create mode > 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index > e94efdf39fb..4b813533961 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, > machine_mode to, > disallow a change to these modes, reload will assume it's ok to > drop the subreg from (subreg:SI (reg:HI 100) 0). This affects > the vec_dupv4hi pattern. > -NB: AVX512FP16 supports vmovw which can load 16bit data to sse > -register. */ > - int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : > 4; > +NB: SSE2 can load 16bit data to sse register via pinsrw. */ > + int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : > +4; >if (GET_MODE_SIZE (from) < mov_size) > return false; > } > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > 6eb9de81921..6ee264f1151 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -2525,6 +2525,16 @@ > case TYPE_SSEMOV: >return ix86_output_ssemov (insn, operands); > > +case TYPE_SSELOG: > + if (SSE_REG_P (operands[0])) > + return MEM_P (operands[1]) > + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > + else > + return MEM_P (operands[1]) > + ? 
"pextrw\t{$0, %1, %0|%0, %1, 0}" > + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + > case TYPE_MSKLOG: >if (operands[1] == const0_rtx) > return "kxorw\t%0, %0, %0"; > @@ -2540,13 +2550,17 @@ > } > } >[(set (attr "isa") > - (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "avx512fp16") > + (cond [(eq_attr "alternative" "9,10,11,12") > + (const_string "sse2") > + (eq_attr "alternative" "13") > + (const_string "sse4") >] >(const_string "*"))) > (set (attr "type") > (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "ssemov") > + (if_then_else (match_test "TARGET_AVX512FP16") > + (const_string "ssemov") > + (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@ >emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > > -(define_insn "extendhf2" > - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") > +(define_expand "extendhfsf2" > + [(set (match_operand:SF 0 "register_operand") > + (float_extend:SF > + (match_operand:HF 1 "nonimmediate_operand")))] > + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" > +{ > + if (!TARGET_AVX512FP16) > +{ > + rtx res = gen_reg_rtx (V4SFmode); > + rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); > + > +
Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling wrote: > > Hi, > > vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with > -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. > Cleared before conversion, updated movhi_internal and > ix86_can_change_mode_class. And fixed some commit message. > > OK for master? OK, with a small adjustment to ChangeLog. Thanks, Uros. > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data > in XMM register > for TARGET_SSE2. > * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C. > (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only. > (*extendhf2): Rename from extendhf2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > (*trunc2): Likewise. > > gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/pr90773-21.c: Optimize movhi_internal, > also allow pextrw replace vmovd + movw. Just write: * gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw. > * gcc.target/i386/pr90773-23.c: Ditto. > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. > --- > gcc/config/i386/i386.c| 5 +- > gcc/config/i386/i386.md | 74 +-- > .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++ > gcc/testsuite/gcc.target/i386/pr90773-21.c| 2 +- > gcc/testsuite/gcc.target/i386/pr90773-23.c| 2 +- > 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 > gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index > e94efdf39fb..4b813533961 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, > machine_mode to, > disallow a change to these modes, reload will assume it's ok to > drop the subreg from (subreg:SI (reg:HI 100) 0). This affects > the vec_dupv4hi pattern. > -NB: AVX512FP16 supports vmovw which can load 16bit data to sse > -register. 
*/ > - int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : > 4; > +NB: SSE2 can load 16bit data to sse register via pinsrw. */ > + int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : > +4; >if (GET_MODE_SIZE (from) < mov_size) > return false; > } > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > 6eb9de81921..6ee264f1151 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -2525,6 +2525,16 @@ > case TYPE_SSEMOV: >return ix86_output_ssemov (insn, operands); > > +case TYPE_SSELOG: > + if (SSE_REG_P (operands[0])) > + return MEM_P (operands[1]) > + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > + else > + return MEM_P (operands[1]) > + ? "pextrw\t{$0, %1, %0|%0, %1, 0}" > + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + > case TYPE_MSKLOG: >if (operands[1] == const0_rtx) > return "kxorw\t%0, %0, %0"; > @@ -2540,13 +2550,17 @@ > } > } >[(set (attr "isa") > - (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "avx512fp16") > + (cond [(eq_attr "alternative" "9,10,11,12") > + (const_string "sse2") > + (eq_attr "alternative" "13") > + (const_string "sse4") >] >(const_string "*"))) > (set (attr "type") > (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "ssemov") > + (if_then_else (match_test "TARGET_AVX512FP16") > + (const_string "ssemov") > + (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > (eq_attr "alternative" "8") > @@ -4574,8 +4588,32 @@ >emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > > -(define_insn "extendhf2" > - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") > +(define_expand "extendhfsf2" > + [(set (match_operand:SF 0 "register_operand") > + (float_extend:SF > + (match_operand:HF 1 "nonimmediate_operand")))] > + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" > +{ > + if (!TARGET_AVX512FP16) > +{ > + rtx res = gen_reg_rtx (V4SFmode); > + rtx tmp = force_reg 
(V8HFmode, CONST0_RTX (V8HFmode)); > + > + ix86_expand_vector_set (false, tmp, operands[1], 0); > + emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); > + emit_move_insn (operands[0], gen_lowpart (SFmode, res)); > + DONE; > +} > +}) > + > +(define_expand "extendhfdf2" > + [(set (match_operand:DF 0 "register_operand") > +
Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
On Wed, Nov 24, 2021 at 9:06 AM Kong, Lingling wrote: > > Hi Uros, > > > BTW: When playing with my patch, I introduced (define_insn > > "*vec_set<mode>_0" ...) to optimize scalar load to a vector. Does > > ix86_expand_vector_set work OK without this pattern? > > Yes, ix86_expand_vector_set could work ok with (define_insn > "<sse2p4_1>_pinsr<ssemodesuffix>"), this insn can optimize scalar load to a > vector. Ah, now I remember - this pattern can be used to optimize HI/HF mode scalar loads in the same way as other "vec_set<mode>_0" patterns are used. It is similar to e.g. VI4F_128 mode vec_set<mode>_0 pattern. I was not able to test it properly without AVX512FP16, but the pattern is otherwise independent of the proposed patch. Uros.
RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
Hi Uros, > BTW: When playing with my patch, I introduced (define_insn "*vec_set<mode>_0" > ...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK > without this pattern? Yes, ix86_expand_vector_set could work ok with (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"), this insn can optimize scalar load to a vector. Thanks, Lingling -----Original Message----- From: Uros Bizjak Sent: Wednesday, November 24, 2021 3:57 PM To: Kong, Lingling Cc: Liu, Hongtao ; gcc-patches@gcc.gnu.org Subject: Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] On Wed, Nov 24, 2021 at 7:25 AM Kong, Lingling via Gcc-patches wrote: > > Hi, > > vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with > -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. > And cleared before conversion, updated movhi_internal and > ix86_can_change_mode_class. Please fix the above commit message. > > OK for master? > > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load > 16bit data > to sse register via pinsrw. Allow 16bit data in XMM register for SSE2 targets. > * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c. ... for TARGET_F16C. > (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, > extendhfdf2. > extendhfdf only for target_avx512fp16. Restrict extendhfdf for TARGET_AVX512FP16 only. > (*extendhf<mode>2):rename extendhf<mode>2. Rename from extendhf<mode>2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > (*trunc2): Likewise. > > gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/pr90773-21.c: Optimized movhi_internal, > optimize vmovd + movw to vpextrw. Also allow pextrw. > * gcc.target/i386/pr90773-23.c: Ditto. > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. Otherwise LGTM. BTW: When playing with my patch, I introduced (define_insn "*vec_set<mode>_0" ...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK without this pattern? 
Thanks, Uros. > --- > gcc/config/i386/i386.c| 5 +- > gcc/config/i386/i386.md | 74 +-- > .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++ > gcc/testsuite/gcc.target/i386/pr90773-21.c| 2 +- > gcc/testsuite/gcc.target/i386/pr90773-23.c| 2 +- > 5 files changed, 83 insertions(+), 11 deletions(-) create mode > 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index > e94efdf39fb..4b813533961 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, > machine_mode to, > disallow a change to these modes, reload will assume it's ok to > drop the subreg from (subreg:SI (reg:HI 100) 0). This affects > the vec_dupv4hi pattern. > -NB: AVX512FP16 supports vmovw which can load 16bit data to sse > -register. */ > - int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : > 4; > +NB: SSE2 can load 16bit data to sse register via pinsrw. */ > + int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : > +4; >if (GET_MODE_SIZE (from) < mov_size) > return false; > } > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > 6eb9de81921..6ee264f1151 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -2525,6 +2525,16 @@ > case TYPE_SSEMOV: >return ix86_output_ssemov (insn, operands); > > +case TYPE_SSELOG: > + if (SSE_REG_P (operands[0])) > + return MEM_P (operands[1]) > + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > + else > + return MEM_P (operands[1]) > + ? 
"pextrw\t{$0, %1, %0|%0, %1, 0}" > + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + > case TYPE_MSKLOG: >if (operands[1] == const0_rtx) > return "kxorw\t%0, %0, %0"; > @@ -2540,13 +2550,17 @@ > } > } >[(set (attr "isa") > - (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "avx512fp16") > + (cond [(eq_attr "alternative" "9,10,11,12") > + (const_string "sse2") > + (eq_attr "alternative" "13") > + (const_string "sse4") >] >(const_string "*"))) > (set (attr "type") > (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "ssemov") > + (if_then_else (match_test "TARGET_AVX512FP16") > + (const_string "ssemov") > + (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") >
Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
On Wed, Nov 24, 2021 at 7:25 AM Kong, Lingling via Gcc-patches wrote: > > Hi, > > vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with > -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. > And cleared before conversion, updated movhi_internal and > ix86_can_change_mode_class. Please fix the above commit message. > > OK for master? > > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load > 16bit data > to sse register via pinsrw. Allow 16bit data in XMM register for SSE2 targets. > * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c. ... for TARGET_F16C. > (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, > extendhfdf2. > extendhfdf only for target_avx512fp16. Restrict extendhfdf for TARGET_AVX512FP16 only. > (*extendhf<mode>2):rename extendhf<mode>2. Rename from extendhf<mode>2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > (*trunc2): Likewise. > > gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/pr90773-21.c: Optimized movhi_internal, > optimize vmovd + movw to vpextrw. Also allow pextrw. > * gcc.target/i386/pr90773-23.c: Ditto. > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. Otherwise LGTM. BTW: When playing with my patch, I introduced (define_insn "*vec_set<mode>_0" ...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK without this pattern? Thanks, Uros. 
> --- > gcc/config/i386/i386.c| 5 +- > gcc/config/i386/i386.md | 74 +-- > .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++ > gcc/testsuite/gcc.target/i386/pr90773-21.c| 2 +- > gcc/testsuite/gcc.target/i386/pr90773-23.c| 2 +- > 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 > gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index > e94efdf39fb..4b813533961 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, > machine_mode to, > disallow a change to these modes, reload will assume it's ok to > drop the subreg from (subreg:SI (reg:HI 100) 0). This affects > the vec_dupv4hi pattern. > -NB: AVX512FP16 supports vmovw which can load 16bit data to sse > -register. */ > - int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : > 4; > +NB: SSE2 can load 16bit data to sse register via pinsrw. */ > + int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : > +4; >if (GET_MODE_SIZE (from) < mov_size) > return false; > } > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > 6eb9de81921..6ee264f1151 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -2525,6 +2525,16 @@ > case TYPE_SSEMOV: >return ix86_output_ssemov (insn, operands); > > +case TYPE_SSELOG: > + if (SSE_REG_P (operands[0])) > + return MEM_P (operands[1]) > + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > + else > + return MEM_P (operands[1]) > + ? 
"pextrw\t{$0, %1, %0|%0, %1, 0}" > + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + > case TYPE_MSKLOG: >if (operands[1] == const0_rtx) > return "kxorw\t%0, %0, %0"; > @@ -2540,13 +2550,17 @@ > } > } >[(set (attr "isa") > - (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "avx512fp16") > + (cond [(eq_attr "alternative" "9,10,11,12") > + (const_string "sse2") > + (eq_attr "alternative" "13") > + (const_string "sse4") >] >(const_string "*"))) > (set (attr "type") > (cond [(eq_attr "alternative" "9,10,11,12,13") > - (const_string "ssemov") > + (if_then_else (match_test "TARGET_AVX512FP16") > + (const_string "ssemov") > + (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > (eq_attr "alternative" "8") > @@ -4574,8 +4588,32 @@ >emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > > -(define_insn "extendhf2" > - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") > +(define_expand "extendhfsf2" > + [(set (match_operand:SF 0 "register_operand") > + (float_extend:SF > + (match_operand:HF 1 "nonimmediate_operand")))] > + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" > +{ > + if (!TARGET_AVX512FP16) > +{ > + rtx res = gen_reg_rtx (V4SFmode); > + rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); > + > +
RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
>-Original Message- >From: Kong, Lingling >Sent: Wednesday, November 24, 2021 2:25 PM >To: Liu, Hongtao ; gcc-patches@gcc.gnu.org >Cc: Kong, Lingling >Subject: RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert >_Float16 to SFmode with -mf16c [PR 102811] > >Hi, > >vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with >-mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. >And cleared before conversion, updated movhi_internal and >ix86_can_change_mode_class. > >OK for master? > >gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load >16bit data > to sse register via pinsrw. > * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c. > (extendhfdf2): Split extendhf2 into separate extendhfsf2, >extendhfdf2. > extendhfdf only for target_avx512fp16. > (*extendhf2):rename extendhf2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > (*trunc2): Likewise. > >gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/pr90773-21.c: Optimized movhi_internal, > optimize vmovd + movw to vpextrw. > * gcc.target/i386/pr90773-23.c: Ditto. > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. >--- > gcc/config/i386/i386.c| 5 +- > gcc/config/i386/i386.md | 74 +-- > .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++ > gcc/testsuite/gcc.target/i386/pr90773-21.c| 2 +- > gcc/testsuite/gcc.target/i386/pr90773-23.c| 2 +- > 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 >gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > >diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index >e94efdf39fb..4b813533961 100644 >--- a/gcc/config/i386/i386.c >+++ b/gcc/config/i386/i386.c >@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode >from, machine_mode to, >disallow a change to these modes, reload will assume it's ok to >drop the subreg from (subreg:SI (reg:HI 100) 0). This affects >the vec_dupv4hi pattern. 
>- NB: AVX512FP16 supports vmovw which can load 16bit data to sse >- register. */ >- int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? >2 : 4; >+ NB: SSE2 can load 16bit data to sse register via pinsrw. */ >+ int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : >+4; > if (GET_MODE_SIZE (from) < mov_size) > return false; > } >diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index >6eb9de81921..6ee264f1151 100644 >--- a/gcc/config/i386/i386.md >+++ b/gcc/config/i386/i386.md >@@ -2525,6 +2525,16 @@ > case TYPE_SSEMOV: > return ix86_output_ssemov (insn, operands); > >+case TYPE_SSELOG: >+ if (SSE_REG_P (operands[0])) >+ return MEM_P (operands[1]) >+? "pinsrw\t{$0, %1, %0|%0, %1, 0}" >+: "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; >+ else >+ return MEM_P (operands[1]) >+? "pextrw\t{$0, %1, %0|%0, %1, 0}" >+: "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; >+ > case TYPE_MSKLOG: > if (operands[1] == const0_rtx) > return "kxorw\t%0, %0, %0"; >@@ -2540,13 +2550,17 @@ > } > } > [(set (attr "isa") >- (cond [(eq_attr "alternative" "9,10,11,12,13") >-(const_string "avx512fp16") >+ (cond [(eq_attr "alternative" "9,10,11,12") >+(const_string "sse2") >+ (eq_attr "alternative" "13") >+(const_string "sse4") > ] > (const_string "*"))) >(set (attr "type") > (cond [(eq_attr "alternative" "9,10,11,12,13") >-(const_string "ssemov") >+(if_then_else (match_test "TARGET_AVX512FP16") >+ (const_string "ssemov") >+ (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > (eq_attr "alternative" "8") >@@ -4574,8 +4588,32 @@ > emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > >-(define_insn "extendhf2" >- [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") >+(define_expand "extendhfsf2" >+ [(set (match_operand:SF 0 "register_operand") >+ (float_extend:SF >+(match_operand:HF 1 "nonimmediate_operand")))] >+ "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" >+{ >+ if (!TARGET_AVX512FP16) >+{ >+ rtx res = gen_reg_rtx 
(V4SFmode); >+ rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); >+ >+ ix86_expand_vector_set (false, tmp, operands[1], 0); >+ emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); >+ emit_move_insn (operands[0], gen_lowpart (SFmode, res)); >+ DONE; >+} >+}) >+ >+(define_expand "extendhfdf2" >+ [(set (match_operand:DF 0 "register_operand") >+ (float_extend:DF >+(match_operand:HF 1
RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
Hi, vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. And cleared before conversion, updated movhi_internal and ix86_can_change_mode_class. OK for master? gcc/ChangeLog: PR target/102811 * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load 16bit data to sse register via pinsrw. * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c. (extendhfdf2): Split extendhf2 into separate extendhfsf2, extendhfdf2. extendhfdf only for target_avx512fp16. (*extendhf2):rename extendhf2. (truncsfhf2): Likewise. (truncdfhf2): Likewise. (*trunc2): Likewise. gcc/testsuite/ChangeLog: PR target/102811 * gcc.target/i386/pr90773-21.c: Optimized movhi_internal, optimize vmovd + movw to vpextrw. * gcc.target/i386/pr90773-23.c: Ditto. * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. --- gcc/config/i386/i386.c| 5 +- gcc/config/i386/i386.md | 74 +-- .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++ gcc/testsuite/gcc.target/i386/pr90773-21.c| 2 +- gcc/testsuite/gcc.target/i386/pr90773-23.c| 2 +- 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e94efdf39fb..4b813533961 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, disallow a change to these modes, reload will assume it's ok to drop the subreg from (subreg:SI (reg:HI 100) 0). This affects the vec_dupv4hi pattern. -NB: AVX512FP16 supports vmovw which can load 16bit data to sse -register. */ - int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4; +NB: SSE2 can load 16bit data to sse register via pinsrw. */ + int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 
2 : +4; if (GET_MODE_SIZE (from) < mov_size) return false; } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..6ee264f1151 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2525,6 +2525,16 @@ case TYPE_SSEMOV: return ix86_output_ssemov (insn, operands); +case TYPE_SSELOG: + if (SSE_REG_P (operands[0])) + return MEM_P (operands[1]) + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + else + return MEM_P (operands[1]) + ? "pextrw\t{$0, %1, %0|%0, %1, 0}" + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; + case TYPE_MSKLOG: if (operands[1] == const0_rtx) return "kxorw\t%0, %0, %0"; @@ -2540,13 +2550,17 @@ } } [(set (attr "isa") - (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "avx512fp16") + (cond [(eq_attr "alternative" "9,10,11,12") + (const_string "sse2") + (eq_attr "alternative" "13") + (const_string "sse4") ] (const_string "*"))) (set (attr "type") (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "ssemov") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "ssemov") + (const_string "sselog")) (eq_attr "alternative" "4,5,6,7") (const_string "mskmov") (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@ emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); }) -(define_insn "extendhf2" - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") +(define_expand "extendhfsf2" + [(set (match_operand:SF 0 "register_operand") + (float_extend:SF + (match_operand:HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" +{ + if (!TARGET_AVX512FP16) +{ + rtx res = gen_reg_rtx (V4SFmode); + rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); + + ix86_expand_vector_set (false, tmp, operands[1], 0); + emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); + emit_move_insn (operands[0], gen_lowpart (SFmode, res)); + DONE; +} +}) + +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF 
+ (match_operand:HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + +(define_insn "*extendhf2" + [(set (match_operand:MODEF 0 "register_operand" "=v") (float_extend:MODEF (match_operand:HF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512FP16" @@ -4766,7 +4804,31 @@ ;; Conversion from {SF,DF}mode to HFmode. -(define_insn "trunchf2" +(define_expand "truncsfhf2" +
Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
On Tue, Nov 16, 2021 at 9:15 AM Kong, Lingling via Gcc-patches wrote: > > Hi, > > vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with > -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. > > OK for master? No, this is the wrong approach. There can be invalid values in the high elements of the vector, so these should be cleared before conversion. Please see the attached (unfinished) patch and use it as a starting point. Please note that we can now allow 2-byte values in SSE registers, so movhi_internal and ix86_can_change_mode_class should be updated accordingly. Uros. > > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for f16c. > (extendhfdf2): Split extendhf2 into separate extendhfsf2, > extendhfdf2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > > gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. > --- > gcc/config/i386/i386.md | 48 +++ > .../i386/avx512vl-vcvtps2ph-pr102811.c| 10 > 2 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 > gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > 6eb9de81921..c5415475342 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -4574,15 +4574,30 @@ >emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > > -(define_insn "extendhf2" > - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") > -(float_extend:MODEF > +(define_insn "extendhfsf2" > + [(set (match_operand:SF 0 "register_operand" "=v") > + (float_extend:SF > + (match_operand:HF 1 "nonimmediate_operand" "vm")))] > + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" > +{ > + if (TARGET_AVX512FP16) > +return "vcvtsh2ss\t{%1, %0, %0|%0, %0, %1}"; > + else > +return "vcvtph2ps\t{%1, %0|%0, %1}"; } > + [(set_attr "type" "ssecvt") > + (set_attr "prefix" "maybe_evex") > + (set_attr "mode" 
"SF")]) > + > +(define_insn "extendhfdf2" > + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=v") > + (float_extend:DF > (match_operand:HF 1 "nonimmediate_operand" "vm")))] >"TARGET_AVX512FP16" > - "vcvtsh2\t{%1, %0, %0|%0, %0, %1}" > + "vcvtsh2sd\t{%1, %0, %0|%0, %0, %1}" >[(set_attr "type" "ssecvt") > (set_attr "prefix" "evex") > - (set_attr "mode" "")]) > + (set_attr "mode" "DF")]) > > > (define_expand "extendxf2" > @@ -4766,12 +4781,27 @@ > > ;; Conversion from {SF,DF}mode to HFmode. > > -(define_insn "trunchf2" > +(define_insn "truncsfhf2" > + [(set (match_operand:HF 0 "register_operand" "=v") > + (float_truncate:HF > + (match_operand:SF 1 "nonimmediate_operand" "vm")))] > + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" > + { > +if (TARGET_AVX512FP16) > + return "vcvtss2sh\t{%1, %d0|%d0, %1}"; > +else > + return "vcvtps2ph\t{0, %1, %0|%0, %1, 0}"; > + } > + [(set_attr "type" "ssecvt") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > + > +(define_insn "truncdfhf2" >[(set (match_operand:HF 0 "register_operand" "=v") > - (float_truncate:HF > - (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] > + (float_truncate:HF > + (match_operand:DF 1 "nonimmediate_operand" "vm")))] >"TARGET_AVX512FP16" > - "vcvt2sh\t{%1, %d0|%d0, %1}" > + "vcvtsd2sh\t{%1, %d0|%d0, %1}" >[(set_attr "type" "ssecvt") > (set_attr "prefix" "evex") > (set_attr "mode" "HF")]) > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > new file mode 100644 > index 000..ab44a304a03 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ > +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */ > +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */ > +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */ > +/* { dg-final { 
scan-assembler-not "__extendhfsf2\[ \\t\]"} } */ > +_Float16 test (_Float16 a, _Float16 b) > +{ > + return a + b; > +} > -- > 2.18.1 > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9cc903e826b..21a3a45d22c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19462,9 +19462,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, disallow a change to these modes, reload will assume it's ok to drop the subreg from (subreg:SI (reg:HI 100) 0). This affects the vec_dupv4hi pattern. -NB: AVX512FP16 supports vmovw which can load 16bit data to sse -register. */ - int mov_size = MAYBE_SSE_CLASS_P (regclass) &&
Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
On Tue, Nov 16, 2021 at 4:15 PM Kong, Lingling via Gcc-patches wrote: > > Hi, > > vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with > -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. > > OK for master? > > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.md (extendhfsf2): Add extendhfsf2 for f16c. > (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, > extendhfdf2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > > gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. > --- > gcc/config/i386/i386.md | 48 +++ > .../i386/avx512vl-vcvtps2ph-pr102811.c| 10 > 2 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 > gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > 6eb9de81921..c5415475342 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -4574,15 +4574,30 @@ >emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > > -(define_insn "extendhf<mode>2" > - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") > -(float_extend:MODEF > +(define_insn "extendhfsf2" > + [(set (match_operand:SF 0 "register_operand" "=v") > + (float_extend:SF > + (match_operand:HF 1 "nonimmediate_operand" "vm")))] > + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" > +{ > + if (TARGET_AVX512FP16) > +return "vcvtsh2ss\t{%1, %0, %0|%0, %0, %1}"; > + else > +return "vcvtph2ps\t{%1, %0|%0, %1}"; } > + [(set_attr "type" "ssecvt") > + (set_attr "prefix" "maybe_evex") > + (set_attr "mode" "SF")]) > + > +(define_insn "extendhfdf2" > + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=v") > + (float_extend:DF > (match_operand:HF 1 "nonimmediate_operand" "vm")))] >"TARGET_AVX512FP16" > - "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}" > + "vcvtsh2sd\t{%1, %0, %0|%0, %0, %1}" >[(set_attr "type" "ssecvt") > (set_attr "prefix" "evex") > - (set_attr "mode" "<MODE>")]) > + 
(set_attr "mode" "DF")]) > > > (define_expand "extend<mode>xf2" > @@ -4766,12 +4781,27 @@ > > ;; Conversion from {SF,DF}mode to HFmode. > > -(define_insn "trunc<mode>hf2" > +(define_insn "truncsfhf2" > + [(set (match_operand:HF 0 "register_operand" "=v") > + (float_truncate:HF > + (match_operand:SF 1 "nonimmediate_operand" "vm")))] > + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" > + { > +if (TARGET_AVX512FP16) > + return "vcvtss2sh\t{%1, %d0|%d0, %1}"; > +else > + return "vcvtps2ph\t{0, %1, %0|%0, %1, 0}"; > + } > + [(set_attr "type" "ssecvt") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > + > +(define_insn "truncdfhf2" >[(set (match_operand:HF 0 "register_operand" "=v") > - (float_truncate:HF > - (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] > + (float_truncate:HF > + (match_operand:DF 1 "nonimmediate_operand" "vm")))] >"TARGET_AVX512FP16" > - "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}" > + "vcvtsd2sh\t{%1, %d0|%d0, %1}" >[(set_attr "type" "ssecvt") > (set_attr "prefix" "evex") > (set_attr "mode" "HF")]) > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > new file mode 100644 > index 00000000000..ab44a304a03 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ > +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */ > +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */ > +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */ > +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */ > +_Float16 test (_Float16 a, _Float16 b) > +{ > + return a + b; > +} > -- > 2.18.1 > -- BR, Hongtao