>-----Original Message----- >From: Kong, Lingling <lingling.k...@intel.com> >Sent: Wednesday, November 24, 2021 2:25 PM >To: Liu, Hongtao <hongtao....@intel.com>; gcc-patches@gcc.gnu.org >Cc: Kong, Lingling <lingling.k...@intel.com> >Subject: RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert >_Float16 to SFmode with -mf16c [PR 102811] > >Hi, > >vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with >-mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c. >And cleared before conversion, updated movhi_internal and >ix86_can_change_mode_class. > >OK for master? > >gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load >16bit data > to sse register via pinsrw. > * config/i386/i386.md (extendhfsf2): Add extendhfsf2 for f16c. > (extendhfdf2): Split extendhf<mode>2 into separate extendhfsf2, >extendhfdf2. > extendhfdf only for target_avx512fp16. > (*extendhf<mode>2): Rename from extendhf<mode>2. > (truncsfhf2): Likewise. > (truncdfhf2): Likewise. > (*trunc<mode>hf2): Likewise. > >gcc/testsuite/ChangeLog: > > PR target/102811 > * gcc.target/i386/pr90773-21.c: Optimized movhi_internal, > optimize vmovd + movw to vpextrw. > * gcc.target/i386/pr90773-23.c: Ditto. > * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. 
>--- > gcc/config/i386/i386.c | 5 +- > gcc/config/i386/i386.md | 74 +++++++++++++++++-- > .../i386/avx512vl-vcvtps2ph-pr102811.c | 11 +++ > gcc/testsuite/gcc.target/i386/pr90773-21.c | 2 +- > gcc/testsuite/gcc.target/i386/pr90773-23.c | 2 +- > 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 >gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c > >diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index >e94efdf39fb..4b813533961 100644 >--- a/gcc/config/i386/i386.c >+++ b/gcc/config/i386/i386.c >@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode >from, machine_mode to, > disallow a change to these modes, reload will assume it's ok to > drop the subreg from (subreg:SI (reg:HI 100) 0). This affects > the vec_dupv4hi pattern. >- NB: AVX512FP16 supports vmovw which can load 16bit data to sse >- register. */ >- int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? >2 : 4; >+ NB: SSE2 can load 16bit data to sse register via pinsrw. */ >+ int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : >+4; > if (GET_MODE_SIZE (from) < mov_size) > return false; > } >diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index >6eb9de81921..6ee264f1151 100644 >--- a/gcc/config/i386/i386.md >+++ b/gcc/config/i386/i386.md >@@ -2525,6 +2525,16 @@ > case TYPE_SSEMOV: > return ix86_output_ssemov (insn, operands); > >+ case TYPE_SSELOG: >+ if (SSE_REG_P (operands[0])) >+ return MEM_P (operands[1]) >+ ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" >+ : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; >+ else >+ return MEM_P (operands[1]) >+ ? 
"pextrw\t{$0, %1, %0|%0, %1, 0}" >+ : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; >+ > case TYPE_MSKLOG: > if (operands[1] == const0_rtx) > return "kxorw\t%0, %0, %0"; >@@ -2540,13 +2550,17 @@ > } > } > [(set (attr "isa") >- (cond [(eq_attr "alternative" "9,10,11,12,13") >- (const_string "avx512fp16") >+ (cond [(eq_attr "alternative" "9,10,11,12") >+ (const_string "sse2") >+ (eq_attr "alternative" "13") >+ (const_string "sse4") > ] > (const_string "*"))) > (set (attr "type") > (cond [(eq_attr "alternative" "9,10,11,12,13") >- (const_string "ssemov") >+ (if_then_else (match_test "TARGET_AVX512FP16") >+ (const_string "ssemov") >+ (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > (eq_attr "alternative" "8") >@@ -4574,8 +4588,32 @@ > emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > >-(define_insn "extendhf<mode>2" >- [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") >+(define_expand "extendhfsf2" >+ [(set (match_operand:SF 0 "register_operand") >+ (float_extend:SF >+ (match_operand:HF 1 "nonimmediate_operand")))] >+ "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" >+{ >+ if (!TARGET_AVX512FP16) >+ { >+ rtx res = gen_reg_rtx (V4SFmode); >+ rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); >+ >+ ix86_expand_vector_set (false, tmp, operands[1], 0); >+ emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); >+ emit_move_insn (operands[0], gen_lowpart (SFmode, res)); >+ DONE; >+ } >+}) >+ >+(define_expand "extendhfdf2" >+ [(set (match_operand:DF 0 "register_operand") >+ (float_extend:DF >+ (match_operand:HF 1 "nonimmediate_operand")))] >+ "TARGET_AVX512FP16") >+ >+(define_insn "*extendhf<mode>2" >+ [(set (match_operand:MODEF 0 "register_operand" "=v") > (float_extend:MODEF > (match_operand:HF 1 "nonimmediate_operand" "vm")))] > "TARGET_AVX512FP16" >@@ -4766,7 +4804,31 @@ > > ;; Conversion from {SF,DF}mode to HFmode. 
> >-(define_insn "trunc<mode>hf2" >+(define_expand "truncsfhf2" >+ [(set (match_operand:HF 0 "register_operand") >+ (float_truncate:HF >+ (match_operand:SF 1 "nonimmediate_operand")))] >+ "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" >+ { >+ if (!TARGET_AVX512FP16) >+ { >+ rtx res = gen_reg_rtx (V8HFmode); >+ rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); >+ >+ ix86_expand_vector_set (false, tmp, operands[1], 0); >+ emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT >(4))); >+ emit_move_insn (operands[0], gen_lowpart (HFmode, res)); >+ DONE; >+ } >+ }) >+ >+(define_expand "truncdfhf2" >+ [(set (match_operand:HF 0 "register_operand") >+ (float_truncate:HF >+ (match_operand:DF 1 "nonimmediate_operand")))] >+ "TARGET_AVX512FP16") >+ >+(define_insn "*trunc<mode>hf2" > [(set (match_operand:HF 0 "register_operand" "=v") > (float_truncate:HF > (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git >a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c >b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c >new file mode 100644 >index 00000000000..dfbfb167953 >--- /dev/null >+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c >@@ -0,0 +1,11 @@ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ >+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */ >+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */ >+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */ >+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */ >+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */ >+_Float16 test (_Float16 a, _Float16 b) >+{ >+ return a + b; >+} >diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c >b/gcc/testsuite/gcc.target/i386/pr90773-21.c >index 5bbb387a3ea..0d620fff83c 100644 >--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c >+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c >@@ -10,4 +10,4 @@ foo (int c) > } > > /* { dg-final 
{ scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, >\\(%\[\^,\]+\\)" 1 } } */ >-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } >} >*/ >+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, >+32\\(%\[\^,\]+\\)" 1 } } */ >diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c >b/gcc/testsuite/gcc.target/i386/pr90773-23.c >index ca4a86f30b8..b7369e802e1 100644 >--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c >+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c >@@ -10,4 +10,4 @@ foo (void) > } > > /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, >\\(%\[\^,\]+\\)" 1 } } */ >-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } >} >*/ >+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, >+32\\(%\[\^,\]+\\)" 1 } } */ >-- >2.18.1
RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]
Liu, Hongtao via Gcc-patches Tue, 23 Nov 2021 23:05:42 -0800
- [PATCH] i386: vcvtph2ps and vcvtps2ph shoul... Kong, Lingling via Gcc-patches
- Re: [PATCH] i386: vcvtph2ps and vcvtps... Hongtao Liu via Gcc-patches
- Re: [PATCH] i386: vcvtph2ps and vcvtps... Uros Bizjak via Gcc-patches
- RE: [PATCH] i386: vcvtph2ps and vcvtps... Kong, Lingling via Gcc-patches
- RE: [PATCH] i386: vcvtph2ps and vc... Liu, Hongtao via Gcc-patches
- Re: [PATCH] i386: vcvtph2ps and vc... Uros Bizjak via Gcc-patches
- RE: [PATCH] i386: vcvtph2ps an... Kong, Lingling via Gcc-patches
- Re: [PATCH] i386: vcvtph2p... Uros Bizjak via Gcc-patches
- [PATCH] i386: vcvtph2ps and vcvtps2ph ... Kong, Lingling via Gcc-patches
- Re: [PATCH] i386: vcvtph2ps and vc... Uros Bizjak via Gcc-patches
- RE: [PATCH] i386: vcvtph2ps an... Kong, Lingling via Gcc-patches