[PATCH 3/4] [PATCH 3/4] x86: Properly handle USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS
From: "H.J. Lu" Check TARGET_USE_VECTOR_FP_CONVERTS or TARGET_USE_VECTOR_CONVERTS when handling the avx_partial_xmm_update attribute. Don't convert AVX partial XMM register update if vector packed SSE conversion should be used. gcc/ PR target/101900 * config/i386/i386-features.c (remove_partial_avx_dependency): Check TARGET_USE_VECTOR_FP_CONVERTS and TARGET_USE_VECTOR_CONVERTS before generating vxorps. gcc/testsuite/ PR target/101900 * gcc.target/i386/pr101900-1.c: New test. * gcc.target/i386/pr101900-2.c: Likewise. * gcc.target/i386/pr101900-3.c: Likewise. --- gcc/config/i386/i386-features.c| 21 ++--- gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++ gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++ gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++ 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) != AVX_PARTIAL_XMM_UPDATE_TRUE) continue; - if (!v4sf_const0) - v4sf_const0 = gen_reg_rtx (V4SFmode); - /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and vec_merge with subreg. 
*/ rtx src = SET_SRC (set); rtx dest = SET_DEST (set); machine_mode dest_mode = GET_MODE (dest); + machine_mode src_mode; + + if (TARGET_USE_VECTOR_FP_CONVERTS) + { + src_mode = GET_MODE (XEXP (src, 0)); + if (src_mode == E_SFmode || src_mode == E_DFmode) + continue; + } + + if (TARGET_USE_VECTOR_CONVERTS) + { + src_mode = GET_MODE (XEXP (src, 0)); + if (src_mode == E_SImode || src_mode == E_DImode) + continue; + } + + if (!v4sf_const0) + v4sf_const0 = gen_reg_rtx (V4SFmode); rtx zero; machine_mode dest_vecmode; diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c new file mode 100644 index 000..0a45f8e340a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtps2pd" } } */ +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c new file mode 100644 index 000..c8b2d1da5ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtss2sd" } } */ +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c new file mode 100644 index 
000..6ee565b5bd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtps2pd" } } */ +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ -- 2.17.1
[PATCH 2/4] [PATCH 2/4] x86: Update memcpy/memset inline strategies for -mtune=tremont
From: "H.J. Lu" Simplify memcpy and memset inline strategies to avoid branches for -mtune=tremont: 1. Create Tremont cost model from generic cost model. 2. With MOVE_RATIO and CLEAR_RATIO == 17, GCC will use integer/vector load and store for up to 16 * 16 (256) bytes when the data size is fixed and known. 3. Inline only if data size is known to be <= 256. a. Use "rep movsb/stosb" with simple code sequence if the data size is a constant. b. Use loop if data size is not a constant. 4. Use memcpy/memset library function if data size is unknown or > 256. * config/i386/i386-options.c (processor_cost_table): Use tremont_cost for Tremont. * config/i386/x86-tune-costs.h (tremont_memcpy): New. (tremont_memset): Likewise. (tremont_cost): Likewise. * config/i386/x86-tune.def (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB): Enable for Tremont. --- gcc/config/i386/i386-options.c | 2 +- gcc/config/i386/x86-tune-costs.h | 124 +++ gcc/config/i386/x86-tune.def | 2 +- 3 files changed, 126 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index c0006b3674b..e7a3bd4aaea 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -724,7 +724,7 @@ static const struct processor_costs *processor_cost_table[] = _cost, _cost, _cost, - _cost, + _cost, _cost, _cost, _cost, diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index ffe810f2bcb..93644be9cb3 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2734,6 +2734,130 @@ struct processor_costs slm_cost = { "16",/* Func alignment. 
*/ }; +static stringop_algs tremont_memcpy[2] = { + {libcall, + {{256, rep_prefix_1_byte, true}, +{256, loop, false}, +{-1, libcall, false}}}, + {libcall, + {{256, rep_prefix_1_byte, true}, +{256, loop, false}, +{-1, libcall, false; +static stringop_algs tremont_memset[2] = { + {libcall, + {{256, rep_prefix_1_byte, true}, +{256, loop, false}, +{-1, libcall, false}}}, + {libcall, + {{256, rep_prefix_1_byte, true}, +{256, loop, false}, +{-1, libcall, false; +static const +struct processor_costs tremont_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6,/* cost for loading QImode using movzbl */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {6, 6, 6}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 12}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {6, 6}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ + {6, 6, 6, 10, 15}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {6, 6, 6, 10, 15}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 6, 6,/* SSE->integer and integer->SSE moves */ + 6, 6,/* mask->integer and integer->mask moves */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost if storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ + /* End of register allocator costs. */ + }, + + COSTS_N_INSNS (1), /* cost of an add instruction */ + /* Setting cost to 2 makes our current implementation of synth_mult result in + use of unnecessary temporary registers causing regression on several + SPECfp benchmarks. 
*/ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ +
[PATCH 4/4] [PATCH 4/4] x86: Add TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY
From: "H.J. Lu" 1. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY in SSE FP to FP splitters. 2. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY in SSE INT to FP splitters. 3. Also check TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY and TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY when handling the avx_partial_xmm_update attribute. Don't convert AVX partial XMM register update if there is no partial SSE register dependency for SSE conversion. gcc/ * config/i386/i386-features.c (remove_partial_avx_dependency): Also check TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY and TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY before generating vxorps. * config/i386/i386.h (TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New. (TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise. * config/i386/i386.md (SSE FP to FP splitters): Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY. (SSE INT to FP splitter): Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY. * config/i386/x86-tune.def (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New. (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise. gcc/testsuite/ * gcc.target/i386/avx-covert-1.c: New file. * gcc.target/i386/avx-fp-covert-1.c: Likewise. * gcc.target/i386/avx-int-covert-1.c: Likewise. * gcc.target/i386/sse-covert-1.c: Likewise. * gcc.target/i386/sse-fp-covert-1.c: Likewise. * gcc.target/i386/sse-int-covert-1.c: Likewise. 
--- gcc/config/i386/i386-features.c | 6 -- gcc/config/i386/i386.h| 4 gcc/config/i386/i386.md | 9 ++--- gcc/config/i386/x86-tune.def | 15 +++ gcc/testsuite/gcc.target/i386/avx-covert-1.c | 19 +++ .../gcc.target/i386/avx-fp-covert-1.c | 15 +++ .../gcc.target/i386/avx-int-covert-1.c| 14 ++ gcc/testsuite/gcc.target/i386/sse-covert-1.c | 19 +++ .../gcc.target/i386/sse-fp-covert-1.c | 15 +++ .../gcc.target/i386/sse-int-covert-1.c| 14 ++ 10 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-int-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-int-covert-1.c diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index ae5ea02a002..91bfa06d4bf 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -2218,14 +2218,16 @@ remove_partial_avx_dependency (void) machine_mode dest_mode = GET_MODE (dest); machine_mode src_mode; - if (TARGET_USE_VECTOR_FP_CONVERTS) + if (TARGET_USE_VECTOR_FP_CONVERTS + || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY) { src_mode = GET_MODE (XEXP (src, 0)); if (src_mode == E_SFmode || src_mode == E_DFmode) continue; } - if (TARGET_USE_VECTOR_CONVERTS) + if (TARGET_USE_VECTOR_CONVERTS + || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY) { src_mode = GET_MODE (XEXP (src, 0)); if (src_mode == E_SImode || src_mode == E_DImode) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e76bb55c080..ec60b89753e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -334,6 +334,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY] #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \ 
ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY] +#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \ + ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY] +#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \ + ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY] #define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \ ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL] #define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 13f6f57cdcc..c82a9dc1f67 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4535,7 +4535,8 @@ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] "!TARGET_AVX - && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + &&
[PATCH 1/4] [PATCH 1/4] x86: Update -mtune=tremont
From: "H.J. Lu" Initial -mtune=tremont update 1. Use Haswell scheduling model. 2. Assume that the stack engine allows push instructions to be executed in parallel. 3. Prepare for scheduling pass as -mtune=generic. 4. Use the same issue rate as -mtune=generic. 5. Enable partial_reg_dependency. 6. Disable accumulate_outgoing_args 7. Enable use_leave 8. Enable push_memory 9. Disable four_jump_limit 10. Disable opt_agu 11. Disable avoid_lea_for_addr 12. Disable avoid_mem_opnd_for_cmove 13. Enable misaligned_move_string_pro_epilogues 14. Enable use_cltd 16. Enable avoid_false_dep_for_bmi 17. Enable avoid_mfence 18. Disable expand_abs 19. Enable sse_typeless_stores 20. Enable sse_load0_by_pxor 21. Disable split_mem_opnd_for_fp_converts 22. Disable slow_pshufb 23. Enable partial_reg_dependency This is the first patch to tune for Tremont. With all patches applied, performance impacts on SPEC CPU 2017 are: 500.perlbench_r 1.81% 502.gcc_r 0.57% 505.mcf_r 1.16% 520.omnetpp_r 0.00% 523.xalancbmk_r 0.00% 525.x264_r 4.55% 531.deepsjeng_r 0.00% 541.leela_r 0.39% 548.exchange2_r 1.13% 557.xz_r 0.00% geomean for intrate 0.95% 503.bwaves_r 0.00% 507.cactuBSSN_r 6.94% 508.namd_r 12.37% 510.parest_r 1.01% 511.povray_r 3.70% 519.lbm_r 36.61% 521.wrf_r 8.79% 526.blender_r 2.91% 527.cam4_r 6.23% 538.imagick_r 0.28% 544.nab_r 21.99% 549.fotonik3d_r 3.63% 554.roms_r -1.20% geomean for fprate 7.50% gcc/ChangeLog * common/config/i386/i386-common.c: Use Haswell scheduling model for Tremont. * config/i386/i386.c (ix86_sched_init_global): Prepare for Tremont scheduling pass. * config/i386/x86-tune-sched.c (ix86_issue_rate): Change Tremont issue rate to 4. (ix86_adjust_cost): Handle Tremont. * config/i386/x86-tune.def (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Enable for Tremont. (X86_TUNE_USE_LEAVE): Likewise. (X86_TUNE_PUSH_MEMORY): Likewise. (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Likewise. (X86_TUNE_USE_CLTD): Likewise. (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Likewise. (X86_TUNE_AVOID_MFENCE): Likewise. 
(X86_TUNE_SSE_TYPELESS_STORES): Likewise. (X86_TUNE_SSE_LOAD0_BY_PXOR): Likewise. (X86_TUNE_ACCUMULATE_OUTGOING_ARGS): Disable for Tremont. (X86_TUNE_FOUR_JUMP_LIMIT): Likewise. (X86_TUNE_OPT_AGU): Likewise. (X86_TUNE_AVOID_LEA_FOR_ADDR): Likewise. (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE): Likewise. (X86_TUNE_EXPAND_ABS): Likewise. (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS): Likewise. (X86_TUNE_SLOW_PSHUFB): Likewise. --- gcc/common/config/i386/i386-common.c | 2 +- gcc/config/i386/i386.c | 1 + gcc/config/i386/x86-tune-sched.c | 2 ++ gcc/config/i386/x86-tune.def | 37 ++-- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 00c65ba15ab..2c9e1ccbc6e 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1935,7 +1935,7 @@ const pta processor_alias_table[] = M_CPU_TYPE (INTEL_GOLDMONT), P_PROC_SSE4_2}, {"goldmont-plus", PROCESSOR_GOLDMONT_PLUS, CPU_GLM, PTA_GOLDMONT_PLUS, M_CPU_TYPE (INTEL_GOLDMONT_PLUS), P_PROC_SSE4_2}, - {"tremont", PROCESSOR_TREMONT, CPU_GLM, PTA_TREMONT, + {"tremont", PROCESSOR_TREMONT, CPU_HASWELL, PTA_TREMONT, M_CPU_TYPE (INTEL_TREMONT), P_PROC_SSE4_2}, {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL, M_CPU_TYPE (INTEL_KNL), P_PROC_AVX512F}, diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7b173bc0beb..2927e2884c9 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -16976,6 +16976,7 @@ ix86_sched_init_global (FILE *, int, int) case PROCESSOR_NEHALEM: case PROCESSOR_SANDYBRIDGE: case PROCESSOR_HASWELL: +case PROCESSOR_TREMONT: case PROCESSOR_GENERIC: /* Do not perform multipass scheduling for pre-reload schedule to save compile time. 
*/ diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c index 2e5ee4e..56ada99a450 100644 --- a/gcc/config/i386/x86-tune-sched.c +++ b/gcc/config/i386/x86-tune-sched.c @@ -71,6 +71,7 @@ ix86_issue_rate (void) case PROCESSOR_NEHALEM: case PROCESSOR_SANDYBRIDGE: case PROCESSOR_HASWELL: +case PROCESSOR_TREMONT: case PROCESSOR_GENERIC: return 4; @@ -429,6 +430,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case PROCESSOR_NEHALEM: case PROCESSOR_SANDYBRIDGE: case PROCESSOR_HASWELL: +case PROCESSOR_TREMONT: case
[PATCH 0/4] Update mtune=tremont
From: "Cui,Lili" Hi, I have four patches for Tremont tuning. With all patches applied, performance impacts on SPEC CPU 2017 are: 500.perlbench_r 1.81% 502.gcc_r 0.57% 505.mcf_r 1.16% 520.omnetpp_r 0.00% 523.xalancbmk_r 0.00% 525.x264_r 4.55% 531.deepsjeng_r 0.00% 541.leela_r 0.39% 548.exchange2_r 1.13% 557.xz_r 0.00% geomean for intrate 0.95% 503.bwaves_r 0.00% 507.cactuBSSN_r 6.94% 508.namd_r 12.37% 510.parest_r 1.01% 511.povray_r 3.70% 519.lbm_r 36.61% 521.wrf_r 8.79% 526.blender_r 2.91% 527.cam4_r 6.23% 538.imagick_r 0.28% 544.nab_r 21.99% 549.fotonik3d_r 3.63% 554.roms_r -1.20% geomean for fprate 7.50% Bootstrapped and regtested on x86_64-linux-gnu{-m32,-m64}. Ok for master? x86: Update -mtune=tremont x86: Update memcpy/memset inline strategies for -mtune=tremont x86: Properly handle USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS x86: Add TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY gcc/common/config/i386/i386-common.c | 2 +- gcc/config/i386/i386-features.c | 23 +++- gcc/config/i386/i386-options.c| 2 +- gcc/config/i386/i386.c| 1 + gcc/config/i386/i386.h| 4 + gcc/config/i386/i386.md | 9 +- gcc/config/i386/x86-tune-costs.h | 124 ++ gcc/config/i386/x86-tune-sched.c | 2 + gcc/config/i386/x86-tune.def | 52 +--- gcc/testsuite/gcc.target/i386/avx-covert-1.c | 19 +++ .../gcc.target/i386/avx-fp-covert-1.c | 15 +++ .../gcc.target/i386/avx-int-covert-1.c| 14 ++ gcc/testsuite/gcc.target/i386/pr101900-1.c| 18 +++ gcc/testsuite/gcc.target/i386/pr101900-2.c| 18 +++ gcc/testsuite/gcc.target/i386/pr101900-3.c| 19 +++ gcc/testsuite/gcc.target/i386/sse-covert-1.c | 19 +++ .../gcc.target/i386/sse-fp-covert-1.c | 15 +++ .../gcc.target/i386/sse-int-covert-1.c| 14 ++ 18 files changed, 344 insertions(+), 26 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-int-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-1.c create mode 
100644 gcc/testsuite/gcc.target/i386/pr101900-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse-int-covert-1.c -- 2.17.1 Thanks, Lili.