Ping ...

Jojo
On Nov 6, 2020, 5:38 PM +0800, Jojo R <jiejie_r...@c-sky.com> wrote:
> Insn seqs before sched:
>
> .L1:
> a5 = insn-1 (a0)
> a6 = insn-2 (a1)
> a7 = insn-3 (a7, a5)
> a8 = insn-4 (a8, a6)
> Jmp .L1
>
> Insn-3 and insn-4 have REG_DEP_TRUE dependences on insn-1 and insn-2,
> so insn-3 and insn-4 end up at the tail of the ready list.
> On targets that have a forwarding path between DEP_PRO and DEP_CON,
> this patch also puts a dependency whose cost is 0 due to a bypass
> into the highest numbered class.
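>
> As a rough standalone sketch of that class numbering (not the GCC code
> itself; the toy_* names below are hypothetical stand-ins for DEP_TYPE,
> REG_DEP_TRUE and the new dep_cost_bypass () in the patch), the added
> bypass case looks like this:
>
> #include <stdbool.h>
> #include <stdio.h>
>
> enum toy_dep_type { TOY_DEP_NONE, TOY_DEP_TRUE, TOY_DEP_ANTI_OUTPUT };
>
> struct toy_last_dep
> {
>   enum toy_dep_type type;  /* Kind of dependence on the last scheduled insn.  */
>   int cost;                /* Latency of that dependence.  */
>   bool via_bypass;         /* True if the cost comes from a define_bypass.  */
> };
>
> /* Return the heuristic class; a higher class is preferred.  */
> static int
> toy_last_insn_class (const struct toy_last_dep *dep)
> {
>   if (dep == NULL || dep->type == TOY_DEP_NONE
>       || dep->cost == 1
>       /* New case: a result forwarded through a 0 cost bypass is as
>          good as independent, so do not penalise it.  */
>       || (dep->via_bypass && dep->cost == 0))
>     return 3;
>   if (dep->type == TOY_DEP_TRUE)
>     return 1;
>   return 2;
> }
>
> int
> main (void)
> {
>   struct toy_last_dep plain = { TOY_DEP_TRUE, 2, false };
>   struct toy_last_dep bypassed = { TOY_DEP_TRUE, 0, true };
>   printf ("plain true dep -> class %d\n", toy_last_insn_class (&plain));
>   printf ("0 cost bypass  -> class %d\n", toy_last_insn_class (&bypassed));
>   return 0;
> }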
>
> If the insns land in the same cost class under -fsched-last-insn-heuristic,
> scheduling then falls through to "prefer the insn which has more later
> insns that depend on it".  The value returned by dep_list_size () is not
> suitable there, because it counts every dependence of the insn;
> we need to ignore the ones that have a 0 cost dependency due to a bypass.
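>
> A minimal sketch of that counting change (standalone toy code with
> hypothetical names, not the GCC implementation; the real dep_list_costs ()
> in the diff below also skips debug insns):
>
> #include <stdbool.h>
> #include <stddef.h>
> #include <stdio.h>
>
> struct toy_fwd_dep
> {
>   int cost;         /* Latency of the forward dependence.  */
>   bool via_bypass;  /* True if the cost comes from a define_bypass.  */
> };
>
> /* Old behaviour of dep_list_size: every forward dependence counts.  */
> static int
> toy_dep_list_size (const struct toy_fwd_dep *deps, size_t n)
> {
>   (void) deps;
>   return (int) n;
> }
>
> /* New behaviour: ignore dependences that cost nothing via a bypass.  */
> static int
> toy_dep_list_costs (const struct toy_fwd_dep *deps, size_t n)
> {
>   int count = 0;
>   for (size_t i = 0; i < n; i++)
>     if (!(deps[i].via_bypass && deps[i].cost == 0))
>       count++;
>   return count;
> }
>
> int
> main (void)
> {
>   /* Two ordinary consumers plus one consumer fed through a 0 cost bypass.  */
>   struct toy_fwd_dep deps[] = { { 2, false }, { 1, false }, { 0, true } };
>   size_t n = sizeof deps / sizeof deps[0];
>   printf ("dep_list_size  -> %d\n", toy_dep_list_size (deps, n));
>   printf ("dep_list_costs -> %d\n", toy_dep_list_costs (deps, n));
>   return 0;
> }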
>
> With this patch and pipeline description as below:
>
> (define_bypass 0 "insn-1, insn-2" "insn-3, insn-4")
>
> We can get better insn seqs after sched:
>
> .L1:
> a5 = insn-1 (a0)
> a7 = insn-3 (a7, a5)
> a6 = insn-2 (a1)
> a8 = insn-4 (a8, a6)
> Jmp .L1
>
> I have tested this on the C-SKY ck860 and on T-Head's RISC-V based C960.
>
> gcc/
> 	* haifa-sched.c (dep_cost_bypass, dep_list_costs): New functions.
> 	(rank_for_schedule): Replace dep_list_size with dep_list_costs.
> 	Treat a 0 cost dependency due to a bypass as the highest numbered
> 	class for -fsched-last-insn-heuristic.
>
> ---
> gcc/haifa-sched.c | 49 +++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
> index 350178c82b8..51c6d23d3a5 100644
> --- a/gcc/haifa-sched.c
> +++ b/gcc/haifa-sched.c
> @@ -1584,6 +1584,44 @@ dep_list_size (rtx_insn *insn, sd_list_types_def list)
>    return nodbgcount;
>  }
>
> +/* Get the bypass cost of dependence DEP, or -1 if there is no bypass.  */
> +
> +HAIFA_INLINE static int
> +dep_cost_bypass (dep_t dep)
> +{
> +  if (dep == NULL)
> +    return -1;
> +
> +  if (INSN_CODE (DEP_PRO (dep)) >= 0
> +      && bypass_p (DEP_PRO (dep))
> +      && recog_memoized (DEP_CON (dep)) >= 0)
> +    return dep_cost (dep);
> +
> +  return -1;
> +}
> +
> +/* Count nondebug deps in list LIST for INSN, ignoring 0 cost bypass deps.  */
> +
> +static int
> +dep_list_costs (rtx_insn *insn, sd_list_types_def list)
> +{
> +  sd_iterator_def sd_it;
> +  dep_t dep;
> +  int costs = 0;
> +
> +  FOR_EACH_DEP (insn, list, sd_it, dep)
> +    {
> +      if (!DEBUG_INSN_P (DEP_CON (dep))
> +          && !DEBUG_INSN_P (DEP_PRO (dep)))
> +        {
> +          if (dep_cost_bypass (dep) != 0)
> +            costs++;
> +        }
> +    }
> +
> +  return costs;
> +}
> +
>  bool sched_fusion;
>
>  /* Compute the priority number for INSN.  */
> @@ -2758,10 +2796,12 @@ rank_for_schedule (const void *x, const void *y)
>          1) Data dependent on last schedule insn.
>          2) Anti/Output dependent on last scheduled insn.
>          3) Independent of last scheduled insn, or has latency of one.
> +        4) Bypassed from last scheduled insn, and has latency of zero.
>          Choose the insn from the highest numbered class if different.  */
>       dep1 = sd_find_dep_between (last, tmp, true);
>
> -      if (dep1 == NULL || dep_cost (dep1) == 1)
> +      if (dep1 == NULL || dep_cost (dep1) == 1
> +          || (dep_cost_bypass (dep1) == 0))
>         tmp_class = 3;
>       else if (/* Data dependence.  */
>                DEP_TYPE (dep1) == REG_DEP_TRUE)
> @@ -2771,7 +2811,8 @@ rank_for_schedule (const void *x, const void *y)
>
>       dep2 = sd_find_dep_between (last, tmp2, true);
>
> -      if (dep2 == NULL || dep_cost (dep2) == 1)
> +      if (dep2 == NULL || dep_cost (dep2) == 1
> +          || (dep_cost_bypass (dep2) == 0))
>         tmp2_class = 3;
>       else if (/* Data dependence.  */
>                DEP_TYPE (dep2) == REG_DEP_TRUE)
> @@ -2795,8 +2836,8 @@ rank_for_schedule (const void *x, const void *y)
>       This gives the scheduler more freedom when scheduling later
>       instructions at the expense of added register pressure.  */
>
> -  val = (dep_list_size (tmp2, SD_LIST_FORW)
> -         - dep_list_size (tmp, SD_LIST_FORW));
> +  val = (dep_list_costs (tmp2, SD_LIST_FORW)
> +         - dep_list_costs (tmp, SD_LIST_FORW));
>
>    if (flag_sched_dep_count_heuristic && val != 0)
>      return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
> --
> 2.24.3 (Apple Git-128)
