Hello Peter,

thank you for your reply.

On Tue, 2016-08-02 at 12:37 +0200, Peter Zijlstra wrote:
> On Tue, Jul 26, 2016 at 04:07:14PM +0200, Giovanni Gherdovich wrote:
> 
> > Signed-off-by: Mike Galbraith <mgalbra...@suse.de>
> > Signed-off-by: Giovanni Gherdovich <ggherdov...@suse.cz>
> 
> SoB chain is borken. Either Mike wrote the patch in which case you're
> missing a From: Mike header someplace, or you wrote it and Mike needs
> to be a Ack/Reviewed or somesuch.

Right. As Mike already explained, this patch is the result of him
correcting a much more involved/complicated solution I prepared to
solve the problem. I will put the "From: Mike" in v2.

> 
> > ---
> >  kernel/sched/core.c | 4 ++++
> >  1 file changed, 4 insertions(+)
> > 
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 51d7105..0ef1e69 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -2998,6 +2998,10 @@ unsigned long long task_sched_runtime(struct
> > task_struct *p)
> >      * thread, breaking clock_gettime().
> >      */
> >     if (task_current(rq, p) && task_on_rq_queued(p)) {
> > +#if defined(CONFIG_FAIR_GROUP_SCHED)
> 
> This here wants a comment on why we're doing this. Because I'm sure
> that if someone were to read this code in a few weeks they'd go
> WTF!?

I had that config variable set on the machine I was testing on, and
thought that for some reason it was related to my observations. I will
repeat the experiment without it, and if I obtain the same results I
will drop the conditional. Otherwise I will add a comment explaining
why it is needed.

I will submit a v2 early next week, rebasing the patch on the
forthcoming 4.8-rc1 tag and updating the experimental data.

> 
> Also, is there a possibility of manual CSE we should do?
> 
> > +           prefetch((&p->se)->cfs_rq->curr);
> > +           prefetch(&(&p->se)->cfs_rq->curr->exec_start);
> > +#endif
> >             update_rq_clock(rq);
> >             p->sched_class->update_curr(rq);
> >     }

Good point. I checked, and GCC 4.8.5 already performs this CSE on its
own, with no hint needed. This is the alternative code, with the CSE
done by hand, that I compiled:

-- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51d7105..5d676db 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2998,6 +2998,11 @@ unsigned long long task_sched_runtime(struct
task_struct *p)
         * thread, breaking clock_gettime().
         */
        if (task_current(rq, p) && task_on_rq_queued(p)) {
+#if defined(CONFIG_FAIR_GROUP_SCHED)
+               struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+               prefetch(curr);
+               prefetch(&curr->exec_start);
+#endif
                update_rq_clock(rq);
                p->sched_class->update_curr(rq);
        }
-- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 

Below I post the snippets of generated code, with and without the
manual CSE, that I got by running 'disassemble /m task_sched_runtime'
in gdb; you'll see they're identical. If you prefer the explicit hint
I'll include it in v2, but it's probably safe to say it isn't needed.

Regards,
Giovanni



with CSE: -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 

3001#if defined(CONFIG_FAIR_GROUP_SCHED)
3002                struct sched_entity *curr = (&p->se)->cfs_rq->curr;

   <+117>:   mov    0x1d0(%rbx),%rdx
   <+124>:   mov    0x38(%rdx),%rdx

3003                prefetch(curr);
3004                prefetch(&curr->exec_start);
3005#endif
3006                update_rq_clock(rq);
3007                p->sched_class->update_curr(rq);

   <+144>:   mov    0x58(%rbx),%rdx
   <+148>:   mov    %rax,%rdi
   <+151>:   mov    %rax,-0x20(%rbp)
   <+155>:   callq  *0xb0(%rdx)
   <+161>:   mov    -0x20(%rbp),%rax
   <+165>:   jmp    <task_sched_runtime+66>
   <+167>:   mov    %rax,%rdi
   <+170>:   mov    %rax,-0x20(%rbp)
   <+174>:   callq  <update_rq_clock>
   <+179>:   mov    -0x20(%rbp),%rax
   <+183>:   jmp    <task_sched_runtime+144>
         :  nopl   0x0(%rax)

3008        }
3009        ns = p->se.sum_exec_runtime;

   <+66>:    mov    0xc8(%rbx),%r12

3010        task_rq_unlock(rq, p, &rf);
3011
3012        return ns;

   <+103>:   mov    %r12,%rax


w/o CSE: -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8

3001#if defined(CONFIG_FAIR_GROUP_SCHED)
3002                prefetch((&p->se)->cfs_rq->curr);

   <+117>:   mov    0x1d0(%rbx),%rdx
   <+124>:   mov    0x38(%rdx),%rdx

3003                prefetch(&(&p->se)->cfs_rq->curr->exec_start);
3004#endif
3005                update_rq_clock(rq);
3006                p->sched_class->update_curr(rq);

   <+144>:   mov    0x58(%rbx),%rdx
   <+148>:   mov    %rax,%rdi
   <+151>:   mov    %rax,-0x20(%rbp)
   <+155>:   callq  *0xb0(%rdx)
   <+161>:   mov    -0x20(%rbp),%rax
   <+165>:   jmp    <task_sched_runtime+66>
   <+167>:   mov    %rax,%rdi
   <+170>:   mov    %rax,-0x20(%rbp)
   <+174>:   callq  <update_rq_clock>
   <+179>:   mov    -0x20(%rbp),%rax
   <+183>:   jmp    <task_sched_runtime+144>
         :   nopl   0x0(%rax)

3007        }
3008        ns = p->se.sum_exec_runtime;

   <+66>:    mov    0xc8(%rbx),%r12

3009        task_rq_unlock(rq, p, &rf);
3010
3011        return ns;

   <+103>:   mov    %r12,%rax

Reply via email to