hi everybody: I have encountered a "NULL pointer dereference" problem in pick_next_task_fair() in linux 3.10.0 based
static struct task_struct *pick_next_task_fair(struct rq *rq) { struct task_struct *p; struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; if (!cfs_rq->nr_running) return NULL; do { se = pick_next_entity(cfs_rq); set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); p = task_of(se); if (hrtick_enabled(rq)) hrtick_start_fair(rq, p); return p; } In pick_next_entity(): static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { struct sched_entity *se = __pick_first_entity(cfs_rq); struct sched_entity *left = se; /* * Avoid running the skip buddy, if running something else can * be done without getting too unfair. */ if (cfs_rq->skip == se) { struct sched_entity *second = __pick_next_entity(se); if (second && wakeup_preempt_entity(second, left) < 1) se = second; } /* * Prefer last buddy, try to return the CPU to a preempted task. */ if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) se = cfs_rq->last; /* * Someone really wants this to run. If it's not unfair, run it. */ if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) se = cfs_rq->next; clear_buddies(cfs_rq, se); return se; } in pick_next_entity 1. struct sched_entity *se = __pick_first_entity(cfs_rq); This se may be get NULL. 2. Then if cfs_rq->skip is also null, cfs_rq->skip == se. 3. struct sched_entity *second = __pick_next_entity(se); This code will lead to the "NULL pointer dereference" because se is NULL in __pick_next_entity -> rb_next() The oops is as follow: [795797.511960] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [795797.622950] task: ffff883f24924560 ti: ffff883f2495c000 task.ti: ffff883f2495c000 [795797.630541] RIP: 0010:[] [] rb_next+0x1/0x50 [795797.638153] RSP: 0018:ffff883f2495fdc0 EFLAGS: 00010046 [795797.643575] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [795797.650824] RDX: 0000000000000001 RSI: ffff883f7f5f4868 RDI: 0000000000000010 [795797.658068] RBP: ffff883f2495fe08 R08: 0000000000000000 R09: 0000000000000000 [795797.666476] R10: 0000000000000000 R11: ffff883f249f5400 R12: ffff883d82207200 [795797.674821] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [795797.683160] FS: 0000000000000000(0000) GS:ffff883f7f5e0000(0000) knlGS:0000000000000000 [795797.692460] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [795797.699395] CR2: 0000000000000010 CR3: 0000003f1de58000 CR4: 00000000003407e0 [795797.707703] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [795797.715992] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [795797.724267] Stack: [795797.727416] ffff883f2495fe08 ffffffff810b87f9 ffff883f2495fe08 ffff883f7f5f47c0 [795797.736040] ffff883f24924b40 ffff883f7f5f47c0 000000000000003f ffff883f24924560 [795797.744658] 0000000000000000 ffff883f2495fe68 ffffffff816324ea ffff883f24924560 [795797.753255] Call Trace: [795797.756818] [] ? pick_next_task_fair+0x129/0x1d0 [795797.764207] [] __schedule+0x12a/0x910 [795797.770622] [] schedule+0x29/0x70 [795797.776683] [] smpboot_thread_fn+0xd3/0x1a0 [795797.783599] [] ? schedule+0x29/0x70 [795797.789807] [] ? lg_double_unlock+0x90/0x90 [795797.796706] [] kthread+0xcf/0xe0 [795797.802632] [] ? kthread_create_on_node+0x140/0x140 [795797.810207] [] ret_from_fork+0x58/0x90 [795797.816646] [] ? kthread_create_on_node+0x140/0x140 I find there are something different in upstream (in pick_next_entity ) static struct sched_entity * pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) { struct sched_entity *left = __pick_first_entity(cfs_rq); struct sched_entity *se; /* * If curr is set we have to see if its left of the leftmost entity * still in the tree, provided there was anything in the tree at all. */ if (!left || (curr && entity_before(curr, left))) left = curr; se = left; /* ideally we run the leftmost entity */ /* * Avoid running the skip buddy, if running something else can * be done without getting too unfair. */ if (cfs_rq->skip == se) { struct sched_entity *second; if (se == curr) { second = __pick_first_entity(cfs_rq); } else { second = __pick_next_entity(se); if (!second || (curr && entity_before(curr, second))) second = curr; } if (second && wakeup_preempt_entity(second, left) < 1) se = second; } /* * Prefer last buddy, try to return the CPU to a preempted task. */ if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) se = cfs_rq->last; /* * Someone really wants this to run. If it's not unfair, run it. */ if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) se = cfs_rq->next; clear_buddies(cfs_rq, se); return se; } if the se,curr,cfs_rq->skip are all NULL, the wakeup_preempt_entity() or clear_buddies will be also occured the "NULL pointer dereference" problem. I think the following patch can work in the case: --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4639,10 +4639,9 @@ struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; - if (!cfs_rq->nr_running) - return NULL; - do { + if (!cfs_rq->nr_running) + return NULL; se = pick_next_entity(cfs_rq); set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); Do I miss some rules or this is a bug?