On 1/22/21 8:17 PM, Joel Fernandes (Google) wrote:
+static void __sched_core_update_cookie(struct task_struct *p)
+{
+	struct rb_node *parent, **node;
+	struct sched_core_cookie *node_core_cookie, *match;
+	static const struct sched_core_cookie zero_cookie;
+	struct sched_core_cookie requested_cookie;
+	bool is_zero_cookie;
+	struct sched_core_cookie *const curr_cookie =
+		(struct sched_core_cookie *)p->core_cookie;
+
+	/*
+	 * Ensures that we do not cause races/corruption by modifying/reading
+	 * task cookie fields. Also ensures task cannot get migrated.
+	 */
+	lockdep_assert_held(rq_lockp(task_rq(p)));
+
+	sched_core_cookie_init_from_task(&requested_cookie, p);
+
+	is_zero_cookie = !sched_core_cookie_cmp(&requested_cookie, &zero_cookie);
+
+	/*
+	 * Already have a cookie matching the requested settings? Nothing to
+	 * do.
+	 */
+	if ((curr_cookie && !sched_core_cookie_cmp(curr_cookie, &requested_cookie)) ||
+	    (!curr_cookie && is_zero_cookie))
+		return;
+
+	raw_spin_lock(&sched_core_cookies_lock);
+
+	if (is_zero_cookie) {
+		match = NULL;
+		goto finish;
+	}
+
+retry:
+	match = NULL;
+
+	node = &sched_core_cookies.rb_node;
+	parent = *node;
+	while (*node) {
+		int cmp;
+
+		node_core_cookie =
+			container_of(*node, struct sched_core_cookie, node);
+		parent = *node;
+
+		cmp = sched_core_cookie_cmp(&requested_cookie, node_core_cookie);
+		if (cmp < 0) {
+			node = &parent->rb_left;
+		} else if (cmp > 0) {
+			node = &parent->rb_right;
+		} else {
+			match = node_core_cookie;
+			break;
+		}
+	}
+
+	if (!match) {
+		/* No existing cookie; create and insert one */
+		match = kmalloc(sizeof(struct sched_core_cookie), GFP_ATOMIC);
+
+		/* Fall back to zero cookie */
+		if (WARN_ON_ONCE(!match))
+			goto finish;
+
+		*match = requested_cookie;
+		refcount_set(&match->refcnt, 1);
+
+		rb_link_node(&match->node, parent, node);
+		rb_insert_color(&match->node, &sched_core_cookies);
+	} else {
+		/*
+		 * Cookie exists, increment refcnt. If refcnt is currently 0,
+		 * we're racing with a put() (refcnt decremented but cookie not
+		 * yet removed from the tree). In this case, we can simply
+		 * perform the removal ourselves and retry.
+		 * sched_core_put_cookie() will still function correctly.
+		 */
+		if (unlikely(!refcount_inc_not_zero(&match->refcnt))) {
+			__sched_core_erase_cookie(match);
+			goto retry;
+		}
+	}
+
+finish:
+	p->core_cookie = (unsigned long)match;
+
+	raw_spin_unlock(&sched_core_cookies_lock);
+
+	sched_core_put_cookie(curr_cookie);
+}
+
+/*
+ * sched_core_update_cookie - Common helper to update a task's core cookie. This
+ * updates the selected cookie field and then updates the overall cookie.
+ * @p: The task whose cookie should be updated.
+ * @cookie: The new cookie.
+ * @cookie_type: The cookie field to which the cookie corresponds.
+ */
+static void sched_core_update_cookie(struct task_struct *p, unsigned long cookie,
+				     enum sched_core_cookie_type cookie_type)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (!p)
+		return;
+
+	rq = task_rq_lock(p, &rf);
+
+	switch (cookie_type) {
+	case sched_core_task_cookie_type:
+		p->core_task_cookie = cookie;
+		break;
+	case sched_core_group_cookie_type:
+		p->core_group_cookie = cookie;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+	}
+
+	/* Set p->core_cookie, which is the overall cookie */
+	__sched_core_update_cookie(p);
While trying to test the new prctl() code I'm working on, I ran into a bug that I chased back into this v10 code. Under a fair amount of stress, when __sched_core_update_cookie() is ultimately called from sched_core_fork(), the system either deadlocks or crashes with no visible output. I've not had much success figuring out why or exactly what goes wrong; I'm running with LOCKDEP enabled and see no complaints. Reproducing it only requires setting a cookie on a task and then forking a bunch of threads, all of which then want to update their cookie. A rough sketch of the reproducer is below.
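Roughly, the test looks like the following. This is only a minimal sketch, not the exact program I'm running; set_core_cookie() here is a stand-in for however the cookie actually gets set on the kernel under test (the prctl interface I'm working on, or the CPU cgroup knob from this series):

/*
 * Minimal reproducer sketch. set_core_cookie() is a placeholder for
 * whatever interface tags the calling task with a non-zero core
 * scheduling cookie; it is not an existing syscall wrapper.
 */
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

#define NFORKS	1000

/* Placeholder: tag the calling task with a non-zero core cookie. */
static int set_core_cookie(void)
{
	/* e.g. the in-progress prctl(), or echo into the cgroup file */
	return 0;
}

int main(void)
{
	int i;

	if (set_core_cookie()) {
		perror("set_core_cookie");
		return 1;
	}

	/*
	 * Every fork goes through sched_core_fork() in the child path,
	 * so each child ends up in __sched_core_update_cookie() looking
	 * up / refcounting the parent's cookie. With enough parallel
	 * children this is where the hang shows up.
	 */
	for (i = 0; i < NFORKS; i++) {
		pid_t pid = fork();

		if (pid < 0) {
			perror("fork");
			break;
		}
		if (pid == 0)
			_exit(0);
	}

	while (wait(NULL) > 0)
		;

	return 0;
}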
-chrish