While fixing ->pd_alloc_fn() bug, ab94b0382d81 ("blkcg: Fix ->pd_alloc_fn() being called with the wrong blkcg on policy activation") broke the pd_prealloc error handling.
* pd's were freed using kfree(). They should be freed with ->pd_free_fn(). * pd_prealloc could be kfree()'d and then ->pd_free_fn()'d again. * When GFP_KERNEL allocation fails, pinned_blkg wasn't put. There are also a couple existing issues. * Each pd is initialized as they get allocated. If alloc fails, the policy will get freed with pd's initialized on it. * After the above partial failure, the partial pds are not freed. This patch fixes all of the above issues. Signed-off-by: Tejun Heo <t...@kernel.org> Reported-by: Julia Lawall <julia.law...@lip6.fr> Fixes: ab94b0382d81 ("blkcg: Fix ->pd_alloc_fn() being called with the wrong blkcg on policy activation") --- block/blk-cgroup.c | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1373,13 +1373,14 @@ int blkcg_activate_policy(struct request retry: spin_lock_irq(&q->queue_lock); - /* blkg_list is pushed at the head, reverse walk to init parents first */ + /* blkg_list is pushed at the head, reverse walk to allocate parents first */ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { struct blkg_policy_data *pd; if (blkg->pd[pol->plid]) continue; + /* If prealloc matches, use it; otherwise try GFP_NOWAIT */ if (blkg == pinned_blkg) { pd = pd_prealloc; pd_prealloc = NULL; @@ -1389,6 +1390,10 @@ retry: } if (!pd) { + /* + * GFP_NOWAIT failed. Free the existing one and + * prealloc for @blkg w/ GFP_KERNEL. + */ if (pinned_blkg) blkg_put(pinned_blkg); blkg_get(blkg); @@ -1396,38 +1401,51 @@ retry: spin_unlock_irq(&q->queue_lock); - kfree(pd_prealloc); + if (pd_prealloc) + pol->pd_free_fn(pd_prealloc); pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, blkg->blkcg); - if (pd_prealloc) { + if (pd_prealloc) goto retry; - } else { - ret = -ENOMEM; - goto out_bypass_end; - } + else + goto enomem; } blkg->pd[pol->plid] = pd; pd->blkg = blkg; pd->plid = pol->plid; - if (pol->pd_init_fn) - pol->pd_init_fn(pd); } - if (pinned_blkg) - blkg_put(pinned_blkg); - kfree(pd_prealloc); + /* all allocated, init in the same order */ + if (pol->pd_init_fn) + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) + pol->pd_init_fn(blkg->pd[pol->plid]); __set_bit(pol->plid, q->blkcg_pols); ret = 0; spin_unlock_irq(&q->queue_lock); -out_bypass_end: +out: if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); + if (pinned_blkg) + blkg_put(pinned_blkg); if (pd_prealloc) pol->pd_free_fn(pd_prealloc); return ret; + +enomem: + /* alloc failed, nothing's initialized yet, free everything */ + spin_lock_irq(&q->queue_lock); + list_for_each_entry(blkg, &q->blkg_list, q_node) { + if (blkg->pd[pol->plid]) { + pol->pd_free_fn(blkg->pd[pol->plid]); + blkg->pd[pol->plid] = NULL; + } + } + spin_unlock_irq(&q->queue_lock); + ret = -ENOMEM; + goto out; } EXPORT_SYMBOL_GPL(blkcg_activate_policy);