On 08/24, Oleg Nesterov wrote:
>
> Peter, if you think it can work for you and if you agree with
> the implementation I will be happy to send the patch.

I think I should try anyway ;)

To simplify the review, I attached the resulting code below.

Changes:

        - Comments.

        - Not sure this is really better, but task_work_run()
          does not need to actually take pi_lock, unlock_wait
          is enough.

          However, in this case the dummy entry is better than
          the fake pointer.

Oleg.

#include <linux/spinlock.h>
#include <linux/task_work.h>
#include <linux/tracehook.h>

static struct callback_head work_exited; /* all we need is ->next == NULL */

int
task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
{
        struct callback_head *head;

        do {
                head = ACCESS_ONCE(task->task_works);
                if (unlikely(head == &work_exited))
                        return -ESRCH;
                work->next = head;
        } while (cmpxchg(&task->task_works, head, work) != head);

        if (notify)
                set_notify_resume(task);
        return 0;
}

struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func)
{
        struct callback_head **pprev = &task->task_works;
        struct callback_head *work = NULL;
        unsigned long flags;
        /*
         * If cmpxchg() fails we continue without updating pprev.
         * Either we raced with task_work_add() which added the
         * new entry before this work, we will find it again. Or
         * we raced with task_work_run(), *pprev == NULL/exited.
         */
        raw_spin_lock_irqsave(&task->pi_lock, flags);
        while ((work = ACCESS_ONCE(*pprev))) {
                read_barrier_depends();
                if (work->func != func)
                        pprev = &work->next;
                else if (cmpxchg(pprev, work, work->next) == work)
                        break;
        }
        raw_spin_unlock_irqrestore(&task->pi_lock, flags);

        return work;
}

void task_work_run(void)
{
        struct task_struct *task = current;
        struct callback_head *work, *head, *next;

        for (;;) {
                /*
                 * work->func() can do task_work_add(), do not set
                 * work_exited unless the list is empty.
                 */
                do {
                        work = ACCESS_ONCE(task->task_works);
                        head = !work && (task->flags & PF_EXITING) ?
                                &work_exited : NULL;
                } while (cmpxchg(&task->task_works, work, head) != work);

                if (!work)
                        break;
                /*
                 * Synchronize with task_work_cancel(). It can't remove
                 * the first entry == work, cmpxchg(task_works) should
                 * fail, but it can play with *work and other entries.
                 */
                raw_spin_unlock_wait(&task->pi_lock);
                smp_mb();

                /* Reverse the list to run the works in fifo order */
                head = NULL;
                do {
                        next = work->next;
                        work->next = head;
                        head = work;
                        work = next;
                } while (work);

                work = head;
                do {
                        next = work->next;
                        work->func(work);
                        work = next;
                        cond_resched();
                } while (work);
        }
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to