This is similar to NTSYNC_IOC_WAIT_ANY, but waits until all of the objects are
simultaneously signaled, and then acquires all of them as a single atomic
operation.

Signed-off-by: Elizabeth Figura <zfig...@codeweavers.com>
---
 drivers/misc/ntsync.c       | 243 ++++++++++++++++++++++++++++++++++--
 include/uapi/linux/ntsync.h |   1 +
 2 files changed, 236 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index c6f84a5fc8c0..e914d626465a 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -55,7 +55,34 @@ struct ntsync_obj {
                } sem;
        } u;
 
+       /*
+        * any_waiters is protected by the object lock, but all_waiters is
+        * protected by the device wait_all_lock.
+        */
        struct list_head any_waiters;
+       struct list_head all_waiters;
+
+       /*
+        * Hint describing how many tasks are queued on this object in a
+        * wait-all operation.
+        *
+        * Any time we do a wake, we may need to wake "all" waiters as well as
+        * "any" waiters. In order to atomically wake "all" waiters, we must
+        * lock all of the objects, and that means grabbing the wait_all_lock
+        * below (and, due to lock ordering rules, before locking this object).
+        * However, wait-all is a rare operation, and grabbing the wait-all
+        * lock for every wake would create unnecessary contention.
+        * Therefore we first check whether all_hint is zero, and, if it is,
+        * we skip trying to wake "all" waiters.
+        *
+        * This hint isn't protected by any lock. It might change during the
+        * course of a wake, but there's no meaningful race there; it's only a
+        * hint.
+        *
+        * Since wait requests must originate from user-space threads, we're
+        * limited here by PID_MAX_LIMIT, so there's no risk of overflow.
+        */
+       atomic_t all_hint;
 };
 
 struct ntsync_q_entry {
@@ -76,14 +103,100 @@ struct ntsync_q {
         */
        atomic_t signaled;
 
+       bool all;
        __u32 count;
        struct ntsync_q_entry entries[];
 };
 
 struct ntsync_device {
+       /*
+        * Wait-all operations must atomically grab all objects, and be totally
+        * ordered with respect to each other and wait-any operations.
+        * If one thread is trying to acquire several objects, another thread
+        * cannot touch the object at the same time.
+        *
+        * We achieve this by grabbing multiple object locks at the same time.
+        * However, this creates a lock ordering problem. To solve that problem,
+        * wait_all_lock is taken first whenever multiple objects must be locked
+        * at the same time.
+        */
+       spinlock_t wait_all_lock;
+
        struct file *file;
 };
 
+static bool is_signaled(struct ntsync_obj *obj, __u32 owner)
+{
+       lockdep_assert_held(&obj->lock);
+
+       switch (obj->type) {
+       case NTSYNC_TYPE_SEM:
+               return !!obj->u.sem.count;
+       }
+
+       WARN(1, "bad object type %#x\n", obj->type);
+       return false;
+}
+
+/*
+ * "locked_obj" is an optional pointer to an object which is already locked and
+ * should not be locked again. This is necessary so that changing an object's
+ * state and waking it can be a single atomic operation.
+ */
+static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
+                        struct ntsync_obj *locked_obj)
+{
+       __u32 count = q->count;
+       bool can_wake = true;
+       int signaled = -1;
+       __u32 i;
+
+       lockdep_assert_held(&dev->wait_all_lock);
+       if (locked_obj)
+               lockdep_assert_held(&locked_obj->lock);
+
+       for (i = 0; i < count; i++) {
+               if (q->entries[i].obj != locked_obj)
+                       spin_lock_nest_lock(&q->entries[i].obj->lock, &dev->wait_all_lock);
+       }
+
+       for (i = 0; i < count; i++) {
+               if (!is_signaled(q->entries[i].obj, q->owner)) {
+                       can_wake = false;
+                       break;
+               }
+       }
+
+       if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) {
+               for (i = 0; i < count; i++) {
+                       struct ntsync_obj *obj = q->entries[i].obj;
+
+                       switch (obj->type) {
+                       case NTSYNC_TYPE_SEM:
+                               obj->u.sem.count--;
+                               break;
+                       }
+               }
+               wake_up_process(q->task);
+       }
+
+       for (i = 0; i < count; i++) {
+               if (q->entries[i].obj != locked_obj)
+                       spin_unlock(&q->entries[i].obj->lock);
+       }
+}
+
+static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
+{
+       struct ntsync_q_entry *entry;
+
+       lockdep_assert_held(&dev->wait_all_lock);
+       lockdep_assert_held(&obj->lock);
+
+       list_for_each_entry(entry, &obj->all_waiters, node)
+               try_wake_all(dev, entry->q, obj);
+}
+
 static void try_wake_any_sem(struct ntsync_obj *sem)
 {
        struct ntsync_q_entry *entry;
@@ -124,6 +237,7 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count)
 
 static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
 {
+       struct ntsync_device *dev = sem->dev;
        __u32 __user *user_args = argp;
        __u32 prev_count;
        __u32 args;
@@ -135,14 +249,29 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
        if (sem->type != NTSYNC_TYPE_SEM)
                return -EINVAL;
 
-       spin_lock(&sem->lock);
+       if (atomic_read(&sem->all_hint) > 0) {
+               spin_lock(&dev->wait_all_lock);
+               spin_lock_nest_lock(&sem->lock, &dev->wait_all_lock);
 
-       prev_count = sem->u.sem.count;
-       ret = post_sem_state(sem, args);
-       if (!ret)
-               try_wake_any_sem(sem);
+               prev_count = sem->u.sem.count;
+               ret = post_sem_state(sem, args);
+               if (!ret) {
+                       try_wake_all_obj(dev, sem);
+                       try_wake_any_sem(sem);
+               }
 
-       spin_unlock(&sem->lock);
+               spin_unlock(&sem->lock);
+               spin_unlock(&dev->wait_all_lock);
+       } else {
+               spin_lock(&sem->lock);
+
+               prev_count = sem->u.sem.count;
+               ret = post_sem_state(sem, args);
+               if (!ret)
+                       try_wake_any_sem(sem);
+
+               spin_unlock(&sem->lock);
+       }
 
        if (!ret && put_user(prev_count, user_args))
                ret = -EFAULT;
@@ -195,6 +324,8 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
        get_file(dev->file);
        spin_lock_init(&obj->lock);
        INIT_LIST_HEAD(&obj->any_waiters);
+       INIT_LIST_HEAD(&obj->all_waiters);
+       atomic_set(&obj->all_hint, 0);
 
        return obj;
 }
@@ -306,7 +437,7 @@ static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_ar
  * Allocate and initialize the ntsync_q structure, but do not queue us yet.
  */
 static int setup_wait(struct ntsync_device *dev,
-                     const struct ntsync_wait_args *args,
+                     const struct ntsync_wait_args *args, bool all,
                      struct ntsync_q **ret_q)
 {
        const __u32 count = args->count;
@@ -333,6 +464,7 @@ static int setup_wait(struct ntsync_device *dev,
        q->task = current;
        q->owner = args->owner;
        atomic_set(&q->signaled, -1);
+       q->all = all;
        q->count = count;
 
        for (i = 0; i < count; i++) {
@@ -342,6 +474,16 @@ static int setup_wait(struct ntsync_device *dev,
                if (!obj)
                        goto err;
 
+               if (all) {
+                       /* Check that the objects are all distinct. */
+                       for (j = 0; j < i; j++) {
+                               if (obj == q->entries[j].obj) {
+                                       put_obj(obj);
+                                       goto err;
+                               }
+                       }
+               }
+
                entry->obj = obj;
                entry->q = q;
                entry->index = i;
@@ -377,7 +519,7 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
        if (copy_from_user(&args, argp, sizeof(args)))
                return -EFAULT;
 
-       ret = setup_wait(dev, &args, &q);
+       ret = setup_wait(dev, &args, false, &q);
        if (ret < 0)
                return ret;
 
@@ -439,6 +581,87 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
        return ret;
 }
 
+static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
+{
+       struct ntsync_wait_args args;
+       struct ntsync_q *q;
+       int signaled;
+       __u32 i;
+       int ret;
+
+       if (copy_from_user(&args, argp, sizeof(args)))
+               return -EFAULT;
+
+       ret = setup_wait(dev, &args, true, &q);
+       if (ret < 0)
+               return ret;
+
+       /* queue ourselves */
+
+       spin_lock(&dev->wait_all_lock);
+
+       for (i = 0; i < args.count; i++) {
+               struct ntsync_q_entry *entry = &q->entries[i];
+               struct ntsync_obj *obj = entry->obj;
+
+               atomic_inc(&obj->all_hint);
+
+               /*
+                * obj->all_waiters is protected by dev->wait_all_lock rather
+                * than obj->lock, so there is no need to acquire obj->lock
+                * here.
+                */
+               list_add_tail(&entry->node, &obj->all_waiters);
+       }
+
+       /* check if we are already signaled */
+
+       try_wake_all(dev, q, NULL);
+
+       spin_unlock(&dev->wait_all_lock);
+
+       /* sleep */
+
+       ret = ntsync_schedule(q, &args);
+
+       /* and finally, unqueue */
+
+       spin_lock(&dev->wait_all_lock);
+
+       for (i = 0; i < args.count; i++) {
+               struct ntsync_q_entry *entry = &q->entries[i];
+               struct ntsync_obj *obj = entry->obj;
+
+               /*
+                * obj->all_waiters is protected by dev->wait_all_lock rather
+                * than obj->lock, so there is no need to acquire it here.
+                */
+               list_del(&entry->node);
+
+               atomic_dec(&obj->all_hint);
+
+               put_obj(obj);
+       }
+
+       spin_unlock(&dev->wait_all_lock);
+
+       signaled = atomic_read(&q->signaled);
+       if (signaled != -1) {
+               struct ntsync_wait_args __user *user_args = argp;
+
+               /* even if we caught a signal, we need to communicate success */
+               ret = 0;
+
+               if (put_user(signaled, &user_args->index))
+                       ret = -EFAULT;
+       } else if (!ret) {
+               ret = -ETIMEDOUT;
+       }
+
+       kfree(q);
+       return ret;
+}
+
 static int ntsync_char_open(struct inode *inode, struct file *file)
 {
        struct ntsync_device *dev;
@@ -447,6 +670,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file)
        if (!dev)
                return -ENOMEM;
 
+       spin_lock_init(&dev->wait_all_lock);
+
        file->private_data = dev;
        dev->file = file;
        return nonseekable_open(inode, file);
@@ -470,6 +695,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
        switch (cmd) {
        case NTSYNC_IOC_CREATE_SEM:
                return ntsync_create_sem(dev, argp);
+       case NTSYNC_IOC_WAIT_ALL:
+               return ntsync_wait_all(dev, argp);
        case NTSYNC_IOC_WAIT_ANY:
                return ntsync_wait_any(dev, argp);
        default:
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 60ad414b5552..83784d4438a1 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -33,6 +33,7 @@ struct ntsync_wait_args {
 
 #define NTSYNC_IOC_CREATE_SEM          _IOWR('N', 0x80, struct ntsync_sem_args)
 #define NTSYNC_IOC_WAIT_ANY            _IOWR('N', 0x82, struct ntsync_wait_args)
+#define NTSYNC_IOC_WAIT_ALL            _IOWR('N', 0x83, struct ntsync_wait_args)
 
 #define NTSYNC_IOC_SEM_POST            _IOWR('N', 0x81, __u32)
 
-- 
2.43.0


Reply via email to