On Tue, Feb 03, 2015 at 12:33:48PM +0100, Peter Zijlstra wrote:
> > block/bsg.c-    prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
> > block/bsg.c-    spin_unlock_irq(&bd->lock);
> > block/bsg.c:    io_schedule();
> > block/bsg.c-    finish_wait(&bd->wq_done, &wait);
> > 
> > Which is double buggy because:
> >  1) it doesn't loop
> >  2) it sets TASK_UNINTERRUPTIBLE _after_ testing for the sleep event.
> 
> OK, actually had a look at this one; it might be ok.
> 
> The spinlock might fully serialize the state so no fails, and the entire
> function is called in a loop. Still seriously obtuse code.

Jens, would something like the below work for you?

---
 block/bsg.c          | 72 ++++++++++++++++++----------------------------------
 include/linux/wait.h | 15 +++++++++++
 2 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index 276e869e686c..d214e929ce18 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -136,42 +136,6 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
        return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
 }
 
-static int bsg_io_schedule(struct bsg_device *bd)
-{
-       DEFINE_WAIT(wait);
-       int ret = 0;
-
-       spin_lock_irq(&bd->lock);
-
-       BUG_ON(bd->done_cmds > bd->queued_cmds);
-
-       /*
-        * -ENOSPC or -ENODATA?  I'm going for -ENODATA, meaning "I have no
-        * work to do", even though we return -ENOSPC after this same test
-        * during bsg_write() -- there, it means our buffer can't have more
-        * bsg_commands added to it, thus has no space left.
-        */
-       if (bd->done_cmds == bd->queued_cmds) {
-               ret = -ENODATA;
-               goto unlock;
-       }
-
-       if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
-               ret = -EAGAIN;
-               goto unlock;
-       }
-
-       prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
-       spin_unlock_irq(&bd->lock);
-       io_schedule();
-       finish_wait(&bd->wq_done, &wait);
-
-       return ret;
-unlock:
-       spin_unlock_irq(&bd->lock);
-       return ret;
-}
-
 static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
                                struct sg_io_v4 *hdr, struct bsg_device *bd,
                                fmode_t has_write_perm)
@@ -482,6 +446,30 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
        return ret;
 }
 
+static bool bsg_complete(struct bsg_device *bd)
+{
+       bool ret = false;
+       bool spin;
+
+       do {
+               spin_lock_irq(&bd->lock);
+
+               BUG_ON(bd->done_cmds > bd->queued_cmds);
+
+               /*
+                * All commands consumed.
+                */
+               if (bd->done_cmds == bd->queued_cmds)
+                       ret = true;
+
+               spin = !test_bit(BSG_F_BLOCK, &bd->flags);
+
+               spin_unlock_irq(&bd->lock);
+       } while (!ret && spin);
+
+       return ret;
+}
+
 static int bsg_complete_all_commands(struct bsg_device *bd)
 {
        struct bsg_command *bc;
@@ -492,17 +480,7 @@ static int bsg_complete_all_commands(struct bsg_device *bd)
        /*
         * wait for all commands to complete
         */
-       ret = 0;
-       do {
-               ret = bsg_io_schedule(bd);
-               /*
-                * look for -ENODATA specifically -- we'll sometimes get
-                * -ERESTARTSYS when we've taken a signal, but we can't
-                * return until we're done freeing the queue, so ignore
-                * it.  The signal will get handled when we're done freeing
-                * the bsg_device.
-                */
-       } while (ret != -ENODATA);
+       io_wait_event(bd->wq_done, bsg_complete(bd));
 
        /*
         * discard done commands
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2232ed16635a..71fc1d31e48d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -267,6 +267,21 @@ do {                                                                   \
        __wait_event(wq, condition);                                    \
 } while (0)
 
+#define __io_wait_event(wq, condition)                                 \
+       (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,  \
+                           io_schedule())
+
+/*
+ * io_wait_event() -- like wait_event() but with io_schedule()
+ */
+#define io_wait_event(wq, condition)                                   \
+do {                                                                   \
+       might_sleep();                                                  \
+       if (condition)                                                  \
+               break;                                                  \
+       __io_wait_event(wq, condition);                                 \
+} while (0)
+
 #define __wait_event_freezable(wq, condition)                          \
        ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,          \
                            schedule(); try_to_freeze())
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to