Re: [patch] aio: fix buggy put_ioctx call in aio_complete - v2

2007-02-01 Thread Jeff Moyer
==> On Fri, 29 Dec 2006 17:34:26 -0800, "Chen, Kenneth W" <[EMAIL PROTECTED]> 
said:

Kenneth> An AIO bug was reported that a sleeping function is being called
Kenneth> in softirq context:

Kenneth> The real problem is that the condition check of ctx->reqs_active in
Kenneth> wait_for_all_aios() is incorrect: access to reqs_active is not
Kenneth> properly protected by the spin lock.

Kenneth> This patch adds that protective spin lock, and at the same time removes
Kenneth> all duplicate ref counting for each kiocb, as reqs_active is already
Kenneth> used as a ref count for each active ioctx.  This also ensures that the
Kenneth> buggy call to flush_workqueue() in softirq context is eliminated.

I was finally able to wrap my head around this, and testing has
confirmed that this patch fixes the problem.  So,

Acked-by: Jeff Moyer <[EMAIL PROTECTED]>

Better late than never.

-Jeff


RE: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-21 Thread Chen, Kenneth W
[EMAIL PROTECTED] wrote on Thursday, December 21, 2006 9:35 AM
> kenneth.w.chen> Taking ioctx_lock is one part; the other part is to move
> kenneth.w.chen>   spin_unlock_irqrestore(&ctx->ctx_lock, flags);
> kenneth.w.chen> in aio_complete all the way down to the end of the
> kenneth.w.chen> function, after the wakeup and put_ioctx.  But then the ref
> kenneth.w.chen> counting on the ioctx in the aio_complete path is meaningless,
> kenneth.w.chen> which is the thing I'm trying to remove.
> 
> OK, right.  But are we simply papering over the real problem?  Earlier in
> this thread, you stated:
> 
> > flush_workqueue() is not allowed to be called in softirq context.
> > However, aio_complete() called from I/O interrupt can potentially call
> > put_ioctx with the last ref count on the ioctx and trigger a bug warning.
> > It is simply incorrect to perform ioctx freeing from aio_complete.
> 
> But how do we end up with the last reference to the ioctx in the aio
> completion path?  That's a question I haven't seen answered.


It is explained in this posting, "A race between io_destroy and aio_complete":
http://groups.google.com/group/linux.kernel/msg/d2ba7d73aca1dd0c?hl=en

Trond spotted a bug in that posting.  The correct way of locking is to
move the spin_unlock_irqrestore in aio_complete all the way down instead
of calling aio_put_req at the end.  Like this:


--- ./fs/aio.c.orig	2006-12-21 08:08:14.000000000 -0800
+++ ./fs/aio.c	2006-12-21 08:14:27.000000000 -0800
@@ -298,17 +298,23 @@ static void wait_for_all_aios(struct kio
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
 
+	spin_lock_irq(&ctx->ctx_lock);
 	if (!ctx->reqs_active)
-		return;
+		goto out;
 
 	add_wait_queue(&ctx->wait, &wait);
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	while (ctx->reqs_active) {
+		spin_unlock_irq(&ctx->ctx_lock);
 		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&ctx->ctx_lock);
 	}
 	__set_task_state(tsk, TASK_RUNNING);
 	remove_wait_queue(&ctx->wait, &wait);
+
+out:
+	spin_unlock_irq(&ctx->ctx_lock);
 }
 
 /* wait_on_sync_kiocb:
@@ -424,7 +430,6 @@ static struct kiocb fastcall *__aio_get_
 	ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
 	if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
 		list_add(&req->ki_list, &ctx->active_reqs);
-		get_ioctx(ctx);
 		ctx->reqs_active++;
 		okay = 1;
 	}
@@ -536,8 +541,6 @@ int fastcall aio_put_req(struct kiocb *r
 	spin_lock_irq(&ctx->ctx_lock);
 	ret = __aio_put_req(ctx, req);
 	spin_unlock_irq(&ctx->ctx_lock);
-	if (ret)
-		put_ioctx(ctx);
 	return ret;
 }
 
@@ -782,8 +785,7 @@ static int __aio_run_iocbs(struct kioctx
 		 */
 		iocb->ki_users++;	/* grab extra reference */
 		aio_run_iocb(iocb);
-		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
-			put_ioctx(ctx);
+		__aio_put_req(ctx, iocb);
 	}
 	if (!list_empty(&ctx->run_list))
 		return 1;
@@ -998,14 +1000,10 @@ put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
 
-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 
-	if (ret)
-		put_ioctx(ctx);
-
+	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
 }
 


Re: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-21 Thread jmoyer
==> Regarding RE: [patch] aio: fix buggy put_ioctx call in aio_complete; "Chen, 
Kenneth W" <[EMAIL PROTECTED]> adds:

kenneth.w.chen> [EMAIL PROTECTED] wrote on Thursday, December 21, 2006 8:56 AM
kenneth.w.chen> I think I'm going to abandon this whole synchronize
kenneth.w.chen> thing and instead put the wake-up call inside the ioctx_lock
kenneth.w.chen> spin lock, along with the other patch you mentioned above in
kenneth.w.chen> the waiter path.  On top of that, I have another patch that
kenneth.w.chen> attempts to perform the wake-up only when the waiter can truly
kenneth.w.chen> proceed in aio_read_evt, so dribbling I/O completion doesn't
kenneth.w.chen> inefficiently wake the waiter too frequently, only to have
kenneth.w.chen> the waiter put back to sleep again.  I will dig that up and
kenneth.w.chen> experiment.
>> In the meantime, can't we simply take the context lock in
>> wait_for_all_aios?  Unless I missed something, I think that will address
>> the reference count problem.

kenneth.w.chen> Taking ioctx_lock is one part; the other part is to move

kenneth.w.chen> spin_unlock_irqrestore(&ctx->ctx_lock, flags);

kenneth.w.chen> in aio_complete all the way down to the end of the
kenneth.w.chen> function, after the wakeup and put_ioctx.  But then the ref
kenneth.w.chen> counting on the ioctx in the aio_complete path is meaningless,
kenneth.w.chen> which is the thing I'm trying to remove.

OK, right.  But are we simply papering over the real problem?  Earlier in
this thread, you stated:

> flush_workqueue() is not allowed to be called in softirq context.
> However, aio_complete() called from I/O interrupt can potentially call
> put_ioctx with the last ref count on the ioctx and trigger a bug warning.
> It is simply incorrect to perform ioctx freeing from aio_complete.

But how do we end up with the last reference to the ioctx in the aio
completion path?  That's a question I haven't seen answered.

-Jeff


RE: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-21 Thread Chen, Kenneth W
[EMAIL PROTECTED] wrote on Thursday, December 21, 2006 8:56 AM
> kenneth.w.chen> I think I'm going to abandon this whole synchronize thing
> kenneth.w.chen> and instead put the wake-up call inside the ioctx_lock spin
> kenneth.w.chen> lock, along with the other patch you mentioned above in the
> kenneth.w.chen> waiter path.  On top of that, I have another patch that
> kenneth.w.chen> attempts to perform the wake-up only when the waiter can
> kenneth.w.chen> truly proceed in aio_read_evt, so dribbling I/O completion
> kenneth.w.chen> doesn't inefficiently wake the waiter too frequently, only
> kenneth.w.chen> to have the waiter put back to sleep again.  I will dig
> kenneth.w.chen> that up and experiment.
> 
> In the meantime, can't we simply take the context lock in
> wait_for_all_aios?  Unless I missed something, I think that will address
> the reference count problem.

Taking ioctx_lock is one part; the other part is to move

	spin_unlock_irqrestore(&ctx->ctx_lock, flags);

in aio_complete all the way down to the end of the function, after the wakeup
and put_ioctx.  But then the ref counting on the ioctx in the aio_complete
path is meaningless, which is the thing I'm trying to remove.
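
For reference, this is roughly the current ordering at the end of
aio_complete that makes the per-kiocb ref necessary (a simplified sketch
of the 2.6.19-era code, not a verbatim excerpt):

	ret = __aio_put_req(ctx, iocb);
	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
	if (waitqueue_active(&ctx->wait))	/* only safe because the ref
						 * below still pins ctx */
		wake_up(&ctx->wait);
	if (ret)
		put_ioctx(ctx);			/* the per-kiocb ref */

With the unlock moved below the wake_up(), nothing touches ctx after the
lock is dropped, and the get_ioctx/put_ioctx pair becomes removable.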



Re: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-21 Thread jmoyer
==> Regarding RE: [patch] aio: fix buggy put_ioctx call in aio_complete; "Chen, 
Kenneth W" <[EMAIL PROTECTED]> adds:

kenneth.w.chen> I think I'm going to abandon this whole synchronize thing
kenneth.w.chen> and instead put the wake-up call inside the ioctx_lock spin
kenneth.w.chen> lock, along with the other patch you mentioned above in the
kenneth.w.chen> waiter path.  On top of that, I have another patch that
kenneth.w.chen> attempts to perform the wake-up only when the waiter can
kenneth.w.chen> truly proceed in aio_read_evt, so dribbling I/O completion
kenneth.w.chen> doesn't inefficiently wake the waiter too frequently, only
kenneth.w.chen> to have the waiter put back to sleep again.  I will dig
kenneth.w.chen> that up and experiment.

In the meantime, can't we simply take the context lock in
wait_for_all_aios?  Unless I missed something, I think that will address
the reference count problem.

Thanks,

Jeff


RE: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-21 Thread Chen, Kenneth W
Andrew Morton wrote on Thursday, December 21, 2006 12:18 AM
> Alas, your above description doesn't really tell us what the bug is, so I'm
> at a bit of a loss here.
> 
> <finds http://marc.theaimsgroup.com/?l=linux-aio&m=116616463009218&w=2>
> 
> So that's a refcounting bug.  But it's really a locking bug, because
> refcounting needs locking too.

I should've quoted the original bug report (kicking myself for those
fat fingers!):
http://marc.theaimsgroup.com/?l=linux-kernel&m=116599593200888&w=2

The bug manifested from an expectation that __put_ioctx can be called
in softirq context, but it really shouldn't be.  Normally, aio_complete
will not drop the last ref count on the ioctx, but on a stressed system
it might.
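
To see why that matters: the final put runs roughly the following
(abridged from the 2.6.19-era __put_ioctx in fs/aio.c, so treat it as a
sketch rather than the exact code):

	void fastcall __put_ioctx(struct kioctx *ctx)
	{
		BUG_ON(ctx->reqs_active);

		cancel_delayed_work(&ctx->wq);
		flush_workqueue(aio_wq);	/* may sleep -- not legal
						 * in softirq context */
		aio_free_ring(ctx);
		mmdrop(ctx->mm);
		kmem_cache_free(kioctx_cachep, ctx);
	}

so whichever path drops the last reference must be running in process
context.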


> > The problem is in wait_for_all_aios(): it checks the wait status without
> > properly holding the ioctx lock.  Perhaps this patch is walking on thin
> > ice.  It abuses RCU over buggy code.  OTOH, I really don't want to hold
> > ctx_lock over the entire wakeup call at the end of aio_complete:
> > 
> > 	if (waitqueue_active(&ctx->wait))
> > 		wake_up(&ctx->wait);
> > 
> > I'm worried about longer lock hold times in aio_complete and a potential
> > increase in lock contention for concurrent I/O completion.
> 
> There is a potential problem where we deliver a storm of wakeups at the
> waitqueue, and until the woken-up process actually gets going and removes
> itself from the waitqueue, those wake_up()s do lots of work waking up an
> already-runnable task.
> 
> If we were using DEFINE_WAIT/prepare_to_wait/finish_wait in there then the
> *first* wake_up() would do some work, but all the following ones are
> practically free.
> 
> So if you're going to get in there and run the numbers on this, try both
> approaches.

Yes, I agree, and I would like to bring your "DEFINE_WAIT..." patch back
too.  I will run that experiment.


> > A quick look
> > at lockmeter data I had on a 4-socket system (with dual core + HT)
> > already shows signs of substantial lock contention in aio_complete.
> > I'm afraid putting the above call inside ioctx_lock will make things
> > worse.
> 
> It beats oopsing.

Yeah, correctness absolutely overrules optimization.  I just hope I can
find a way to satisfy both.


> > And synchronize_rcu fits the bill perfectly: aio_complete sets the wakeup
> > status, drops ioctx_lock, and does the wakeup call, all protected inside
> > an RCU read lock.  Then wait_for_all_aios will just wait for that whole
> > sequence to complete before it proceeds with __put_ioctx().  All nice
> > and easy.
> 
> Possibly it would be less abusive to use preempt_disable()/enable (with
> suitable comments) and synchronize_kernel().  To at least remove the rcu
> signals from in there.

I think I'm going to abandon this whole synchronize thing and instead
put the wake-up call inside the ioctx_lock spin lock, along with the other
patch you mentioned above in the waiter path.  On top of that, I have
another patch that attempts to perform the wake-up only when the waiter
can truly proceed in aio_read_evt, so dribbling I/O completion doesn't
inefficiently wake the waiter too frequently, only to have the waiter put
back to sleep again.  I will dig that up and experiment.



Re: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-21 Thread Andrew Morton
On Wed, 20 Dec 2006 23:58:43 -0800
"Chen, Kenneth W" <[EMAIL PROTECTED]> wrote:

> Andrew Morton wrote on Wednesday, December 20, 2006 8:06 PM
> > On Tue, 19 Dec 2006 13:49:18 -0800
> > "Chen, Kenneth W" <[EMAIL PROTECTED]> wrote:
> > > Regarding a bug report on:
> > > http://marc.theaimsgroup.com/?l=linux-kernel&m=116599593200888&w=2
> > > 
> > > flush_workqueue() is not allowed to be called in softirq context.
> > > However, aio_complete() called from I/O interrupt can potentially call
> > > put_ioctx with the last ref count on the ioctx and trigger a bug
> > > warning.  It is simply incorrect to perform ioctx freeing from
> > > aio_complete.
> > > 
> > > This patch removes all duplicate ref counting for each kiocb, as
> > > reqs_active is already used as a request ref count for each active
> > > ioctx.  This also ensures that the buggy call to flush_workqueue() in
> > > softirq context is eliminated.  wait_for_all_aios currently will wait
> > > on the last active kiocb.  However, it is racy.  This patch also
> > > tightens it up by utilizing the rcu synchronization mechanism to ensure
> > > no further reference to the ioctx before the put_ioctx function is run.
> > 
> > hrm, maybe.  Does this count as "abuse of the RCU interfaces".  Or "reuse"?
> 
> Yeah, it's abuse.

Alas, your above description doesn't really tell us what the bug is, so I'm
at a bit of a loss here.

<finds http://marc.theaimsgroup.com/?l=linux-aio&m=116616463009218&w=2>

So that's a refcounting bug.  But it's really a locking bug, because
refcounting needs locking too.

> The problem is in wait_for_all_aios(): it checks the wait status without
> properly holding the ioctx lock.  Perhaps this patch is walking on thin
> ice.  It abuses RCU over buggy code.  OTOH, I really don't want to hold
> ctx_lock over the entire wakeup call at the end of aio_complete:
> 
> 	if (waitqueue_active(&ctx->wait))
> 		wake_up(&ctx->wait);
> 
> I'm worried about longer lock hold times in aio_complete and a potential
> increase in lock contention for concurrent I/O completion.

There is a potential problem where we deliver a storm of wakeups at the
waitqueue, and until the woken-up process actually gets going and removes
itself from the waitqueue, those wake_up()s do lots of work waking up an
already-runnable task.

If we were using DEFINE_WAIT/prepare_to_wait/finish_wait in there then the
*first* wake_up() would do some work, but all the following ones are
practically free.

So if you're going to get in there and run the numbers on this, try both
approaches.
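
For concreteness, the DEFINE_WAIT flavour of the waiter would look roughly
like this (an untested sketch; it assumes the ctx_lock usage from the
patch under discussion):

	static void wait_for_all_aios(struct kioctx *ctx)
	{
		DEFINE_WAIT(wait);

		spin_lock_irq(&ctx->ctx_lock);
		while (ctx->reqs_active) {
			prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&ctx->ctx_lock);
			schedule();
			spin_lock_irq(&ctx->ctx_lock);
		}
		spin_unlock_irq(&ctx->ctx_lock);
		finish_wait(&ctx->wait, &wait);
	}

DEFINE_WAIT uses autoremove_wake_function, so the first wake_up() takes the
task off the queue and the following wake_up()s find the waitqueue empty.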

>  A quick look
> at lockmeter data I had on a 4 socket system (with dual core + HT), it
> already showing signs of substantial lock contention in aio_complete.
> I'm afraid putting the above call inside ioctx_lock will make things
> worse.

It beats oopsing.

> And synchronize_rcu fits the bill perfectly: aio_complete sets wakeup
> status, drop ioctx_lock, do the wakeup call all protected inside rcu
> lock.  Then wait_for_all_aios will just wait for all that sequence to
> complete before it proceed with __put_ioctx().  All nice and easy.

Possibly it would be less abusive to use preempt_disable()/enable (with
suitable comments) and synchronize_kernel().  To at least remove the rcu
signals from in there.
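
A sketch of that alternative, keeping the synchronize_kernel() name used
above (in a 2.6.19 tree the equivalent would be synchronize_sched()):

	/* completion side: cover the wakeup with preemption disabled */
	preempt_disable();
	if (waitqueue_active(&ctx->wait))
		wake_up(&ctx->wait);
	preempt_enable();

	/* waiter side, before the final __put_ioctx() */
	synchronize_kernel();	/* waits out every preempt-off section */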



RE: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-21 Thread Chen, Kenneth W
Andrew Morton wrote on Wednesday, December 20, 2006 8:06 PM
> On Tue, 19 Dec 2006 13:49:18 -0800
> "Chen, Kenneth W" <[EMAIL PROTECTED]> wrote:
> > Regarding a bug report on:
> > http://marc.theaimsgroup.com/?l=linux-kernel&m=116599593200888&w=2
> > 
> > flush_workqueue() is not allowed to be called in softirq context.
> > However, aio_complete() called from I/O interrupt can potentially call
> > put_ioctx with the last ref count on the ioctx and trigger a bug
> > warning.  It is simply incorrect to perform ioctx freeing from
> > aio_complete.
> > 
> > This patch removes all duplicate ref counting for each kiocb, as
> > reqs_active is already used as a request ref count for each active
> > ioctx.  This also ensures that the buggy call to flush_workqueue() in
> > softirq context is eliminated.  wait_for_all_aios currently will wait
> > on the last active kiocb.  However, it is racy.  This patch also
> > tightens it up by utilizing the rcu synchronization mechanism to ensure
> > no further reference to the ioctx before the put_ioctx function is run.
> 
> hrm, maybe.  Does this count as "abuse of the RCU interfaces".  Or "reuse"?

Yeah, it's abuse.

The problem is in wait_for_all_aios(): it checks the wait status without
properly holding the ioctx lock.  Perhaps this patch is walking on thin
ice.  It abuses RCU over buggy code.  OTOH, I really don't want to hold
ctx_lock over the entire wakeup call at the end of aio_complete:

	if (waitqueue_active(&ctx->wait))
		wake_up(&ctx->wait);

I'm worried about longer lock hold times in aio_complete and a potential
increase in lock contention for concurrent I/O completion.  A quick look
at lockmeter data I had on a 4-socket system (with dual core + HT)
already shows signs of substantial lock contention in aio_complete.
I'm afraid putting the above call inside ioctx_lock will make things
worse.

And synchronize_rcu fits the bill perfectly: aio_complete sets the wakeup
status, drops ioctx_lock, and does the wakeup call, all protected inside an
RCU read lock.  Then wait_for_all_aios will just wait for that whole sequence
to complete before it proceeds with __put_ioctx().  All nice and easy.
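
In sketch form (the real diff is in the patch quoted below), the pairing
looks like:

	/* aio_complete() tail */
	rcu_read_lock();
	ret = __aio_put_req(ctx, iocb);
	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
	if (waitqueue_active(&ctx->wait))
		wake_up(&ctx->wait);
	rcu_read_unlock();

	/* wait_for_all_aios(), before the caller's __put_ioctx() */
	synchronize_rcu();	/* waits out any completion still inside
				 * the read-side section above */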


Re: [patch] aio: fix buggy put_ioctx call in aio_complete

2006-12-20 Thread Andrew Morton
On Tue, 19 Dec 2006 13:49:18 -0800
"Chen, Kenneth W" <[EMAIL PROTECTED]> wrote:

> Regarding a bug report on:
> http://marc.theaimsgroup.com/?l=linux-kernel&m=116599593200888&w=2
> 
> flush_workqueue() is not allowed to be called in softirq context.
> However, aio_complete() called from I/O interrupt can potentially call
> put_ioctx with the last ref count on the ioctx and trigger a bug
> warning.  It is simply incorrect to perform ioctx freeing from
> aio_complete.
> 
> This patch removes all duplicate ref counting for each kiocb, as
> reqs_active is already used as a request ref count for each active
> ioctx.  This also ensures that the buggy call to flush_workqueue() in
> softirq context is eliminated.  wait_for_all_aios currently will wait
> on the last active kiocb.  However, it is racy.  This patch also
> tightens it up by utilizing the rcu synchronization mechanism to ensure
> no further reference to the ioctx before the put_ioctx function is run.
> 

hrm, maybe.  Does this count as "abuse of the RCU interfaces".  Or "reuse"?

> 
> 
> --- ./fs/aio.c.orig	2006-12-19 08:35:01.000000000 -0800
> +++ ./fs/aio.c	2006-12-19 08:46:34.000000000 -0800
> @@ -308,6 +308,7 @@ static void wait_for_all_aios(struct kio
>  		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
>  	}
>  	__set_task_state(tsk, TASK_RUNNING);
> +	synchronize_rcu();
>  	remove_wait_queue(&ctx->wait, &wait);
>  }

argh.  Pity the poor fresh-faced wannabe kernel developer who stumbles
across a stray synchronize_rcu() in the AIO code and wonders what the hell
that's doing there.

Please, this needs good commenting.   More than zero, anyway.

> @@ -425,7 +426,6 @@ static struct kiocb fastcall *__aio_get_
>  	ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
>  	if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
>  		list_add(&req->ki_list, &ctx->active_reqs);
> -		get_ioctx(ctx);
>  		ctx->reqs_active++;
>  		okay = 1;
>  	}
> @@ -538,8 +538,6 @@ int fastcall aio_put_req(struct kiocb *r
>  	spin_lock_irq(&ctx->ctx_lock);
>  	ret = __aio_put_req(ctx, req);
>  	spin_unlock_irq(&ctx->ctx_lock);
> -	if (ret)
> -		put_ioctx(ctx);
>  	return ret;
>  }
>  
> @@ -795,8 +793,7 @@ static int __aio_run_iocbs(struct kioctx
>  		 */
>  		iocb->ki_users++;	/* grab extra reference */
>  		aio_run_iocb(iocb);
> -		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
> -			put_ioctx(ctx);
> +		__aio_put_req(ctx, iocb);
>  	}
>  	if (!list_empty(&ctx->run_list))
>  		return 1;
> @@ -1012,6 +1009,7 @@ int fastcall aio_complete(struct kiocb *
>  		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
>  put_rq:
>  	/* everything turned out well, dispose of the aiocb. */
> +	rcu_read_lock();
>  	ret = __aio_put_req(ctx, iocb);
>  
>  	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
> @@ -1019,9 +1017,7 @@ put_rq:
>  	if (waitqueue_active(&ctx->wait))
>  		wake_up(&ctx->wait);
>  
> -	if (ret)
> -		put_ioctx(ctx);
> -
> +	rcu_read_unlock();
>  	return ret;
>  }
>  