On Tuesday, May 17, 2011 3:16:45 pm Max Laier wrote:
> On 05/17/2011 09:56 AM, John Baldwin wrote:
> > On Tuesday, May 17, 2011 12:20:40 pm Max Laier wrote:
> >> On 05/17/2011 05:16 AM, John Baldwin wrote:
> >> ...
> >>> Index: kern/kern_switch.c
> >>> ===================================================================
> >>> --- kern/kern_switch.c (revision 221536)
> >>> +++ kern/kern_switch.c (working copy)
> >>> @@ -192,15 +192,22 @@
> >>>  critical_exit(void)
> >>>  {
> >>>         struct thread *td;
> >>> -       int flags;
> >>> +       int flags, owepreempt;
> >>>
> >>>         td = curthread;
> >>>         KASSERT(td->td_critnest != 0,
> >>>             ("critical_exit: td_critnest == 0"));
> >>>
> >>>         if (td->td_critnest == 1) {
> >>> +               owepreempt = td->td_owepreempt;
> >>> +               td->td_owepreempt = 0;
> >>> +               /*
> >>> +                * XXX: Should move compiler_memory_barrier() from
> >>> +                * rmlock to a header.
> >>> +                */
> >>
> >> XXX: If we get an interrupt at this point and td_owepreempt was zero,
> >> the interrupt will set it, because td_critnest is still non-zero.
> >>
> >> So we still end up with a thread that leaks an owepreempt *and* we
> >> lose a preemption.
> >
> > I don't see how this can still leak owepreempt.  The nested interrupt
> > should do nothing (except possibly setting owepreempt) until td_critnest
> > is 0.
> 
> Exactly.  The interrupt sets owepreempt and after we return here, we set 
> td_critnest to 0 and exit without clearing owepreempt.  Hence we leak 
> the owepreempt.
> 
> > However, we can certainly lose preemptions.
> >
> > I wonder if we can abuse the high bit of td_critnest for the owepreempt
> > flag so it is all stored in one cookie.  We only set owepreempt while
> > holding thread_lock() (so interrupts are disabled), so I think we would
> > be ok and not need atomic ops.
> >
> > Hmm, actually, the top-half code would have to use atomic ops.  Nuts.
> > Let me think some more.
> 
> I think these two really belong in a single variable.  Setting the 
> owepreempt flag can be a normal RMW.  Incrementing and decrementing 
> critnest must be atomic (otherwise we could lose the flag), and dropping 
> the final reference would work like this:
> 
>    if ((curthread->td_critnest & TD_CRITNEST_MASK) == 1) {
>      unsigned int owe;
>      owe = atomic_readandclear(&curthread->td_critnest);
>      if (owe & TD_OWEPREEMPT_FLAG) {
>        /* do the switch */
>      }
>    }
> 
> That should do it ... I can put that into a patch, if we agree that's 
> the right thing to do.

Yeah, I already have a patch to do that, but it hadn't added atomic ops to 
critical_enter() and critical_exit(), and it also wasn't as fancy in the 
critical_exit() case.  Here is what I have now.  My first version skipped the 
atomic read-and-clear, but it does in fact need it, so critical_exit() below 
uses atomic_readandclear_int().
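
To sketch the idea before the diff (just an outline of the scheme, not the
patch itself; the KTR traces, assertions, and SWT_* accounting are omitted):

        /*
         * The low bits of td_critnest count the nesting depth; the high
         * bit (TD_OWEPREEMPT) records a pending preemption.  The
         * increment must be atomic because a plain td_critnest++ is a
         * load/modify/store sequence, and an interrupt that fires
         * between the load and the store and sets TD_OWEPREEMPT would
         * have the flag clobbered by the store.
         */
        void
        critical_enter(void)
        {

                atomic_add_int(&curthread->td_critnest, 1);
        }

        void
        critical_exit(void)
        {
                struct thread *td;
                u_int nest;

                td = curthread;
                if ((td->td_critnest & ~TD_OWEPREEMPT) == 1) {
                        /* Zero the count and sample the flag in one step. */
                        nest = atomic_readandclear_int(&td->td_critnest);
                        if (nest & TD_OWEPREEMPT) {
                                /*
                                 * thread_lock() re-enters a critical
                                 * section via spinlock_enter(), so
                                 * mi_switch() runs with td_critnest == 1.
                                 */
                                thread_lock(td);
                                mi_switch(SW_INVOL | SW_PREEMPT, NULL);
                                thread_unlock(td);
                        }
                } else
                        atomic_subtract_int(&td->td_critnest, 1);
        }

The full patch: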

Index: kern/sched_ule.c
===================================================================
--- kern/sched_ule.c    (revision 222024)
+++ kern/sched_ule.c    (working copy)
@@ -1785,7 +1785,6 @@
        td->td_oncpu = NOCPU;
        if (!(flags & SW_PREEMPT))
                td->td_flags &= ~TDF_NEEDRESCHED;
-       td->td_owepreempt = 0;
        tdq->tdq_switchcnt++;
        /*
         * The lock pointer in an idle thread should never change.  Reset it
@@ -2066,7 +2065,7 @@
 
                flags = SW_INVOL | SW_PREEMPT;
                if (td->td_critnest > 1)
-                       td->td_owepreempt = 1;
+                       td->td_critnest |= TD_OWEPREEMPT;
                else if (TD_IS_IDLETHREAD(td))
                        mi_switch(flags | SWT_REMOTEWAKEIDLE, NULL);
                else
@@ -2261,7 +2260,7 @@
                return;
        if (!sched_shouldpreempt(pri, cpri, 0))
                return;
-       ctd->td_owepreempt = 1;
+       ctd->td_critnest |= TD_OWEPREEMPT;
 }
 
 /*
Index: kern/kern_switch.c
===================================================================
--- kern/kern_switch.c  (revision 222024)
+++ kern/kern_switch.c  (working copy)
@@ -183,7 +183,7 @@
        struct thread *td;
 
        td = curthread;
-       td->td_critnest++;
+       atomic_add_int(&td->td_critnest, 1);
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
 }
@@ -192,18 +192,16 @@
 critical_exit(void)
 {
        struct thread *td;
-       int flags;
+       int flags, owepreempt;
 
        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));
 
-       if (td->td_critnest == 1) {
-               td->td_critnest = 0;
-               if (td->td_owepreempt) {
-                       td->td_critnest = 1;
+       if ((td->td_critnest & ~TD_OWEPREEMPT) == 1) {
+               owepreempt = atomic_readandclear_int(&td->td_critnest);
+               if (owepreempt & TD_OWEPREEMPT) {
                        thread_lock(td);
-                       td->td_critnest--;
                        flags = SW_INVOL | SW_PREEMPT;
                        if (TD_IS_IDLETHREAD(td))
                                flags |= SWT_IDLE;
@@ -213,7 +211,7 @@
                        thread_unlock(td);
                }
        } else
-               td->td_critnest--;
+               atomic_subtract_int(&td->td_critnest, 1);
 
        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
Index: kern/kern_synch.c
===================================================================
--- kern/kern_synch.c   (revision 222024)
+++ kern/kern_synch.c   (working copy)
@@ -400,9 +400,7 @@
        if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
                mtx_assert(&Giant, MA_NOTOWNED);
 #endif
-       KASSERT(td->td_critnest == 1 || (td->td_critnest == 2 &&
-           (td->td_owepreempt) && (flags & SW_INVOL) != 0 &&
-           newtd == NULL) || panicstr,
+       KASSERT(td->td_critnest == 1 || panicstr,
            ("mi_switch: switch in a critical section"));
        KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
            ("mi_switch: switch must be voluntary or involuntary"));
Index: kern/sched_4bsd.c
===================================================================
--- kern/sched_4bsd.c   (revision 222024)
+++ kern/sched_4bsd.c   (working copy)
@@ -335,7 +335,7 @@
        if (ctd->td_critnest > 1) {
                CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
                    ctd->td_critnest);
-               ctd->td_owepreempt = 1;
+               ctd->td_critnest |= TD_OWEPREEMPT;
                return (0);
        }
        /*
@@ -949,7 +949,6 @@
        td->td_lastcpu = td->td_oncpu;
        if (!(flags & SW_PREEMPT))
                td->td_flags &= ~TDF_NEEDRESCHED;
-       td->td_owepreempt = 0;
        td->td_oncpu = NOCPU;
 
        /*
@@ -1437,7 +1436,7 @@
 {
        thread_lock(td);
        if (td->td_critnest > 1)
-               td->td_owepreempt = 1;
+               td->td_critnest |= TD_OWEPREEMPT;
        else
                mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
        thread_unlock(td);
Index: kern/kern_rmlock.c
===================================================================
--- kern/kern_rmlock.c  (revision 222024)
+++ kern/kern_rmlock.c  (working copy)
@@ -366,7 +366,8 @@
         * Fast path to combine two common conditions into a single
         * conditional jump.
         */
-       if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask)))
+       if (0 == ((td->td_critnest & TD_OWEPREEMPT) |
+           (rm->rm_writecpus & pc->pc_cpumask)))
                return (1);
 
        /* We do not have a read token and need to acquire one. */
@@ -377,7 +378,7 @@
 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker)
 {
 
-       if (td->td_owepreempt) {
+       if (td->td_critnest & TD_OWEPREEMPT) {
                td->td_critnest++;
                critical_exit();
        }
@@ -418,7 +419,7 @@
        td->td_critnest--;
        sched_unpin();
 
-       if (0 == (td->td_owepreempt | tracker->rmp_flags))
+       if (0 == ((td->td_critnest & TD_OWEPREEMPT) | tracker->rmp_flags))
                return;
 
        _rm_unlock_hard(td, tracker);
Index: sys/proc.h
===================================================================
--- sys/proc.h  (revision 222024)
+++ sys/proc.h  (working copy)
@@ -228,7 +228,6 @@
        const char      *td_wmesg;      /* (t) Reason for sleep. */
        u_char          td_lastcpu;     /* (t) Last cpu we were on. */
        u_char          td_oncpu;       /* (t) Which cpu we are on. */
-       volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
        u_char          td_tsqueue;     /* (t) Turnstile queue blocked on. */
        short           td_locks;       /* (k) Count of non-spin locks. */
        short           td_rw_rlocks;   /* (k) Count of rwlock read locks. */
@@ -465,6 +464,12 @@
 #define        TD_SET_CAN_RUN(td)      (td)->td_state = TDS_CAN_RUN
 
 /*
+ * The high bit of td_critnest is used to determine if a preemption is
+ * pending.
+ */
+#define        TD_OWEPREEMPT           ((u_int)INT_MAX + 1)
+
+/*
  * Process structure.
  */
 struct proc {
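
For completeness, the interleaving that forces the read-and-clear in
critical_exit() (a hand-drawn illustration, not code from the tree):

        if ((td->td_critnest & ~TD_OWEPREEMPT) == 1) {
                /*
                 * A plain store would race here: an interrupt at this
                 * point still sees td_critnest != 0, so instead of
                 * preempting it sets TD_OWEPREEMPT ...
                 */
                td->td_critnest = 0;    /* ... and this store wipes the
                                           flag out. */
        }

atomic_readandclear_int() closes that window: it returns whatever was in
td_critnest at the instant it is zeroed, so a flag set by any interrupt up
to that point is seen and acted on, while an interrupt arriving after the
clear sees td_critnest == 0 and can preempt directly.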

-- 
John Baldwin