Author: kib
Date: Fri Jul 12 18:43:24 2019
New Revision: 349951
URL: https://svnweb.freebsd.org/changeset/base/349951

Log:
  Provide protection against starvation of the ll/sc loops when accessing
  userspace.
  
  Casueword(9) on ll/sc architectures must be prepared for userspace
  constantly modifying the same cache line as containing the CAS word,
  and not loop infinitely.  Otherwise, rogue userspace livelocks the
  kernel.
  
  To fix the issue, change the casueword(9) interface to return a new
  value, 1, indicating that either the comparison or the store failed,
  instead of relying on the oldval == *oldvalp comparison.  The primitive
  no longer retries the operation if it failed spuriously.  Modify the
  callers of casueword(9), all in kern_umtx.c, to handle retries, and to
  react to stops and requests to terminate between retries.
  
  On x86, although cmpxchg should not return spurious failures, we can
  take advantage of the new interface and just derive the result from PSL.ZF.
  
  Reviewed by:  andrew (arm64, previous version), markj
  Tested by:    pho
  Reported by:  https://xenbits.xen.org/xsa/advisory-295.txt
  Sponsored by: The FreeBSD Foundation
  MFC after:    2 weeks
  Differential revision:        https://reviews.freebsd.org/D20772

Modified:
  head/share/man/man9/casuword.9
  head/sys/amd64/amd64/support.S
  head/sys/arm/arm/fusu.S
  head/sys/arm64/arm64/support.S
  head/sys/i386/i386/copyout.c
  head/sys/kern/kern_umtx.c
  head/sys/mips/mips/support.S
  head/sys/powerpc/powerpc/copyinout.c
  head/sys/riscv/riscv/support.S
  head/sys/sparc64/sparc64/support.S
  head/sys/sparc64/sparc64/vm_machdep.c

Modified: head/share/man/man9/casuword.9
==============================================================================
--- head/share/man/man9/casuword.9      Fri Jul 12 18:39:41 2019        
(r349950)
+++ head/share/man/man9/casuword.9      Fri Jul 12 18:43:24 2019        
(r349951)
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2014 The FreeBSD Foundation
+.\" Copyright (c) 2014, 2019 The FreeBSD Foundation
 .\" All rights reserved.
 .\"
 .\" Part of this documentation was written by
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 21, 2014
+.Dd April 19, 2019
 .Dt CASU 9
 .Os
 .Sh NAME
@@ -106,7 +106,9 @@ The
 .Fn casueword
 and
 .Fn casueword32
-functions return 0 on success and -1 on failure.
+functions return 0 on success, -1 on failure to access memory,
+and 1 when comparison or store failed.
+The store can fail on load-linked/store-conditional architectures.
 .Sh SEE ALSO
 .Xr atomic 9 ,
 .Xr fetch 9 ,

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S      Fri Jul 12 18:39:41 2019        
(r349950)
+++ head/sys/amd64/amd64/support.S      Fri Jul 12 18:43:24 2019        
(r349951)
@@ -811,6 +811,7 @@ ENTRY(casueword32_nosmap)
        lock
 #endif
        cmpxchgl %ecx,(%rdi)                    /* new = %ecx */
+       setne   %cl
 
        /*
         * The old value is in %eax.  If the store succeeded it will be the
@@ -828,6 +829,7 @@ ENTRY(casueword32_nosmap)
         */
        movl    %esi,(%rdx)                     /* oldp = %rdx */
        POP_FRAME_POINTER
+       movzbl  %cl, %eax
        ret
 END(casueword32_nosmap)
 
@@ -847,6 +849,7 @@ ENTRY(casueword32_smap)
 #endif
        cmpxchgl %ecx,(%rdi)                    /* new = %ecx */
        clac
+       setne   %cl
 
        /*
         * The old value is in %eax.  If the store succeeded it will be the
@@ -864,6 +867,7 @@ ENTRY(casueword32_smap)
         */
        movl    %esi,(%rdx)                     /* oldp = %rdx */
        POP_FRAME_POINTER
+       movzbl  %cl, %eax
        ret
 END(casueword32_smap)
 
@@ -886,6 +890,7 @@ ENTRY(casueword_nosmap)
        lock
 #endif
        cmpxchgq %rcx,(%rdi)                    /* new = %rcx */
+       setne   %cl
 
        /*
         * The old value is in %rax.  If the store succeeded it will be the
@@ -897,6 +902,7 @@ ENTRY(casueword_nosmap)
        movq    %rax,PCB_ONFAULT(%r8)
        movq    %rsi,(%rdx)
        POP_FRAME_POINTER
+       movzbl  %cl, %eax
        ret
 END(casueword_nosmap)
 
@@ -916,6 +922,7 @@ ENTRY(casueword_smap)
 #endif
        cmpxchgq %rcx,(%rdi)                    /* new = %rcx */
        clac
+       setne   %cl
 
        /*
         * The old value is in %rax.  If the store succeeded it will be the
@@ -927,6 +934,7 @@ ENTRY(casueword_smap)
        movq    %rax,PCB_ONFAULT(%r8)
        movq    %rsi,(%rdx)
        POP_FRAME_POINTER
+       movzbl  %cl, %eax
        ret
 END(casueword_smap)
 

Modified: head/sys/arm/arm/fusu.S
==============================================================================
--- head/sys/arm/arm/fusu.S     Fri Jul 12 18:39:41 2019        (r349950)
+++ head/sys/arm/arm/fusu.S     Fri Jul 12 18:43:24 2019        (r349951)
@@ -63,7 +63,7 @@ EENTRY_NP(casueword32)
        ldr     r4, =(VM_MAXUSER_ADDRESS-3)
        cmp     r0, r4
        mvncs   r0, #0
-       bcs     2f
+       bcs     1f
 
        GET_PCB(r6)
        ldr     r6, [r6]
@@ -78,12 +78,10 @@ EENTRY_NP(casueword32)
        str     r4, [r6, #PCB_ONFAULT]
 
 #if __ARM_ARCH >= 6
-1:
+       mov     r5, #1
        ldrex   r4, [r0]
        cmp     r4, r1
        strexeq r5, r3, [r0]
-       cmpeq   r5, #1
-       beq     1b
 #else
        ldrt    r4, [r0]
        cmp     r4, r1
@@ -92,7 +90,10 @@ EENTRY_NP(casueword32)
        str     r4, [r2]
        mov     r0, #0
        str     r0, [r6, #PCB_ONFAULT]
-2:
+#if __ARM_ARCH >= 6
+       mov     r0, r5
+#endif
+1:
        ldmfd   sp!, {r4, r5, r6}
        RET
 EEND(casueword32)

Modified: head/sys/arm64/arm64/support.S
==============================================================================
--- head/sys/arm64/arm64/support.S      Fri Jul 12 18:39:41 2019        
(r349950)
+++ head/sys/arm64/arm64/support.S      Fri Jul 12 18:43:24 2019        
(r349951)
@@ -57,17 +57,17 @@ ENTRY(casueword32)
        cmp     x0, x4
        b.cs    fsu_fault_nopcb
        adr     x6, fsu_fault           /* Load the fault handler */
+       mov     w5, #1
        SET_FAULT_HANDLER(x6, x4)       /* And set it */
        ENTER_USER_ACCESS(w6, x4)
 1:     ldxr    w4, [x0]                /* Load-exclusive the data */
        cmp     w4, w1                  /* Compare */
        b.ne    2f                      /* Not equal, exit */
        stxr    w5, w3, [x0]            /* Store the new data */
-       cbnz    w5, 1b                  /* Retry on failure */
 2:     EXIT_USER_ACCESS(w6)
-       SET_FAULT_HANDLER(xzr, x5)      /* Reset the fault handler */
+       SET_FAULT_HANDLER(xzr, x6)      /* Reset the fault handler */
        str     w4, [x2]                /* Store the read data */
-       mov     x0, #0                  /* Success */
+       mov     w0, w5                  /* Result same as store status */
        ret                             /* Return */
 END(casueword32)
 
@@ -79,17 +79,17 @@ ENTRY(casueword)
        cmp     x0, x4
        b.cs    fsu_fault_nopcb
        adr     x6, fsu_fault           /* Load the fault handler */
+       mov     w5, #1
        SET_FAULT_HANDLER(x6, x4)       /* And set it */
        ENTER_USER_ACCESS(w6, x4)
 1:     ldxr    x4, [x0]                /* Load-exclusive the data */
        cmp     x4, x1                  /* Compare */
        b.ne    2f                      /* Not equal, exit */
        stxr    w5, x3, [x0]            /* Store the new data */
-       cbnz    w5, 1b                  /* Retry on failure */
 2:     EXIT_USER_ACCESS(w6)
-       SET_FAULT_HANDLER(xzr, x5)      /* Reset the fault handler */
+       SET_FAULT_HANDLER(xzr, x6)      /* Reset the fault handler */
        str     x4, [x2]                /* Store the read data */
-       mov     x0, #0                  /* Success */
+       mov     w0, w5                  /* Result same as store status */
        ret                             /* Return */
 END(casueword)
 

Modified: head/sys/i386/i386/copyout.c
==============================================================================
--- head/sys/i386/i386/copyout.c        Fri Jul 12 18:39:41 2019        
(r349950)
+++ head/sys/i386/i386/copyout.c        Fri Jul 12 18:43:24 2019        
(r349951)
@@ -428,6 +428,7 @@ suword32(volatile void *base, int32_t word)
 struct casueword_arg0 {
        uint32_t oldval;
        uint32_t newval;
+       int res;
 };
 
 static void
@@ -436,7 +437,8 @@ casueword_slow0(vm_offset_t kva, void *arg)
        struct casueword_arg0 *ca;
 
        ca = arg;
-       atomic_fcmpset_int((u_int *)kva, &ca->oldval, ca->newval);
+       ca->res = 1 - atomic_fcmpset_int((u_int *)kva, &ca->oldval,
+           ca->newval);
 }
 
 int
@@ -452,7 +454,7 @@ casueword32(volatile uint32_t *base, uint32_t oldval, 
            casueword_slow0, &ca);
        if (res == 0) {
                *oldvalp = ca.oldval;
-               return (0);
+               return (ca.res);
        }
        return (-1);
 }
@@ -469,7 +471,7 @@ casueword(volatile u_long *base, u_long oldval, u_long
            casueword_slow0, &ca);
        if (res == 0) {
                *oldvalp = ca.oldval;
-               return (0);
+               return (ca.res);
        }
        return (-1);
 }

Modified: head/sys/kern/kern_umtx.c
==============================================================================
--- head/sys/kern/kern_umtx.c   Fri Jul 12 18:39:41 2019        (r349950)
+++ head/sys/kern/kern_umtx.c   Fri Jul 12 18:43:24 2019        (r349951)
@@ -690,8 +690,26 @@ umtxq_count_pi(struct umtx_key *key, struct umtx_q **f
        return (0);
 }
 
+/*
+ * Check for possible stops and suspensions while executing a umtx
+ * locking operation.
+ *
+ * The sleep argument controls whether the function can handle a stop
+ * request itself, or whether it should return ERESTART so that the
+ * request is processed at the kernel/user boundary in ast.
+ *
+ * Typically, when retrying due to casueword(9) failure (rv == 1), we
+ * should handle the stop requests there, with the exception of cases
+ * when the thread busied the umtx key, or when functions return
+ * immediately if umtxq_check_susp() returned non-zero.  On the other
+ * hand, when retrying the whole lock operation, we had better not stop
+ * there but delegate the handling to ast.
+ *
+ * If the request is for thread termination P_SINGLE_EXIT, we cannot
+ * handle it at all, and simply return EINTR.
+ */
 static int
-umtxq_check_susp(struct thread *td)
+umtxq_check_susp(struct thread *td, bool sleep)
 {
        struct proc *p;
        int error;
@@ -710,7 +728,7 @@ umtxq_check_susp(struct thread *td)
                if (p->p_flag & P_SINGLE_EXIT)
                        error = EINTR;
                else
-                       error = ERESTART;
+                       error = sleep ? thread_suspend_check(0) : ERESTART;
        }
        PROC_UNLOCK(p);
        return (error);
@@ -1049,9 +1067,12 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
                                    id | UMUTEX_CONTESTED);
                                if (rv == -1)
                                        return (EFAULT);
-                               if (owner == UMUTEX_RB_OWNERDEAD)
+                               if (rv == 0) {
+                                       MPASS(owner == UMUTEX_RB_OWNERDEAD);
                                        return (EOWNERDEAD); /* success */
-                               rv = umtxq_check_susp(td);
+                               }
+                               MPASS(rv == 1);
+                               rv = umtxq_check_susp(td, false);
                                if (rv != 0)
                                        return (rv);
                                continue;
@@ -1070,13 +1091,16 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
                                return (EFAULT);
 
                        /* The acquire succeeded. */
-                       if (owner == UMUTEX_UNOWNED)
+                       if (rv == 0) {
+                               MPASS(owner == UMUTEX_UNOWNED);
                                return (0);
+                       }
 
                        /*
                         * If no one owns it but it is contested try
                         * to acquire it.
                         */
+                       MPASS(rv == 1);
                        if (owner == UMUTEX_CONTESTED) {
                                rv = casueword32(&m->m_owner,
                                    UMUTEX_CONTESTED, &owner,
@@ -1084,20 +1108,27 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
                                /* The address was invalid. */
                                if (rv == -1)
                                        return (EFAULT);
-
-                               if (owner == UMUTEX_CONTESTED)
+                               if (rv == 0) {
+                                       MPASS(owner == UMUTEX_CONTESTED);
                                        return (0);
+                               }
+                               if (rv == 1) {
+                                       rv = umtxq_check_susp(td, false);
+                                       if (rv != 0)
+                                               return (rv);
+                               }
 
-                               rv = umtxq_check_susp(td);
-                               if (rv != 0)
-                                       return (rv);
-
                                /*
                                 * If this failed the lock has
                                 * changed, restart.
                                 */
                                continue;
                        }
+
+                       /* rv == 1 but not contested, likely store failure */
+                       rv = umtxq_check_susp(td, false);
+                       if (rv != 0)
+                               return (rv);
                }
 
                if (mode == _UMUTEX_TRY)
@@ -1128,14 +1159,21 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
                rv = casueword32(&m->m_owner, owner, &old,
                    owner | UMUTEX_CONTESTED);
 
-               /* The address was invalid. */
-               if (rv == -1) {
+               /* The address was invalid or casueword failed to store. */
+               if (rv == -1 || rv == 1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
-                       return (EFAULT);
+                       if (rv == -1)
+                               return (EFAULT);
+                       if (rv == 1) {
+                               rv = umtxq_check_susp(td, false);
+                               if (rv != 0)
+                                       return (rv);
+                       }
+                       continue;
                }
 
                /*
@@ -1145,15 +1183,15 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
                 */
                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
-               if (old == owner)
-                       error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
-                           NULL : &timo);
+               MPASS(old == owner);
+               error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
+                   NULL : &timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
 
                if (error == 0)
-                       error = umtxq_check_susp(td);
+                       error = umtxq_check_susp(td, false);
        }
 
        return (0);
@@ -1170,6 +1208,8 @@ do_unlock_normal(struct thread *td, struct umutex *m, 
        int error, count;
 
        id = td->td_tid;
+
+again:
        /*
         * Make sure we own this mtx.
         */
@@ -1185,9 +1225,14 @@ do_unlock_normal(struct thread *td, struct umutex *m, 
                error = casueword32(&m->m_owner, owner, &old, newlock);
                if (error == -1)
                        return (EFAULT);
-               if (old == owner)
-                       return (0);
-               owner = old;
+               if (error == 1) {
+                       error = umtxq_check_susp(td, false);
+                       if (error != 0)
+                               return (error);
+                       goto again;
+               }
+               MPASS(old == owner);
+               return (0);
        }
 
        /* We should only ever be in here for contested locks */
@@ -1215,8 +1260,14 @@ do_unlock_normal(struct thread *td, struct umutex *m, 
        umtx_key_release(&key);
        if (error == -1)
                return (EFAULT);
-       if (old != owner)
-               return (EINVAL);
+       if (error == 1) {
+               if (old != owner)
+                       return (EINVAL);
+               error = umtxq_check_susp(td, false);
+               if (error != 0)
+                       return (error);
+               goto again;
+       }
        return (0);
 }
 
@@ -1233,6 +1284,7 @@ do_wake_umutex(struct thread *td, struct umutex *m)
        int error;
        int count;
 
+again:
        error = fueword32(&m->m_owner, &owner);
        if (error == -1)
                return (EFAULT);
@@ -1259,14 +1311,27 @@ do_wake_umutex(struct thread *td, struct umutex *m)
            owner != UMUTEX_RB_NOTRECOV) {
                error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
                    UMUTEX_UNOWNED);
-               if (error == -1)
+               if (error == -1) {
                        error = EFAULT;
+               } else if (error == 1) {
+                       umtxq_lock(&key);
+                       umtxq_unbusy(&key);
+                       umtxq_unlock(&key);
+                       umtx_key_release(&key);
+                       error = umtxq_check_susp(td, false);
+                       if (error != 0)
+                               return (error);
+                       goto again;
+               }
        }
 
        umtxq_lock(&key);
-       if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
-           owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
+       if (error == 0 && count != 0) {
+               MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
+                   owner == UMUTEX_RB_OWNERDEAD ||
+                   owner == UMUTEX_RB_NOTRECOV);
                umtxq_signal(&key, 1);
+       }
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
@@ -1314,49 +1379,32 @@ do_wake2_umutex(struct thread *td, struct umutex *m, u
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);
+
+       error = fueword32(&m->m_owner, &owner);
+       if (error == -1)
+               error = EFAULT;
+
        /*
-        * Only repair contention bit if there is a waiter, this means the mutex
-        * is still being referenced by userland code, otherwise don't update
-        * any memory.
+        * Only repair contention bit if there is a waiter, this means
+        * the mutex is still being referenced by userland code,
+        * otherwise don't update any memory.
         */
-       if (count > 1) {
-               error = fueword32(&m->m_owner, &owner);
-               if (error == -1)
+       while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
+           (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
+               error = casueword32(&m->m_owner, owner, &old,
+                   owner | UMUTEX_CONTESTED);
+               if (error == -1) {
                        error = EFAULT;
-               while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
-                       error = casueword32(&m->m_owner, owner, &old,
-                           owner | UMUTEX_CONTESTED);
-                       if (error == -1) {
-                               error = EFAULT;
-                               break;
-                       }
-                       if (old == owner)
-                               break;
-                       owner = old;
-                       error = umtxq_check_susp(td);
-                       if (error != 0)
-                               break;
+                       break;
                }
-       } else if (count == 1) {
-               error = fueword32(&m->m_owner, &owner);
-               if (error == -1)
-                       error = EFAULT;
-               while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
-                   (owner & UMUTEX_CONTESTED) == 0) {
-                       error = casueword32(&m->m_owner, owner, &old,
-                           owner | UMUTEX_CONTESTED);
-                       if (error == -1) {
-                               error = EFAULT;
-                               break;
-                       }
-                       if (old == owner)
-                               break;
-                       owner = old;
-                       error = umtxq_check_susp(td);
-                       if (error != 0)
-                               break;
+               if (error == 0) {
+                       MPASS(old == owner);
+                       break;
                }
+               owner = old;
+               error = umtxq_check_susp(td, false);
        }
+
        umtxq_lock(&key);
        if (error == EFAULT) {
                umtxq_signal(&key, INT_MAX);
@@ -1842,9 +1890,9 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
                        error = EFAULT;
                        break;
                }
-
                /* The acquire succeeded. */
-               if (owner == UMUTEX_UNOWNED) {
+               if (rv == 0) {
+                       MPASS(owner == UMUTEX_UNOWNED);
                        error = 0;
                        break;
                }
@@ -1854,6 +1902,16 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
                        break;
                }
 
+               /*
+                * Avoid overwriting a possible error from sleep due
+                * to the pending signal with suspension check result.
+                */
+               if (error == 0) {
+                       error = umtxq_check_susp(td, true);
+                       if (error != 0)
+                               break;
+               }
+
                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
                        old_owner = owner;
@@ -1864,36 +1922,40 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
                                error = EFAULT;
                                break;
                        }
-
-                       if (owner == old_owner) {
-                               umtxq_lock(&uq->uq_key);
-                               umtxq_busy(&uq->uq_key);
-                               error = umtx_pi_claim(pi, td);
-                               umtxq_unbusy(&uq->uq_key);
-                               umtxq_unlock(&uq->uq_key);
-                               if (error != 0) {
-                                       /*
-                                        * Since we're going to return an
-                                        * error, restore the m_owner to its
-                                        * previous, unowned state to avoid
-                                        * compounding the problem.
-                                        */
-                                       (void)casuword32(&m->m_owner,
-                                           id | UMUTEX_CONTESTED,
-                                           old_owner);
+                       if (rv == 1) {
+                               if (error == 0) {
+                                       error = umtxq_check_susp(td, true);
+                                       if (error != 0)
+                                               return (error);
                                }
-                               if (error == 0 &&
-                                   old_owner == UMUTEX_RB_OWNERDEAD)
-                                       error = EOWNERDEAD;
-                               break;
+
+                               /*
+                                * If this failed the lock could have
+                                * changed, restart.
+                                */
+                               continue;
                        }
 
-                       error = umtxq_check_susp(td);
-                       if (error != 0)
-                               break;
-
-                       /* If this failed the lock has changed, restart. */
-                       continue;
+                       MPASS(rv == 0);
+                       MPASS(owner == old_owner);
+                       umtxq_lock(&uq->uq_key);
+                       umtxq_busy(&uq->uq_key);
+                       error = umtx_pi_claim(pi, td);
+                       umtxq_unbusy(&uq->uq_key);
+                       umtxq_unlock(&uq->uq_key);
+                       if (error != 0) {
+                               /*
+                                * Since we're going to return an
+                                * error, restore the m_owner to its
+                                * previous, unowned state to avoid
+                                * compounding the problem.
+                                */
+                               (void)casuword32(&m->m_owner,
+                                   id | UMUTEX_CONTESTED, old_owner);
+                       }
+                       if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
+                               error = EOWNERDEAD;
+                       break;
                }
 
                if ((owner & ~UMUTEX_CONTESTED) == id) {
@@ -1932,28 +1994,33 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
                        error = EFAULT;
                        break;
                }
-
-               umtxq_lock(&uq->uq_key);
-               /*
-                * We set the contested bit, sleep. Otherwise the lock changed
-                * and we need to retry or we lost a race to the thread
-                * unlocking the umtx.  Note that the UMUTEX_RB_OWNERDEAD
-                * value for owner is impossible there.
-                */
-               if (old == owner) {
-                       error = umtxq_sleep_pi(uq, pi,
-                           owner & ~UMUTEX_CONTESTED,
-                           "umtxpi", timeout == NULL ? NULL : &timo,
-                           (flags & USYNC_PROCESS_SHARED) != 0);
+               if (rv == 1) {
+                       umtxq_unbusy_unlocked(&uq->uq_key);
+                       error = umtxq_check_susp(td, true);
                        if (error != 0)
-                               continue;
-               } else {
-                       umtxq_unbusy(&uq->uq_key);
-                       umtxq_unlock(&uq->uq_key);
+                               break;
+
+                       /*
+                        * The lock changed and we need to retry or we
+                        * lost a race to the thread unlocking the
+                        * umtx.  Note that the UMUTEX_RB_OWNERDEAD
+                        * value for owner is impossible there.
+                        */
+                       continue;
                }
 
-               error = umtxq_check_susp(td);
+               umtxq_lock(&uq->uq_key);
+
+               /* We set the contested bit, sleep. */
+               MPASS(old == owner);
+               error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
+                   "umtxpi", timeout == NULL ? NULL : &timo,
+                   (flags & USYNC_PROCESS_SHARED) != 0);
                if (error != 0)
+                       continue;
+
+               error = umtxq_check_susp(td, false);
+               if (error != 0)
                        break;
        }
 
@@ -1978,6 +2045,8 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint
        int count, error, pri;
 
        id = td->td_tid;
+
+usrloop:
        /*
         * Make sure we own this mtx.
         */
@@ -1995,6 +2064,12 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint
                error = casueword32(&m->m_owner, owner, &old, new_owner);
                if (error == -1)
                        return (EFAULT);
+               if (error == 1) {
+                       error = umtxq_check_susp(td, true);
+                       if (error != 0)
+                               return (error);
+                       goto usrloop;
+               }
                if (old == owner)
                        return (0);
                owner = old;
@@ -2074,15 +2149,20 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint
 
        if (count > 1)
                new_owner |= UMUTEX_CONTESTED;
+again:
        error = casueword32(&m->m_owner, owner, &old, new_owner);
-
+       if (error == 1) {
+               error = umtxq_check_susp(td, false);
+               if (error == 0)
+                       goto again;
+       }
        umtxq_unbusy_unlocked(&key);
        umtx_key_release(&key);
        if (error == -1)
                return (EFAULT);
-       if (old != owner)
+       if (error == 0 && old != owner)
                return (EINVAL);
-       return (0);
+       return (error);
 }
 
 /*
@@ -2149,31 +2229,49 @@ do_lock_pp(struct thread *td, struct umutex *m, uint32
                        error = EFAULT;
                        break;
                }
-
-               if (owner == UMUTEX_CONTESTED) {
+               if (rv == 0) {
+                       MPASS(owner == UMUTEX_CONTESTED);
                        error = 0;
                        break;
-               } else if (owner == UMUTEX_RB_OWNERDEAD) {
+               }
+               /* rv == 1 */
+               if (owner == UMUTEX_RB_OWNERDEAD) {
                        rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
                            &owner, id | UMUTEX_CONTESTED);
                        if (rv == -1) {
                                error = EFAULT;
                                break;
                        }
-                       if (owner == UMUTEX_RB_OWNERDEAD) {
+                       if (rv == 0) {
+                               MPASS(owner == UMUTEX_RB_OWNERDEAD);
                                error = EOWNERDEAD; /* success */
                                break;
                        }
-                       error = 0;
+
+                       /*
+                        *  rv == 1, only check for suspension if we
+                        *  did not already catch a signal.  If we
+                        *  get an error from the check, the same
+                        *  condition is checked by the umtxq_sleep()
+                        *  call below, so we should obliterate the
+                        *  error to not skip the last loop iteration.
+                        */
+                       if (error == 0) {
+                               error = umtxq_check_susp(td, false);
+                               if (error == 0) {
+                                       if (try != 0)
+                                               error = EBUSY;
+                                       else
+                                               continue;
+                               }
+                               error = 0;
+                       }
                } else if (owner == UMUTEX_RB_NOTRECOV) {
                        error = ENOTRECOVERABLE;
-                       break;
                }
 
-               if (try != 0) {
+               if (try != 0)
                        error = EBUSY;
-                       break;
-               }
 
                /*
                 * If we caught a signal, we have retried and now
@@ -2668,11 +2766,12 @@ do_rw_rdlock(struct thread *td, struct urwlock *rwlock
                                umtx_key_release(&uq->uq_key);
                                return (EFAULT);
                        }
-                       if (oldstate == state) {
+                       if (rv == 0) {
+                               MPASS(oldstate == state);
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
-                       error = umtxq_check_susp(td);
+                       error = umtxq_check_susp(td, true);
                        if (error != 0)
                                break;
                        state = oldstate;
@@ -2703,10 +2802,12 @@ do_rw_rdlock(struct thread *td, struct urwlock *rwlock
                                error = EFAULT;
                                break;
                        }
-                       if (oldstate == state)
+                       if (rv == 0) {
+                               MPASS(oldstate == state);
                                goto sleep;
+                       }
                        state = oldstate;
-                       error = umtxq_check_susp(td);
+                       error = umtxq_check_susp(td, false);
                        if (error != 0)
                                break;
                }
@@ -2718,7 +2819,7 @@ do_rw_rdlock(struct thread *td, struct urwlock *rwlock
                /* state is changed while setting flags, restart */
                if (!(state & wrflags)) {
                        umtxq_unbusy_unlocked(&uq->uq_key);
-                       error = umtxq_check_susp(td);
+                       error = umtxq_check_susp(td, true);
                        if (error != 0)
                                break;
                        continue;
@@ -2781,10 +2882,12 @@ sleep:
                                        error = EFAULT;
                                        break;
                                }
-                               if (oldstate == state)
+                               if (rv == 0) {
+                                       MPASS(oldstate == state);
                                        break;
+                               }
                                state = oldstate;
-                               error1 = umtxq_check_susp(td);
+                               error1 = umtxq_check_susp(td, false);
                                if (error1 != 0) {
                                        if (error == 0)
                                                error = error1;
@@ -2840,22 +2943,25 @@ do_rw_wrlock(struct thread *td, struct urwlock *rwlock
                                umtx_key_release(&uq->uq_key);
                                return (EFAULT);
                        }
-                       if (oldstate == state) {
+                       if (rv == 0) {
+                               MPASS(oldstate == state);
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
-                       error = umtxq_check_susp(td);
+                       error = umtxq_check_susp(td, true);
                        if (error != 0)
                                break;
                }
 
                if (error) {
-                       if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
+                       if ((state & (URWLOCK_WRITE_OWNER |
+                           URWLOCK_WRITE_WAITERS)) == 0 &&
                            blocked_readers != 0) {
                                umtxq_lock(&uq->uq_key);
                                umtxq_busy(&uq->uq_key);
-                               umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
+                               umtxq_signal_queue(&uq->uq_key, INT_MAX,
+                                   UMTX_SHARED_QUEUE);
                                umtxq_unbusy(&uq->uq_key);
                                umtxq_unlock(&uq->uq_key);
                        }
@@ -2885,10 +2991,12 @@ do_rw_wrlock(struct thread *td, struct urwlock *rwlock
                                error = EFAULT;
                                break;
                        }
-                       if (oldstate == state)
+                       if (rv == 0) {
+                               MPASS(oldstate == state);
                                goto sleep;
+                       }
                        state = oldstate;
-                       error = umtxq_check_susp(td);
+                       error = umtxq_check_susp(td, false);
                        if (error != 0)
                                break;
                }
@@ -2900,7 +3008,7 @@ do_rw_wrlock(struct thread *td, struct urwlock *rwlock
                if ((state & URWLOCK_WRITE_OWNER) == 0 &&
                    URWLOCK_READER_COUNT(state) == 0) {
                        umtxq_unbusy_unlocked(&uq->uq_key);
-                       error = umtxq_check_susp(td);
+                       error = umtxq_check_susp(td, false);
                        if (error != 0)
                                break;
                        continue;
@@ -2958,10 +3066,12 @@ sleep:
                                        error = EFAULT;
                                        break;
                                }
-                               if (oldstate == state)
+                               if (rv == 0) {
+                                       MPASS(oldstate == state);
                                        break;
+                               }
                                state = oldstate;
-                               error1 = umtxq_check_susp(td);
+                               error1 = umtxq_check_susp(td, false);
                                /*
                                 * We are leaving the URWLOCK_WRITE_WAITERS
                                 * behind, but this should not harm the
@@ -3021,13 +3131,13 @@ do_rw_unlock(struct thread *td, struct urwlock *rwlock
                                error = EFAULT;
                                goto out;
                        }
-                       if (oldstate != state) {
+                       if (rv == 1) {
                                state = oldstate;
                                if (!(oldstate & URWLOCK_WRITE_OWNER)) {
                                        error = EPERM;
                                        goto out;
                                }
-                               error = umtxq_check_susp(td);
+                               error = umtxq_check_susp(td, true);
                                if (error != 0)
                                        goto out;
                        } else
@@ -3041,13 +3151,13 @@ do_rw_unlock(struct thread *td, struct urwlock *rwlock
                                error = EFAULT;
                                goto out;
                        }
-                       if (oldstate != state) {
+                       if (rv == 1) {
                                state = oldstate;
                                if (URWLOCK_READER_COUNT(oldstate) == 0) {
                                        error = EPERM;
                                        goto out;
                                }
-                               error = umtxq_check_susp(td);
+                               error = umtxq_check_susp(td, true);
                                if (error != 0)
                                        goto out;
                        } else
@@ -3097,7 +3207,7 @@ do_sem_wait(struct thread *td, struct _usem *sem, stru
        struct abs_timeout timo;
        struct umtx_q *uq;
        uint32_t flags, count, count1;
-       int error, rv;
+       int error, rv, rv1;
 
        uq = td->td_umtxq;
        error = fueword32(&sem->_flags, &flags);
@@ -3110,20 +3220,30 @@ do_sem_wait(struct thread *td, struct _usem *sem, stru
        if (timeout != NULL)
                abs_timeout_init2(&timo, timeout);
 
+again:
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unlock(&uq->uq_key);
        rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
        if (rv == 0)
-               rv = fueword32(&sem->_count, &count);
-       if (rv == -1 || count != 0) {
+               rv1 = fueword32(&sem->_count, &count);
+       if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || rv == 1) {
                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
-               umtx_key_release(&uq->uq_key);
-               return (rv == -1 ? EFAULT : 0);
+               if (rv == 1) {
+                       rv = umtxq_check_susp(td, true);
+                       if (rv == 0)
+                               goto again;
+                       error = rv;
+                       goto out;
+               }
+               if (rv == 0)
+                       rv = rv1;
+               error = rv == -1 ? EFAULT : 0;
+               goto out;
        }
        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
@@ -3140,6 +3260,7 @@ do_sem_wait(struct thread *td, struct _usem *sem, stru
                        error = EINTR;
        }
        umtxq_unlock(&uq->uq_key);
+out:
        umtx_key_release(&uq->uq_key);
        return (error);
 }
@@ -3201,6 +3322,7 @@ do_sem2_wait(struct thread *td, struct _usem2 *sem, st
        if (timeout != NULL)
                abs_timeout_init2(&timo, timeout);
 
+again:
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_insert(uq);
@@ -3226,16 +3348,19 @@ do_sem2_wait(struct thread *td, struct _usem2 *sem, st
                if (count == USEM_HAS_WAITERS)
                        break;
                rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
-               if (rv == -1) {
-                       umtxq_lock(&uq->uq_key);
-                       umtxq_unbusy(&uq->uq_key);
-                       umtxq_remove(uq);
-                       umtxq_unlock(&uq->uq_key);
-                       umtx_key_release(&uq->uq_key);
-                       return (EFAULT);
-               }
-               if (count == 0)
+               if (rv == 0)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to