As pointed by Mateusz Guzik [0], on x86 the cmpxchg{q,l} used by rw_enter(9) and rw_exit(9) already include an implicit memory barrier, so we can avoids using an explicit expensive one by using the following variants.
[0] https://marc.info/?l=openbsd-tech&m=150765959923113&w=2 ok? Index: kern/kern_rwlock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.31 diff -u -p -r1.31 kern_rwlock.c --- kern/kern_rwlock.c 12 Oct 2017 09:19:45 -0000 1.31 +++ kern/kern_rwlock.c 16 Oct 2017 08:24:27 -0000 @@ -96,7 +96,7 @@ _rw_enter_read(struct rwlock *rwl LOCK_F rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) _rw_enter(rwl, RW_READ LOCK_FL_ARGS); else { - membar_enter(); + membar_enter_after_atomic(); WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line, NULL); WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line); @@ -112,7 +112,7 @@ _rw_enter_write(struct rwlock *rwl LOCK_ RW_PROC(p) | RWLOCK_WRLOCK))) _rw_enter(rwl, RW_WRITE LOCK_FL_ARGS); else { - membar_enter(); + membar_enter_after_atomic(); WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL); WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); @@ -126,7 +126,7 @@ _rw_exit_read(struct rwlock *rwl LOCK_FL rw_assert_rdlock(rwl); - membar_exit(); + membar_exit_before_atomic(); if (__predict_false((owner & RWLOCK_WAIT) || rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR))) _rw_exit(rwl LOCK_FL_ARGS); @@ -141,7 +141,7 @@ _rw_exit_write(struct rwlock *rwl LOCK_F rw_assert_wrlock(rwl); - membar_exit(); + membar_exit_before_atomic(); if (__predict_false((owner & RWLOCK_WAIT) || rw_cas(&rwl->rwl_owner, owner, 0))) _rw_exit(rwl LOCK_FL_ARGS); @@ -261,7 +261,7 @@ retry: if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc))) goto retry; - membar_enter(); + membar_enter_after_atomic(); /* * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we @@ -295,7 +295,7 @@ _rw_exit(struct rwlock *rwl LOCK_FL_VARS WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0, file, line); - membar_exit(); + membar_exit_before_atomic(); do { owner = rwl->rwl_owner; if (wrlock)