On Mon, Aug 15, 2011 at 9:08 PM, Paolo Bonzini <pbonz...@redhat.com> wrote: > This includes a (mangled) copy of the urcu-qsbr code from liburcu. > The main changes are: 1) removing dependencies on many other header files > in liburcu; 2) removing for simplicity the tentative busy waiting in > synchronize_rcu, which has limited performance effects; 3) replacing > futexes in synchronize_rcu with QemuEvents for Win32 portability. > The API is the same as liburcu, so it should be possible in the future > to require liburcu on POSIX systems for example and use our copy only > on Windows. > > Among the various versions available I chose urcu-qsbr, which has the > fastest rcu_read_{lock,unlock} but requires the program to manually > annotate quiescent points or intervals. QEMU threads usually have easily > identified quiescent periods, so this should not be a problem. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > Makefile.objs | 2 +- > qemu-queue.h | 11 +++ > rcu-pointer.h | 119 +++++++++++++++++++++++++++++++ > rcu.c | 221 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > rcu.h | 135 +++++++++++++++++++++++++++++++++++ > 5 files changed, 487 insertions(+), 1 deletions(-) > create mode 100644 rcu-pointer.h > create mode 100644 rcu.c > create mode 100644 rcu.h > > diff --git a/Makefile.objs b/Makefile.objs > index 16eef38..902a083 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -6,7 +6,7 @@ qobject-obj-y += qerror.o error.o > > ####################################################################### > # oslib-obj-y is code depending on the OS (win32 vs posix) > -oslib-obj-y = osdep.o > +oslib-obj-y = osdep.o rcu.o > oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o > oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o > > diff --git a/qemu-queue.h b/qemu-queue.h > index 1d07745..cc8e55b 100644 > --- a/qemu-queue.h > +++ b/qemu-queue.h > @@ -100,6 +100,17 @@ struct { > \ > (head)->lh_first = NULL; \ > } while (/*CONSTCOND*/0) > > +#define QLIST_SWAP(dstlist, srclist, field) do { \ > + void *tmplist; \ > + tmplist = (srclist)->lh_first; \ > + (srclist)->lh_first = (dstlist)->lh_first; \ > + if ((srclist)->lh_first != NULL) \ > + (srclist)->lh_first->field.le_prev = &(srclist)->lh_first; \ > + (dstlist)->lh_first = tmplist; \ > + if ((dstlist)->lh_first != NULL) \ > + (dstlist)->lh_first->field.le_prev = &(dstlist)->lh_first; \ > +} while (/*CONSTCOND*/0) > + > #define QLIST_INSERT_AFTER(listelm, elm, field) do { \ > if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ > (listelm)->field.le_next->field.le_prev = \ > diff --git a/rcu-pointer.h b/rcu-pointer.h > new file mode 100644 > index 0000000..4381313 > --- /dev/null > +++ b/rcu-pointer.h > @@ -0,0 +1,119 @@ > +#ifndef _URCU_POINTER_STATIC_H > +#define _URCU_POINTER_STATIC_H > + > +/* > + * urcu-pointer-static.h > + * > + * Userspace RCU header. Operations on pointers. > + * > + * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu-pointer.h for > + * linking dynamically with the userspace rcu library. > + * > + * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoy...@efficios.com> > + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + * > + * IBM's contributions to this file may be relicensed under LGPLv2 or later. > + */ > + > +#include "compiler.h" > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#ifdef __alpha__ > +#define smp_read_barrier_depends() asm volatile("mb":::"memory") > +#else > +#define smp_read_barrier_depends() > +#endif > + > +/** > + * rcu_dereference - reads (copy) a RCU-protected pointer to a local variable > + * into a RCU read-side critical section. The pointer can later be safely > + * dereferenced within the critical section. > + * > + * This ensures that the pointer copy is invariant thorough the whole > critical > + * section. > + * > + * Inserts memory barriers on architectures that require them (currently only > + * Alpha) and documents which pointers are protected by RCU. > + * > + * The compiler memory barrier in ACCESS_ONCE() ensures that > value-speculative > + * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform > the > + * data read before the pointer read by speculating the value of the pointer. > + * Correct ordering is ensured because the pointer is read as a volatile > access. > + * This acts as a global side-effect operation, which forbids reordering of > + * dependent memory operations. Note that such concern about > dependency-breaking > + * optimizations will eventually be taken care of by the > "memory_order_consume" > + * addition to forthcoming C++ standard. > + * > + * Should match rcu_assign_pointer() or rcu_xchg_pointer(). > + */ > + > +#define rcu_dereference(p) ({ \ > + typeof(p) _________p1 = ACCESS_ONCE(p); \ > + smp_read_barrier_depends(); \ > + (_________p1); \ > + }) > + > +/** > + * rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer > + * is as expected by "old". If succeeds, returns the previous pointer to the > + * data structure, which can be safely freed after waiting for a quiescent > state > + * using synchronize_rcu(). If fails (unexpected value), returns old (which > + * should not be freed !). > + */ > + > +#define rcu_cmpxchg_pointer(p, old, _new) \ > + ({ \ > + typeof(*p) _________pold = (old); \ > + typeof(*p) _________pnew = (_new); \ > + if (!__builtin_constant_p(_new) || \ > + ((_new) != NULL)) \ > + __sync_synchronize(); \ > + uatomic_cmpxchg(p, _________pold, _________pnew); \ > + }) > + > +#define rcu_set_pointer(p, v) \ > + ({ \ > + typeof(*p) _________pv = (v); \ > + if (!__builtin_constant_p(v) || \ > + ((v) != NULL)) \ > + __sync_synchronize(); \ > + ACCESS_ONCE(*p) = _________pv; \ > + }) > + > +/** > + * rcu_assign_pointer - assign (publicize) a pointer to a new data structure > + * meant to be read by RCU read-side critical sections. Returns the assigned > + * value. > + * > + * Documents which pointers will be dereferenced by RCU read-side critical > + * sections and adds the required memory barriers on architectures requiring > + * them. It also makes sure the compiler does not reorder code initializing > the > + * data structure before its publication. > + * > + * Should match rcu_dereference_pointer(). > + */ > + > +#define rcu_assign_pointer(p, v) rcu_set_pointer(&(p), v) > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _URCU_POINTER_STATIC_H */ > diff --git a/rcu.c b/rcu.c > new file mode 100644 > index 0000000..e2f347a > --- /dev/null > +++ b/rcu.c > @@ -0,0 +1,221 @@ > +/* > + * urcu-qsbr.c > + * > + * Userspace RCU QSBR library > + * > + * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoy...@efficios.com> > + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. > + * Copyright 2011 Red Hat, Inc. > + * > + * Ported to QEMU by Paolo Bonzini <pbonz...@redhat.com> > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + * > + * IBM's contributions to this file may be relicensed under LGPLv2 or later. > + */ > + > +#include <stdio.h> > +#include <assert.h> > +#include <stdlib.h> > +#include <stdint.h> > +#include <errno.h> > +#include "rcu.h" > +#include "qemu-barrier.h" > + > +/* > + * Global grace period counter. Bit 0 is one if the thread is online. > + * Bits 1 and above are defined in synchronize_rcu/update_counter_and_wait. > + */ > +#define RCU_GP_ONLINE (1UL << 0) > +#define RCU_GP_CTR (1UL << 1) > + > +unsigned long rcu_gp_ctr = RCU_GP_ONLINE; > + > +QemuEvent rcu_gp_event; > +QemuMutex rcu_gp_lock; > + > +/* > + * Check whether a quiescent state was crossed between the beginning of > + * update_counter_and_wait and now. > + */ > +static inline int rcu_gp_ongoing(unsigned long *ctr) > +{ > + unsigned long v; > + > + v = ACCESS_ONCE(*ctr); > + return v && (v != rcu_gp_ctr); > +} > + > +/* > + * Written to only by each individual reader. Read by both the reader and the > + * writers. > + */ > +__thread struct rcu_reader rcu_reader; > + > +typedef QLIST_HEAD(, rcu_reader) ThreadList; > +static ThreadList registry = QLIST_HEAD_INITIALIZER(registry); > + > +static void update_counter_and_wait(void) > +{ > + ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders); > + struct rcu_reader *index, *tmp; > + > + if (sizeof (rcu_gp_ctr) <= 4) { > + /* Switch parity: 0 -> 1, 1 -> 0 */ > + ACCESS_ONCE(rcu_gp_ctr) = rcu_gp_ctr ^ RCU_GP_CTR; > + } else { > + /* Increment current grace period. */ > + ACCESS_ONCE(rcu_gp_ctr) = rcu_gp_ctr + RCU_GP_CTR; > + } > + > + barrier(); > + > + /* > + * Wait for each thread rcu_reader_qs_gp count to become 0. > + */ > + for (;;) { > + /* > + * We want to be notified of changes made to rcu_gp_ongoing > + * while we walk the list. > + */ > + qemu_event_reset(&rcu_gp_event); > + QLIST_FOREACH_SAFE(index, ®istry, node, tmp) { > + if (!rcu_gp_ongoing(&index->ctr)) { > + QLIST_REMOVE(index, node); > + QLIST_INSERT_HEAD(&qsreaders, index, node); > + } > + } > + > + if (QLIST_EMPTY(®istry)) { > + break; > + } > + > + /* > + * Wait for one thread to report a quiescent state and > + * try again. > + */ > + qemu_event_wait(&rcu_gp_event); > + } > + smp_mb(); > + > + /* put back the reader list in the registry */ > + QLIST_SWAP(®istry, &qsreaders, node); > +} > + > +/* > + * Using a two-subphases algorithm for architectures with smaller than 64-bit > + * long-size to ensure we do not encounter an overflow bug. > + */ > + > +void synchronize_rcu(void) > +{ > + unsigned long was_online; > + > + was_online = rcu_reader.ctr; > + > + /* All threads should read qparity before accessing data structure > + * where new ptr points to. > + */ > + > + /* > + * Mark the writer thread offline to make sure we don't wait for > + * our own quiescent state. This allows using synchronize_rcu() > + * in threads registered as readers. > + * > + * Also includes a memory barrier. > + */ > + if (was_online) { > + rcu_thread_offline(); > + } else { > + smp_mb(); > + } > + > + qemu_mutex_lock(&rcu_gp_lock); > + > + if (QLIST_EMPTY(®istry)) > + goto out; > + > + if (sizeof(rcu_gp_ctr) <= 4) { > + /* > + * Wait for previous parity to be empty of readers. > + */ > + update_counter_and_wait(); /* 0 -> 1, wait readers in > parity 0 */ > + > + /* > + * Must finish waiting for quiescent state for parity 0 before > + * committing next rcu_gp_ctr update to memory. Failure to > + * do so could result in the writer waiting forever while new > + * readers are always accessing data (no progress). Enforce > + * compiler-order of load rcu_reader ctr before store to > + * rcu_gp_ctr. > + */ > + barrier(); > + > + /* > + * Adding a memory barrier which is _not_ formally required, > + * but makes the model easier to understand. It does not have > a > + * big performance impact anyway, given this is the > write-side. > + */ > + smp_mb(); > + } > + > + /* > + * Wait for previous parity/grace period to be empty of readers. > + */ > + update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */ > +out: > + qemu_mutex_unlock(&rcu_gp_lock); > + > + if (was_online) { > + /* Also includes a memory barrier. */ > + rcu_thread_online(); > + } else { > + /* > + * Finish waiting for reader threads before letting the old > + * ptr being freed. > + */ > + smp_mb(); > + } > +} > + > +static void rcu_init(void) > +{ > + qemu_mutex_init(&rcu_gp_lock); > + qemu_event_init(&rcu_gp_event, true); > +} > + > +void rcu_register_thread(void) > +{ > + static QemuOnce init = QEMU_ONCE_INIT; > + assert(rcu_reader.ctr == 0); > + > + qemu_once(&init, rcu_init); > + qemu_mutex_lock(&rcu_gp_lock); > + QLIST_INSERT_HEAD(®istry, &rcu_reader, node); > + qemu_mutex_unlock(&rcu_gp_lock); > + rcu_thread_online(); > +} > + > +void rcu_unregister_thread(void) > +{ > + /* > + * We have to make the thread offline otherwise we end up dealocking > + * with a waiting writer. > + */ > + rcu_thread_offline(); > + qemu_mutex_lock(&rcu_gp_lock); > + QLIST_REMOVE(&rcu_reader, node); > + qemu_mutex_unlock(&rcu_gp_lock); > +} > diff --git a/rcu.h b/rcu.h > new file mode 100644 > index 0000000..569f696 > --- /dev/null > +++ b/rcu.h > @@ -0,0 +1,135 @@ > +#ifndef _URCU_QSBR_H > +#define _URCU_QSBR_H > + > +/* > + * urcu-qsbr.h > + * > + * Userspace RCU QSBR header. > + * > + * LGPL-compatible code should include this header with : > + * > + * #define _LGPL_SOURCE > + * #include <urcu.h> > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + * > +c* IBM's contributions to this file may be relicensed under LGPLv2 or later. > + */ > + > +#include <stdlib.h> > +#include <assert.h> > +#include <limits.h> > +#include <unistd.h> > +#include <stdint.h> > +#include <stdbool.h> > + > +#include "compiler.h" > +#include "rcu-pointer.h" > +#include "qemu-thread.h" > +#include "qemu-queue.h" > +#include "qemu-barrier.h" > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +/* > + * Important ! > + * > + * Each thread containing read-side critical sections must be registered > + * with rcu_register_thread() before calling rcu_read_lock(). > + * rcu_unregister_thread() should be called before the thread exits. > + */ > + > +#ifdef DEBUG_RCU > +#define rcu_assert(args...) assert(args) > +#else > +#define rcu_assert(args...) > +#endif > + > +#define RCU_GP_ONLINE (1UL << 0) > +#define RCU_GP_CTR (1UL << 1) > + > +/* > + * Global quiescent period counter with low-order bits unused. > + * Using a int rather than a char to eliminate false register dependencies > + * causing stalls on some architectures. > + */ > +extern unsigned long rcu_gp_ctr; > + > +extern QemuEvent rcu_gp_event; > + > +struct rcu_reader { > + /* Data used by both reader and synchronize_rcu() */ > + unsigned long ctr; > + /* Data used for registry */ > + QLIST_ENTRY(rcu_reader) node; > +}; > + > +extern __thread struct rcu_reader rcu_reader;
$ cat thread.c static __thread int sigusr1_wfd; $ gcc -v Reading specs from /usr/bin/../lib/gcc-lib/sparc64-unknown-openbsd4.9/4.2.1/specs Target: sparc64-unknown-openbsd4.9 Configured with: OpenBSD/sparc64 system compiler Thread model: posix gcc version 4.2.1 20070719 $ gcc -c -pthread thread.c thread.c:1: error: thread-local storage not supported for this target > + > +static inline void rcu_read_lock(void) > +{ > + rcu_assert(rcu_reader.ctr); > +} > + > +static inline void rcu_read_unlock(void) > +{ > +} > + > +static inline void rcu_quiescent_state(void) > +{ > + smp_mb(); > + ACCESS_ONCE(rcu_reader.ctr) = ACCESS_ONCE(rcu_gp_ctr); > +#if 0 > + /* write rcu_reader.ctr before read futex. Included in > + * qemu_event_set. */ > + smp_mb(); > +#endif > + qemu_event_set(&rcu_gp_event); > +} > + > +static inline void rcu_thread_offline(void) > +{ > + smp_mb(); > + ACCESS_ONCE(rcu_reader.ctr) = 0; > +#if 0 > + /* write rcu_reader.ctr before read futex. Included in > + * qemu_event_set. */ > + smp_mb(); > +#endif > + qemu_event_set(&rcu_gp_event); > +} > + > +static inline void rcu_thread_online(void) > +{ > + smp_mb(); /* Ensure the compiler does not reorder us with mutex > */ > + ACCESS_ONCE(rcu_reader.ctr) = ACCESS_ONCE(rcu_gp_ctr); > + smp_mb(); > +} > + > +extern void synchronize_rcu(void); > + > +/* > + * Reader thread registration. > + */ > +extern void rcu_register_thread(void); > +extern void rcu_unregister_thread(void); > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _URCU_QSBR_H */ > -- > 1.7.6 > > > >