On Mon, Jun 27, 2016 at 02:45:06PM +0800, Boqun Feng wrote:
> +++ b/include/linux/vcpu_preempt.h
> @@ -0,0 +1,82 @@
> +/*
> + * Primitives for checking the vcpu preemption from the guest.
> + */
> +
> +static long __vcpu_preempt_count(void)
> +{
> +	return 0;
> +}
> +
> +static bool __vcpu_has_preempted(long vpc)
> +{
> +	return false;
> +}
> +
> +static bool __vcpu_is_preempted(int cpu)
> +{
> +	return false;
> +}
> +
> +struct vcpu_preempt_ops {
> +	/*
> +	 * Get the current vcpu's "preempt count", which is going to use for
> +	 * checking whether the current vcpu has ever been preempted
> +	 */
> +	long (*preempt_count)(void);
> +
> +	/*
> +	 * Return whether a vcpu is preempted
> +	 */
> +	bool (*is_preempted)(int cpu);
> +
> +	/*
> +	 * Given a "vcpu preempt count", Return whether a vcpu preemption ever
> +	 * happened after the .preempt_count() was called.
> +	 */
> +	bool (*has_preempted)(long vpc);
> +};
> +
> +extern struct vcpu_preempt_ops vcpu_preempt_ops;
> +
> +/* Default boilerplate */
> +#define DEFAULT_VCPU_PREEMPT_OPS			\
> +	{						\
> +		.preempt_count = __vcpu_preempt_count,	\
> +		.is_preempted = __vcpu_is_preempted,	\
> +		.has_preempted = __vcpu_has_preempted	\
> +	}
> +
> +#ifdef CONFIG_HAS_VCPU_PREEMPTION_DETECTION
> +/*
> + * vcpu_preempt_count: Get the current cpu's "vcpu preempt count"(vpc).
> + *
> + * The vpc is used for checking whether the current vcpu has ever been
> + * preempted via vcpu_has_preempted().
> + *
> + * This function and vcpu_has_preepmted() should be called in the same
> + * preemption disabled critical section.
> + */
> +#define vcpu_preempt_count() vcpu_preempt_ops.preempt_count()
> +
> +/*
> + * vcpu_is_preempted: Check whether @cpu's vcpu is preempted.
> + */
> +#define vcpu_is_preempted(cpu) vcpu_preempt_ops.is_preempted(cpu)
> +
> +/*
> + * vcpu_has_preepmted: Check whether the current cpu's vcpu has ever been
> + * preempted.
> + *
> + * The checked duration is between the vcpu_preempt_count() which returns @vpc
> + * is called and this function called.
> + *
> + * This function and corresponding vcpu_preempt_count() should be in the same
> + * preemption disabled cirtial section.
> + */
> +#define vcpu_has_preempted(vpc) vcpu_preempt_ops.has_preempted(vpc)
> +
> +#else /* CONFIG_HAS_VCPU_PREEMPTION_DETECTION */
> +#define vcpu_preempt_count() __vcpu_preempt_count()
> +#define vcpu_is_preempted(cpu) __vcpu_is_preempted(cpu)
> +#define vcpu_has_preempted(vpc) __vcpu_has_preempted(vpc)
> +#endif /* CONFIG_HAS_VCPU_PREEPMTION_DETECTION */
No, this is entirely insane, also broken. No vectors, no actual function
calls, nothing like that. You want the below to completely compile away
and generate the exact 100% same code it does today.

> +++ b/kernel/locking/osq_lock.c
> @@ -1,6 +1,7 @@
>  #include <linux/percpu.h>
>  #include <linux/sched.h>
>  #include <linux/osq_lock.h>
> +#include <linux/vcpu_preempt.h>
>  
>  /*
>   * An MCS like lock especially tailored for optimistic spinning for sleeping
> @@ -87,6 +88,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>  	struct optimistic_spin_node *prev, *next;
>  	int curr = encode_cpu(smp_processor_id());
>  	int old;
> +	int loops;
> +	long vpc;
>  
>  	node->locked = 0;
>  	node->next = NULL;
> @@ -106,6 +109,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>  	node->prev = prev;
>  	WRITE_ONCE(prev->next, node);
>  
> +	old = old - 1;

That's just nasty, and could result in an unconditional decrement being
issued, even though it's never used.

> +	vpc = vcpu_preempt_count();
> +
>  	/*
>  	 * Normally @prev is untouchable after the above store; because at that
>  	 * moment unlock can proceed and wipe the node element from stack.
> @@ -118,8 +124,14 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>  	while (!READ_ONCE(node->locked)) {
>  		/*
>  		 * If we need to reschedule bail... so we can block.
> +		 * An over-committed guest with more vCPUs than pCPUs
> +		 * might fall in this loop and cause a huge overload.
> +		 * This is because vCPU A(prev) hold the osq lock and yield out,
> +		 * vCPU B(node) wait ->locked to be set, IOW, wait till
> +		 * vCPU A run and unlock the osq lock.
> +		 * NOTE that vCPU A and vCPU B might run on same physical cpu.
>  		 */
> -		if (need_resched())
> +		if (need_resched() || vcpu_is_preempted(old) ||
> +		    vcpu_has_preempted(vpc))
>  			goto unqueue;
> 
>  		cpu_relax_lowlatency();
> 
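Something along these lines is what I mean: the generic code only ever sees
a static inline stub that an architecture may override, so without an
override the condition folds back to plain need_resched() and osq_lock()
compiles to the exact same text it does today. Rough sketch only --
vcpu_is_preempted() as a plain inline and the node_cpu() helper are names
I'm making up here, they don't exist yet:

/* some generic header -- nothing but an overridable stub */
#ifndef vcpu_is_preempted
/*
 * Default for bare metal, or a guest without preemption detection;
 * a compile-time constant, so the extra test below vanishes entirely.
 */
static inline bool vcpu_is_preempted(int cpu)
{
	return false;
}
#endif

/* kernel/locking/osq_lock.c */

/* hypothetical helper: ->cpu holds the encoded (cpu_nr + 1) value */
static inline int node_cpu(struct optimistic_spin_node *node)
{
	return node->cpu - 1;
}

	while (!READ_ONCE(node->locked)) {
		/*
		 * If we need to reschedule bail... so we can block.
		 * Also bail when the lock holder's vcpu got preempted;
		 * spinning on ->locked then only burns the physical cpu.
		 */
		if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
			goto unqueue;

		cpu_relax_lowlatency();
	}

No ops structure, no indirect calls, and no 'old - 1' computed up front; an
architecture that can actually tell (steal time and the like) provides its
own vcpu_is_preempted() and #defines the name so the stub never gets used.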