On Mon, Jun 27, 2016 at 02:45:06PM +0800, Boqun Feng wrote:
> +++ b/include/linux/vcpu_preempt.h
> @@ -0,0 +1,82 @@
> +/*
> + * Primitives for checking the vcpu preemption from the guest.
> + */
> +
> +static long __vcpu_preempt_count(void)
> +{
> +     return 0;
> +}
> +
> +static bool __vcpu_has_preempted(long vpc)
> +{
> +     return false;
> +}
> +
> +static bool __vcpu_is_preempted(int cpu)
> +{
> +     return false;
> +}
> +
> +struct vcpu_preempt_ops {
> +     /*
> +      * Get the current vcpu's "preempt count", which is going to use for
> +      * checking whether the current vcpu has ever been preempted
> +      */
> +     long (*preempt_count)(void);
> +
> +     /*
> +      * Return whether a vcpu is preempted
> +      */
> +     bool (*is_preempted)(int cpu);
> +
> +     /*
> +      * Given a "vcpu preempt count", Return whether a vcpu preemption ever
> +      * happened after the .preempt_count() was called.
> +      */
> +     bool (*has_preempted)(long vpc);
> +};
> +
> +extern struct vcpu_preempt_ops vcpu_preempt_ops;
> +
> +/* Default boilerplate */
> +#define DEFAULT_VCPU_PREEMPT_OPS                     \
> +     {                                               \
> +             .preempt_count = __vcpu_preempt_count,  \
> +             .is_preempted = __vcpu_is_preempted,    \
> +             .has_preempted = __vcpu_has_preempted   \
> +     }
> +
> +#ifdef CONFIG_HAS_VCPU_PREEMPTION_DETECTION
> +/*
> + * vcpu_preempt_count: Get the current cpu's "vcpu preempt count"(vpc).
> + *
> + * The vpc is used for checking whether the current vcpu has ever been
> + * preempted via vcpu_has_preempted().
> + *
> + * This function and vcpu_has_preepmted() should be called in the same
> + * preemption disabled critical section.
> + */
> +#define vcpu_preempt_count() vcpu_preempt_ops.preempt_count()
> +
> +/*
> + * vcpu_is_preempted: Check whether @cpu's vcpu is preempted.
> + */
> +#define vcpu_is_preempted(cpu)       vcpu_preempt_ops.is_preempted(cpu)
> +
> +/*
> + * vcpu_has_preepmted: Check whether the current cpu's vcpu has ever been
> + * preempted.
> + *
> + * The checked duration is between the vcpu_preempt_count() which returns @vpc
> + * is called and this function called.
> + *
> + * This function and corresponding vcpu_preempt_count() should be in the same
> + * preemption disabled cirtial section.
> + */
> +#define vcpu_has_preempted(vpc)      vcpu_preempt_ops.has_preempted(vpc)
> +
> +#else /* CONFIG_HAS_VCPU_PREEMPTION_DETECTION */
> +#define vcpu_preempt_count() __vcpu_preempt_count()
> +#define vcpu_is_preempted(cpu) __vcpu_is_preempted(cpu)
> +#define vcpu_has_preempted(vpc) __vcpu_has_preempted(vpc)
> +#endif /* CONFIG_HAS_VCPU_PREEPMTION_DETECTION */

No, this is entirely insane, also broken.

No vectors, no actual function calls, nothing like that. You want the
below to completely compile away and generate the exact 100% same code
it does today.

> +++ b/kernel/locking/osq_lock.c
> @@ -1,6 +1,7 @@
>  #include <linux/percpu.h>
>  #include <linux/sched.h>
>  #include <linux/osq_lock.h>
> +#include <linux/vcpu_preempt.h>
>  
>  /*
>   * An MCS like lock especially tailored for optimistic spinning for sleeping
> @@ -87,6 +88,8 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>       struct optimistic_spin_node *prev, *next;
>       int curr = encode_cpu(smp_processor_id());
>       int old;
> +     int loops;
> +     long vpc;
>  
>       node->locked = 0;
>       node->next = NULL;
> @@ -106,6 +109,9 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>       node->prev = prev;
>       WRITE_ONCE(prev->next, node);
>  
> +     old = old - 1;

That's just nasty, and could result in an unconditional decrement being
issued, even though it's never used.

> +     vpc = vcpu_preempt_count();
> +
>       /*
>        * Normally @prev is untouchable after the above store; because at that
>        * moment unlock can proceed and wipe the node element from stack.
> @@ -118,8 +124,14 @@ bool osq_lock(struct optimistic_spin_queue *lock)
>       while (!READ_ONCE(node->locked)) {
>               /*
>                * If we need to reschedule bail... so we can block.
> +              * An over-committed guest with more vCPUs than pCPUs
> +              * might fall in this loop and cause a huge overload.
> +              * This is because vCPU A(prev) hold the osq lock and yield out,
> +              * vCPU B(node) wait ->locked to be set, IOW, wait till
> +              * vCPU A run and unlock the osq lock.
> +              * NOTE that vCPU A and vCPU B might run on same physical cpu.
>                */
> -             if (need_resched())
> +             if (need_resched() || vcpu_is_preempted(old) || vcpu_has_preempted(vpc))
>                       goto unqueue;
>  
>               cpu_relax_lowlatency();
> 

Reply via email to