On 20/04/21 10:48, Wanpeng Li wrote:
I was thinking of something simpler:

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9b8e30dd5b9b..455c648f9adc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3198,10 +3198,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 {
 	struct kvm *kvm = me->kvm;
 	struct kvm_vcpu *vcpu;
-	int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
 	int yielded = 0;
 	int try = 3;
-	int pass;
+	int pass, num_passes = 1;
 	int i;
 
 	kvm_vcpu_set_in_spin_loop(me, true);
@@ -3212,13 +3211,14 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 	 * VCPU is holding the lock that we need and will release it.
 	 * We approximate round-robin by starting at the last boosted VCPU.
 	 */
-	for (pass = 0; pass < 2 && !yielded && try; pass++) {
-		kvm_for_each_vcpu(i, vcpu, kvm) {
-			if (!pass && i <= last_boosted_vcpu) {
-				i = last_boosted_vcpu;
-				continue;
-			} else if (pass && i > last_boosted_vcpu)
-				break;
+	for (pass = 0; pass < num_passes; pass++) {
+		int idx = me->kvm->last_boosted_vcpu;
+		int n = atomic_read(&kvm->online_vcpus);
+		for (i = 0; i < n; i++, idx++) {
+			if (idx == n)
+				idx = 0;
+
+			vcpu = kvm_get_vcpu(kvm, idx);
 			if (!READ_ONCE(vcpu->ready))
 				continue;
 			if (vcpu == me)
@@ -3226,23 +3226,36 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 			if (rcuwait_active(&vcpu->wait) && !vcpu_dy_runnable(vcpu))
 				continue;
-			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
-			    !kvm_arch_vcpu_in_kernel(vcpu))
-				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
 
+			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
+			    !kvm_arch_vcpu_in_kernel(vcpu)) {
+				/*
+				 * A vCPU running in userspace can get to kernel mode via
+				 * an interrupt. That's a worse choice than a CPU already
+				 * in kernel mode so only do it on a second pass.
+				 */
+				if (!vcpu_dy_runnable(vcpu))
+					continue;
+				if (pass == 0) {
+					num_passes = 2;
+					continue;
+				}
+			}
+
 			yielded = kvm_vcpu_yield_to(vcpu);
 			if (yielded > 0) {
 				kvm->last_boosted_vcpu = i;
-				break;
+				goto done;
 			} else if (yielded < 0) {
 				try--;
 				if (!try)
-					break;
+					goto done;
 			}
 		}
 	}
+done:

We just tested the above patch against a 96-vCPU VM in an over-subscribed scenario, and the pbzip2 score fluctuated drastically. Sometimes it is worse than vanilla, but the average improvement is around 2.2%. The new version of my patch is around 9.3%, and the original posted patch is around 10%, which is totally as expected since now both IPI receivers in user mode and lock waiters are second-class citizens.
Fair enough. Of the two patches you posted I prefer the original, so I'll go with that one.
Paolo

