Rather than calling cond_resched() in kvmppc_run_core() before doing the post-processing for the vcpus that we have just run (that is, calling kvmppc_handle_exit_hv(), kvmppc_set_timer(), etc.), we now do that post-processing before calling cond_resched(), and that post- processing is moved out into its own function, post_guest_process().
The reschedule point is now in kvmppc_run_vcpu() and we define a new vcore state, VCORE_PREEMPT, to indicate that that the vcore's runner task is runnable but not running. (Doing the reschedule with the vcore in VCORE_INACTIVE state would be bad because there are potentially other vcpus waiting for the runner in kvmppc_wait_for_exec() which then wouldn't get woken up.) Also, we make use of the handy cond_resched_lock() function, which unlocks and relocks vc->lock for us around the reschedule. Signed-off-by: Paul Mackerras <pau...@samba.org> --- arch/powerpc/include/asm/kvm_host.h | 5 +- arch/powerpc/kvm/book3s_hv.c | 92 +++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 3eecd88..83c4425 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -304,8 +304,9 @@ struct kvmppc_vcore { /* Values for vcore_state */ #define VCORE_INACTIVE 0 #define VCORE_SLEEPING 1 -#define VCORE_RUNNING 2 -#define VCORE_EXITING 3 +#define VCORE_PREEMPT 2 +#define VCORE_RUNNING 3 +#define VCORE_EXITING 4 /* * Struct used to manage memory for a virtual processor area diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1a6ea6e..5a1abf6 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1882,15 +1882,50 @@ static void prepare_threads(struct kvmppc_vcore *vc) } } +static void post_guest_process(struct kvmppc_vcore *vc) +{ + u64 now; + long ret; + struct kvm_vcpu *vcpu, *vnext; + + now = get_tb(); + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { + /* cancel pending dec exception if dec is positive */ + if (now < vcpu->arch.dec_expires && + kvmppc_core_pending_dec(vcpu)) + kvmppc_core_dequeue_dec(vcpu); + + trace_kvm_guest_exit(vcpu); + + ret = RESUME_GUEST; + if (vcpu->arch.trap) + ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, + vcpu->arch.run_task); + + vcpu->arch.ret = ret; + vcpu->arch.trap = 0; + + if (vcpu->arch.ceded) { + if (!is_kvmppc_resume_guest(ret)) + kvmppc_end_cede(vcpu); + else + kvmppc_set_timer(vcpu); + } + if (!is_kvmppc_resume_guest(vcpu->arch.ret)) { + kvmppc_remove_runnable(vc, vcpu); + wake_up(&vcpu->arch.cpu_run); + } + } +} + /* * Run a set of guest threads on a physical core. * Called with vc->lock held. */ static void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; - long ret; - u64 now; + struct kvm_vcpu *vcpu; int i; int srcu_idx; @@ -1922,8 +1957,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { vcpu->arch.ret = -EBUSY; + kvmppc_remove_runnable(vc, vcpu); + wake_up(&vcpu->arch.cpu_run); + } goto out; } @@ -1979,44 +2017,12 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvm_guest_exit(); preempt_enable(); - cond_resched(); spin_lock(&vc->lock); - now = get_tb(); - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { - /* cancel pending dec exception if dec is positive */ - if (now < vcpu->arch.dec_expires && - kvmppc_core_pending_dec(vcpu)) - kvmppc_core_dequeue_dec(vcpu); - - trace_kvm_guest_exit(vcpu); - - ret = RESUME_GUEST; - if (vcpu->arch.trap) - ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, - vcpu->arch.run_task); - - vcpu->arch.ret = ret; - vcpu->arch.trap = 0; - - if (vcpu->arch.ceded) { - if (!is_kvmppc_resume_guest(ret)) - kvmppc_end_cede(vcpu); - else - kvmppc_set_timer(vcpu); - } - } + post_guest_process(vc); out: vc->vcore_state = VCORE_INACTIVE; - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { - if (!is_kvmppc_resume_guest(vcpu->arch.ret)) { - kvmppc_remove_runnable(vc, vcpu); - wake_up(&vcpu->arch.cpu_run); - } - } - trace_kvmppc_run_core(vc, 1); } @@ -2138,7 +2144,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) break; - vc->runner = vcpu; n_ceded = 0; list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { if (!v->arch.pending_exceptions) @@ -2146,10 +2151,17 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) else v->arch.ceded = 0; } - if (n_ceded == vc->n_runnable) + vc->runner = vcpu; + if (n_ceded == vc->n_runnable) { kvmppc_vcore_blocked(vc); - else + } else if (should_resched()) { + vc->vcore_state = VCORE_PREEMPT; + /* Let something else run */ + cond_resched_lock(&vc->lock); + vc->vcore_state = VCORE_INACTIVE; + } else { kvmppc_run_core(vc); + } vc->runner = NULL; } -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html