Re: [PATCH v3 9/9] KVM: PPC: Book3S HV: Add tunable to control H_IPI redirection

2015-12-21 Thread Suresh E. Warrier
Redirecting the wakeup of a VCPU from the H_IPI hypercall to
a core running in the host is usually a good idea; most workloads
seemed to benefit. However, in one heavily interrupt-driven SMT1
workload, some regression was observed. This patch adds a kvm_hv
module parameter called h_ipi_redirect to control this feature.

The default value for this tunable is 1, i.e. the feature is enabled by default.

Signed-off-by: Suresh Warrier 
---
Resending this patch with an updated diff, since an earlier
patch in the series (patch 8/9) had to be resent to fix a build
break.
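
Since the parameter is created with S_IRUGO | S_IWUSR permissions, it
should also be possible to flip it at runtime through sysfs (presumably
/sys/module/kvm_hv/parameters/h_ipi_redirect) without reloading the
module. For readers unfamiliar with module_param_cb(), here is a
minimal, self-contained sketch of the pattern the patch uses; the
module and parameter names below are made up purely for illustration:

#include <linux/module.h>
#include <linux/moduleparam.h>

/* Backing variable; 1 = feature enabled, mirroring h_ipi_redirect. */
static int my_tunable = 1;

/* Custom ops so that validation logic can be added later if needed. */
static const struct kernel_param_ops my_tunable_ops = {
        .set = param_set_int,   /* stock integer parser */
        .get = param_get_int,   /* stock integer formatter */
};

/* Readable by everyone, writable by root (S_IRUGO | S_IWUSR == 0644). */
module_param_cb(my_tunable, &my_tunable_ops, &my_tunable, 0644);
MODULE_PARM_DESC(my_tunable, "Example read/write integer tunable");

MODULE_LICENSE("GPL");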

 arch/powerpc/include/asm/kvm_ppc.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c | 11 +++
 arch/powerpc/kvm/book3s_hv_rm_xics.c |  5 -
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 1b93519..29d1442 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -448,6 +448,7 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 
icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xics_ipi_action(void);
+extern int h_ipi_redirect;
 #else
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d6280ed..182ec84 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,17 @@ static int target_smt_mode;
 module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
+#ifdef CONFIG_KVM_XICS
+static struct kernel_param_ops module_param_ops = {
+   .set = param_set_int,
+   .get = param_get_int,
+};
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
+   S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+#endif
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index e673fb9..980d8a6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -24,6 +24,9 @@
 
 #define DEBUG_PASSUP
 
+int h_ipi_redirect = 1;
+EXPORT_SYMBOL(h_ipi_redirect);
+
 static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp 
*icp,
u32 new_irq);
 
@@ -148,7 +151,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
hcore = -1;
-   if (kvmppc_host_rm_ops_hv)
+   if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
if (hcore != -1) {
icp_send_hcore_msg(hcore, vcpu);
-- 
1.8.3.4



Re: [PATCH v3 8/9] KVM: PPC: Book3S HV: Send IPI to host core to wake VCPU

2015-12-21 Thread Suresh E. Warrier
This patch adds support to real-mode KVM to search for a core
running in the host partition and send it an IPI message with the
VCPU to be woken. This avoids having to switch to the host
partition to complete an H_IPI hypercall when the VCPU that
is the target of the H_IPI is not loaded (i.e. not running
in the guest).

The patch also includes the support in the IPI handler running
in the host to do the wakeup by calling kvmppc_xics_ipi_action
for the PPC_MSG_RM_HOST_ACTION message.

When a guest is being destroyed, we need to ensure that there
are no pending IPIs waiting to wake up a VCPU before we free
the VCPUs of the guest. This is accomplished by:
- Forcing a PPC_MSG_CALL_FUNCTION IPI to be completed by all CPUs
  before freeing any VCPUs in kvm_arch_destroy_vm() (a rough sketch
  of this follows below).
- Handling any pending PPC_MSG_RM_HOST_ACTION messages before any
  PPC_MSG_CALL_FUNCTION messages in the IPI demux path.
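
(The kvm_arch_destroy_vm() hunk in arch/powerpc/kvm/powerpc.c is cut
off in this archive, so the following is only a rough sketch of the
first point, assuming it amounts to a kick_all_cpus_sync() call before
the VCPUs are freed, as described above; the helper name is made up for
illustration.)

#include <linux/smp.h>

/* Called from kvm_arch_destroy_vm() before any VCPU structure is freed. */
static void drain_rm_host_actions(void)
{
        /*
         * kick_all_cpus_sync() sends a PPC_MSG_CALL_FUNCTION IPI to every
         * online CPU and waits for it to be handled.  Because
         * smp_ipi_demux() (see the smp.c hunk below) checks
         * PPC_MSG_RM_HOST_ACTION before PPC_MSG_CALL_FUNCTION, any queued
         * VCPU wakeups have been delivered by the time this returns.
         */
        kick_all_cpus_sync();
}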

Signed-off-by: Suresh Warrier 
---
Fixed build break for CONFIG_SMP=n (thanks to Mike Ellerman for
pointing that out).

 arch/powerpc/kernel/smp.c| 11 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 92 ++--
 arch/powerpc/kvm/powerpc.c   | 10 
 3 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e222efc..cb8be5d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -257,6 +257,17 @@ irqreturn_t smp_ipi_demux(void)
 
do {
all = xchg(&info->messages, 0);
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+   /*
+* Must check for PPC_MSG_RM_HOST_ACTION messages
+* before PPC_MSG_CALL_FUNCTION messages because when
+* a VM is destroyed, we call kick_all_cpus_sync()
+* to ensure that any pending PPC_MSG_RM_HOST_ACTION
+* messages have completed before we free any VCPUs.
+*/
+   if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+   kvmppc_xics_ipi_action();
+#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 43ffbfe..e673fb9 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -51,11 +51,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
 
 /* -- ICP routines -- */
 
+#ifdef CONFIG_SMP
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+{
+   int hcpu;
+
+   hcpu = hcore << threads_shift;
+   kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
+   smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+   icp_native_cause_ipi_rm(hcpu);
+}
+#else
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
+#endif
+
+/*
+ * We start the search from our current CPU Id in the core map
+ * and go in a circle until we get back to our ID looking for a
+ * core that is running in host context and that hasn't already
+ * been targeted for another rm_host_ops.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1, if no CPU could be found in the host
+ * Else, returns a CPU Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+   struct kvmppc_host_rm_core *rm_core, int max, int action)
+{
+   bool success;
+   int core;
+   union kvmppc_rm_state old, new;
+
+   for (core = start + 1; core < max; core++)  {
+   old = new = READ_ONCE(rm_core[core].rm_state);
+
+   if (!old.in_host || old.rm_action)
+   continue;
+
+   /* Try to grab this host core if not taken already. */
+   new.rm_action = action;
+
+   success = cmpxchg64(&rm_core[core].rm_state.raw,
+   old.raw, new.raw) == old.raw;
+   if (success) {
+   /*
+* Make sure that the store to the rm_action is made
+* visible before we return to caller (and the
+* subsequent store to rm_data) to synchronize with
+* the IPI handler.
+*/
+   smp_wmb();
+   return core;
+   }
+   }
+
+   return -1;
+}
+
+static inline int find_available_hostcore(int action)
+{
+   int core;
+   int my_core = smp_processor_id() >> threads_shift;
+   struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
+
+   core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
+   if (core == -1)
+   core = grab_next_hostcore(core, rm_core, my

Re: [kbuild-all] [PATCH 5/6] KVM: PPC: Book3S HV: Send IPI to host core to wake VCPU

2015-11-02 Thread Suresh E. Warrier
Hi Fengguang,

I understand the problem.

I will send you the URL for the powerpc git tree once my patches
are pulled in so that we can then pass that to the robot.

Thanks.
-suresh

On 11/01/2015 08:51 PM, Fengguang Wu wrote:
> Hi Suresh,
> 
> Sorry for the noise!
> 
> Do you have a git tree that the robot can monitor and test?
> 
> In this case of one patchset depending on another, there is no chance
> for the robot to do valid testing based on emailed patches.
> 
> Thanks,
> Fengguang
> 
> On Fri, Oct 30, 2015 at 10:16:06AM -0500, Suresh E. Warrier wrote:
>> This patch set depends upon a previous patch set that I had submitted to
>> linux-ppc. The URL for that is:
>>
>> https://lists.ozlabs.org/pipermail/linuxppc-dev/2015-October/135794.html
>>
>> -suresh
>>
>> On 10/29/2015 11:52 PM, kbuild test robot wrote:
>>> Hi Suresh,
>>>
>>> [auto build test ERROR on kvm/linux-next -- if it's inappropriate base, 
>>> please suggest rules for selecting the more suitable base]
>>>
>>> url:
>>> https://github.com/0day-ci/linux/commits/Suresh-Warrier/KVM-PPC-Book3S-HV-Optimize-wakeup-VCPU-from-H_IPI/20151030-081329
>>> config: powerpc-defconfig (attached as .config)
>>> reproduce:
>>> wget 
>>> https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
>>>  -O ~/bin/make.cross
>>> chmod +x ~/bin/make.cross
>>> # save the attached .config to linux build tree
>>> make.cross ARCH=powerpc 
>>>
>>> All errors (new ones prefixed by >>):
>>>
>>>arch/powerpc/kvm/book3s_hv_rm_xics.c: In function 'icp_rm_set_vcpu_irq':
>>>>> arch/powerpc/kvm/book3s_hv_rm_xics.c:142:4: error: implicit declaration 
>>>>> of function 'smp_muxed_ipi_rm_message_pass' 
>>>>> [-Werror=implicit-function-declaration]
>>>smp_muxed_ipi_rm_message_pass(hcpu,
>>>^
>>>arch/powerpc/kvm/book3s_hv_rm_xics.c:143:7: error: 
>>> 'PPC_MSG_RM_HOST_ACTION' undeclared (first use in this function)
>>>   PPC_MSG_RM_HOST_ACTION);
>>>   ^
>>>arch/powerpc/kvm/book3s_hv_rm_xics.c:143:7: note: each undeclared 
>>> identifier is reported only once for each function it appears in
>>>cc1: all warnings being treated as errors
>>>
>>> vim +/smp_muxed_ipi_rm_message_pass +142 
>>> arch/powerpc/kvm/book3s_hv_rm_xics.c
>>>
>>>136  hcore = -1;
>>>137  if (kvmppc_host_rm_ops_hv)
>>>138  hcore = 
>>> find_available_hostcore(XICS_RM_KICK_VCPU);
>>>139  if (hcore != -1) {
>>>140  hcpu = hcore << threads_shift;
>>>141  
>>> kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
>>>  > 142  smp_muxed_ipi_rm_message_pass(hcpu,
>>>143  
>>> PPC_MSG_RM_HOST_ACTION);
>>>144  } else {
>>>145  this_icp->rm_action |= 
>>> XICS_RM_KICK_VCPU;
>>>
>>> ---
>>> 0-DAY kernel test infrastructureOpen Source Technology 
>>> Center
>>> https://lists.01.org/pipermail/kbuild-all   Intel 
>>> Corporation
>>>
>>
>> ___
>> kbuild-all mailing list
>> kbuild-...@lists.01.org
>> https://lists.01.org/mailman/listinfo/kbuild-all
> 



Re: [PATCH 5/6] KVM: PPC: Book3S HV: Send IPI to host core to wake VCPU

2015-10-30 Thread Suresh E. Warrier
This patch set depends upon a previous patch set that I had submitted to
linux-ppc. The URL for that is:

https://lists.ozlabs.org/pipermail/linuxppc-dev/2015-October/135794.html

-suresh

On 10/29/2015 11:52 PM, kbuild test robot wrote:
> Hi Suresh,
> 
> [auto build test ERROR on kvm/linux-next -- if it's inappropriate base, 
> please suggest rules for selecting the more suitable base]
> 
> url:
> https://github.com/0day-ci/linux/commits/Suresh-Warrier/KVM-PPC-Book3S-HV-Optimize-wakeup-VCPU-from-H_IPI/20151030-081329
> config: powerpc-defconfig (attached as .config)
> reproduce:
> wget 
> https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
>  -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> make.cross ARCH=powerpc 
> 
> All errors (new ones prefixed by >>):
> 
>arch/powerpc/kvm/book3s_hv_rm_xics.c: In function 'icp_rm_set_vcpu_irq':
>>> arch/powerpc/kvm/book3s_hv_rm_xics.c:142:4: error: implicit declaration of 
>>> function 'smp_muxed_ipi_rm_message_pass' 
>>> [-Werror=implicit-function-declaration]
>smp_muxed_ipi_rm_message_pass(hcpu,
>^
>arch/powerpc/kvm/book3s_hv_rm_xics.c:143:7: error: 
> 'PPC_MSG_RM_HOST_ACTION' undeclared (first use in this function)
>   PPC_MSG_RM_HOST_ACTION);
>   ^
>arch/powerpc/kvm/book3s_hv_rm_xics.c:143:7: note: each undeclared 
> identifier is reported only once for each function it appears in
>cc1: all warnings being treated as errors
> 
> vim +/smp_muxed_ipi_rm_message_pass +142 arch/powerpc/kvm/book3s_hv_rm_xics.c
> 
>136hcore = -1;
>137if (kvmppc_host_rm_ops_hv)
>138hcore = 
> find_available_hostcore(XICS_RM_KICK_VCPU);
>139if (hcore != -1) {
>140hcpu = hcore << threads_shift;
>141
> kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
>  > 142smp_muxed_ipi_rm_message_pass(hcpu,
>143
> PPC_MSG_RM_HOST_ACTION);
>144} else {
>145this_icp->rm_action |= 
> XICS_RM_KICK_VCPU;
> 
> ---
> 0-DAY kernel test infrastructureOpen Source Technology Center
> https://lists.01.org/pipermail/kbuild-all   Intel Corporation
> 



[PATCH] KVM: PPC: Book3S HV: Tracepoints for KVM HV guest interactions

2014-12-03 Thread Suresh E. Warrier
This patch adds trace points in the guest entry and exit code and also
for exceptions handled by the host in kernel mode - hypercalls and page
faults. The new events are added to /sys/kernel/debug/tracing/events
under a new subsystem called kvm_hv.

Acked-by: Paul Mackerras 
Signed-off-by: Suresh Warrier 
---
Added new include file for common trace defines for kvm_pr and kvm_hv.
Replaced hand-written numbers with defines in trace_hv.h.
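
The new trace_hv.h itself is not visible in this excerpt (the diff
below is cut off before it), so as an orientation aid, here is a
minimal sketch of how one of these events, e.g. trace_kvm_guest_enter(),
would typically be declared with the TRACE_EVENT() machinery. The
recorded fields are illustrative only and need not match the real
header; the including .c file is expected to provide struct kvm_vcpu
and kvmppc_get_pc().

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_hv

#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_HV_H

#include <linux/tracepoint.h>

TRACE_EVENT(kvm_guest_enter,
        TP_PROTO(struct kvm_vcpu *vcpu),
        TP_ARGS(vcpu),

        TP_STRUCT__entry(
                __field(int,            vcpu_id)
                __field(unsigned long,  pc)
        ),

        TP_fast_assign(
                __entry->vcpu_id = vcpu->vcpu_id;
                __entry->pc      = kvmppc_get_pc(vcpu);
        ),

        TP_printk("VCPU %d: pc=0x%lx", __entry->vcpu_id, __entry->pc)
);

#endif /* _TRACE_KVM_HV_H */

/* This part must stay outside the include-guard protection. */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace_hv

#include <trace/define_trace.h>

Once kvm_hv is loaded, the events should then show up under
/sys/kernel/debug/tracing/events/kvm_hv/ and can be enabled through the
per-event enable files.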

 arch/powerpc/kvm/book3s_64_mmu_hv.c |  12 +-
 arch/powerpc/kvm/book3s_hv.c|  19 ++
 arch/powerpc/kvm/trace_book3s.h |  32 +++
 arch/powerpc/kvm/trace_hv.h | 477 
 arch/powerpc/kvm/trace_pr.h |  25 +-
 5 files changed, 538 insertions(+), 27 deletions(-)
 create mode 100644 arch/powerpc/kvm/trace_book3s.h
 create mode 100644 arch/powerpc/kvm/trace_hv.h

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8190e36..52e8fa1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -39,6 +39,7 @@
 #include 
 
 #include "book3s_hv_cma.h"
+#include "trace_hv.h"
 
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970   63
@@ -628,6 +629,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
gfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(kvm, gfn);
 
+   trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
gpa |= (ea & (psize - 1));
@@ -642,6 +645,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
 
+   ret = -EFAULT;
is_io = 0;
pfn = 0;
page = NULL;
@@ -665,7 +669,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
}
up_read(&current->mm->mmap_sem);
if (!pfn)
-   return -EFAULT;
+   goto out_put;
} else {
page = pages[0];
if (PageHuge(page)) {
@@ -693,14 +697,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
pfn = page_to_pfn(page);
}
 
-   ret = -EFAULT;
if (psize > pte_size)
goto out_put;
 
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_io)) {
if (is_io)
-   return -EFAULT;
+   goto out_put;
+
/*
 * Allow guest to map emulated device memory as
 * uncacheable, but actually make it cacheable.
@@ -756,6 +760,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
SetPageDirty(page);
 
  out_put:
+   trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
if (page) {
/*
 * We drop pages[0] here, not page because page might
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c2d2535..40615ab 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -58,6 +58,9 @@
 
 #include "book3s.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -1721,6 +1724,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
kvmppc_create_dtl_entry(vcpu, vc);
+   trace_kvm_guest_enter(vcpu);
}
 
/* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1729,6 +1733,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
+
+   trace_kvmppc_run_core(vc, 0);
+
spin_unlock(&vc->lock);
 
kvm_guest_enter();
@@ -1774,6 +1781,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
 
+   trace_kvm_guest_exit(vcpu);
+
ret = RESUME_GUEST;
if (vcpu->arch.trap)
ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1799,6 +1808,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
wake_up(&vcpu->arch.cpu_run);
}
}
+
+   trace_kvmppc_run_core(vc, 1);
 }
 
 /*
@@ -1845,11 +1856,13 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore 
*vc)
}
 
vc->vcore_state = VCORE_SLEEPING;
+   trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
finish_wait(&vc->wq, &wait);
spin_lock(&vc->lock

Re: [PATCH] KVM: PPC: Book3S HV: Tracepoints for KVM HV guest interactions

2014-12-01 Thread Suresh E. Warrier


On 11/20/2014 08:01 AM, Steven Rostedt wrote:
> On Thu, 20 Nov 2014 13:10:12 +0100
> Alexander Graf  wrote:
> 
>>
>>
>> On 20.11.14 11:40, Aneesh Kumar K.V wrote:
>>> "Suresh E. Warrier"  writes:
>>>
>>>> This patch adds trace points in the guest entry and exit code and also
>>>> for exceptions handled by the host in kernel mode - hypercalls and page
>>>> faults. The new events are added to /sys/kernel/debug/tracing/events
>>>> under a new subsystem called kvm_hv.
>>>
>>> 
>>>
>>>>/* Set this explicitly in case thread 0 doesn't have a vcpu */
>>>> @@ -1687,6 +1691,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
>>>>  
>>>>vc->vcore_state = VCORE_RUNNING;
>>>>preempt_disable();
>>>> +
>>>> +  trace_kvmppc_run_core(vc, 0);
>>>> +
>>>>spin_unlock(&vc->lock);
>>>
>>> Do we really want to call tracepoint with spin lock held ? Is that a good
>>> thing to do ?. 
>>
>> I thought it was safe to call tracepoints inside of spin lock regions?
>> Steve?
>>
> 
> There's tracepoints in the guts of the scheduler where rq lock is held.
> Don't worry about it. The tracing system is lockless.
> 

Thanks for confirming.

-suresh
 
> -- Steve
> 



Re: [PATCH 5/5] KVM: PPC: Book3S HV: Check wait conditions before sleeping in kvmppc_vcore_blocked

2014-11-20 Thread Suresh E. Warrier


On 11/20/2014 11:36 AM, Alexander Graf wrote:
> 
> 
> On 03.11.14 05:52, Paul Mackerras wrote:
>> From: "Suresh E. Warrier" 
>>
>> The kvmppc_vcore_blocked() code does not check for the wait condition
>> after putting the process on the wait queue. This means that it is
>> possible for an external interrupt to become pending, but the vcpu to
>> remain asleep until the next decrementer interrupt.  The fix is to
>> make one last check for pending exceptions and ceded state before
>> calling schedule().
>>
>> Signed-off-by: Suresh Warrier 
>> Signed-off-by: Paul Mackerras 
> 
> I don't understand the race you're fixing here. Can you please explain it?
> 

When a virtual interrupt needs to be delivered to the guest, and the
virtual ICS state for the interrupt and virtual ICP state for the VCPU
allow for the VCPU to be immediately interrupted, we
1. Set the BOOK3S_INTERRUPT_EXTERNAL_LEVEL bit in pending_exceptions.
2. Call kvmppc_fast_vcpu_kick_hv(), which checks the wait queue at vcpu->wq
   to wake the VCPU up.

The caller of kvmppc_vcore_blocked() does check for pending exceptions, but
an exception can become pending after that check and before the VCPU is
added to the wait queue, in which case the wakeup is missed until the next
decrementer interrupt. That is why we need to check one more time after the
VCPU is put on the wait queue (i.e. after prepare_to_wait()).

-suresh

> 
> Alex
> 
>> ---
>>  arch/powerpc/kvm/book3s_hv.c | 20 
>>  1 file changed, 20 insertions(+)
>>
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index cd7e030..1a7a281 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -1828,9 +1828,29 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu 
>> *vcpu, int wait_state)
>>   */
>>  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>>  {
>> +struct kvm_vcpu *vcpu;
>> +int do_sleep = 1;
>> +
>>  DEFINE_WAIT(wait);
>>  
>>  prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
>> +
>> +/*
>> + * Check one last time for pending exceptions and ceded state after
>> + * we put ourselves on the wait queue
>> + */
>> +list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
>> +if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
>> +do_sleep = 0;
>> +break;
>> +}
>> +}
>> +
>> +if (!do_sleep) {
>> +finish_wait(&vc->wq, &wait);
>> +return;
>> +}
>> +
>>  vc->vcore_state = VCORE_SLEEPING;
>>  spin_unlock(&vc->lock);
>>  schedule();
>>
> 



Re: [PATCH] KVM: PPC: Book3S HV: Tracepoints for KVM HV guest interactions

2014-11-19 Thread Suresh E. Warrier


On 11/14/2014 04:56 AM, Alexander Graf wrote:
> 
> 
> 
>> Am 14.11.2014 um 00:29 schrieb Suresh E. Warrier 
>> :
>>
>> This patch adds trace points in the guest entry and exit code and also
>> for exceptions handled by the host in kernel mode - hypercalls and page
>> faults. The new events are added to /sys/kernel/debug/tracing/events
>> under a new subsystem called kvm_hv.
>>
>> Acked-by: Paul Mackerras 
>> Signed-off-by: Suresh Warrier 
>> ---
>> arch/powerpc/kvm/book3s_64_mmu_hv.c |  12 +-
>> arch/powerpc/kvm/book3s_hv.c|  19 ++
>> arch/powerpc/kvm/trace_hv.h | 497 
>> 
>> 3 files changed, 525 insertions(+), 3 deletions(-)
>> create mode 100644 arch/powerpc/kvm/trace_hv.h
>>
>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> index 70feb7b..20cbad1 100644
>> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>> @@ -38,6 +38,7 @@
>> #include 
>>
>> #include "book3s_hv_cma.h"
>> +#include "trace_hv.h"
>>
>> /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
>> #define MAX_LPID_97063
>> @@ -627,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
>> struct kvm_vcpu *vcpu,
>>gfn = gpa >> PAGE_SHIFT;
>>memslot = gfn_to_memslot(kvm, gfn);
>>
>> +trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
>> +
>>/* No memslot means it's an emulated MMIO region */
>>if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
>>return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
>> @@ -639,6 +642,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
>> struct kvm_vcpu *vcpu,
>>mmu_seq = kvm->mmu_notifier_seq;
>>smp_rmb();
>>
>> +ret = -EFAULT;
>>is_io = 0;
>>pfn = 0;
>>page = NULL;
>> @@ -662,7 +666,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
>> struct kvm_vcpu *vcpu,
>>}
>>up_read(&current->mm->mmap_sem);
>>if (!pfn)
>> -return -EFAULT;
>> +goto out_put;
>>} else {
>>page = pages[0];
>>if (PageHuge(page)) {
>> @@ -690,14 +694,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
>> struct kvm_vcpu *vcpu,
>>pfn = page_to_pfn(page);
>>}
>>
>> -ret = -EFAULT;
>>if (psize > pte_size)
>>goto out_put;
>>
>>/* Check WIMG vs. the actual page we're accessing */
>>if (!hpte_cache_flags_ok(r, is_io)) {
>>if (is_io)
>> -return -EFAULT;
>> +goto out_put;
>> +
>>/*
>> * Allow guest to map emulated device memory as
>> * uncacheable, but actually make it cacheable.
>> @@ -753,6 +757,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
>> struct kvm_vcpu *vcpu,
>>SetPageDirty(page);
>>
>>  out_put:
>> +trace_kvm_page_fault_exit(vcpu, hpte, ret);
>> +
>>if (page) {
>>/*
>> * We drop pages[0] here, not page because page might
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 69d4085..5143d17 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -57,6 +57,9 @@
>>
>> #include "book3s.h"
>>
>> +#define CREATE_TRACE_POINTS
>> +#include "trace_hv.h"
>> +
>> /* #define EXIT_DEBUG */
>> /* #define EXIT_DEBUG_SIMPLE */
>> /* #define EXIT_DEBUG_INT */
>> @@ -1679,6 +1682,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
>>list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
>>kvmppc_start_thread(vcpu);
>>kvmppc_create_dtl_entry(vcpu, vc);
>> +trace_kvm_guest_enter(vcpu);
>>}
>>
>>/* Set this explicitly in case thread 0 doesn't have a vcpu */
>> @@ -1687,6 +1691,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
>>
>>vc->vcore_state = VCORE_RUNNING;
>>preempt_disable();
>> +
>> +trace_kvmppc_run_core(vc, 0);
>> +
>>spin_unlock(&vc->lock);
>>
>>kvm_guest_enter();
>> @@ -1732,6 +1739,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
>>kvmppc_core_pending_dec(vcpu))
>>kvmppc_core_dequeue_dec(vcpu);
>>
>> +  

[PATCH] KVM: PPC: Book3S HV: Tracepoints for KVM HV guest interactions

2014-11-13 Thread Suresh E. Warrier
This patch adds trace points in the guest entry and exit code and also
for exceptions handled by the host in kernel mode - hypercalls and page
faults. The new events are added to /sys/kernel/debug/tracing/events
under a new subsystem called kvm_hv.

Acked-by: Paul Mackerras 
Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  12 +-
 arch/powerpc/kvm/book3s_hv.c|  19 ++
 arch/powerpc/kvm/trace_hv.h | 497 
 3 files changed, 525 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/kvm/trace_hv.h

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 70feb7b..20cbad1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -38,6 +38,7 @@
 #include 
 
 #include "book3s_hv_cma.h"
+#include "trace_hv.h"
 
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970   63
@@ -627,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
gfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(kvm, gfn);
 
+   trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
@@ -639,6 +642,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
 
+   ret = -EFAULT;
is_io = 0;
pfn = 0;
page = NULL;
@@ -662,7 +666,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
}
up_read(&current->mm->mmap_sem);
if (!pfn)
-   return -EFAULT;
+   goto out_put;
} else {
page = pages[0];
if (PageHuge(page)) {
@@ -690,14 +694,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
pfn = page_to_pfn(page);
}
 
-   ret = -EFAULT;
if (psize > pte_size)
goto out_put;
 
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_io)) {
if (is_io)
-   return -EFAULT;
+   goto out_put;
+
/*
 * Allow guest to map emulated device memory as
 * uncacheable, but actually make it cacheable.
@@ -753,6 +757,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
SetPageDirty(page);
 
  out_put:
+   trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
if (page) {
/*
 * We drop pages[0] here, not page because page might
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 69d4085..5143d17 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -57,6 +57,9 @@
 
 #include "book3s.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -1679,6 +1682,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
kvmppc_create_dtl_entry(vcpu, vc);
+   trace_kvm_guest_enter(vcpu);
}
 
/* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1687,6 +1691,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
+
+   trace_kvmppc_run_core(vc, 0);
+
spin_unlock(&vc->lock);
 
kvm_guest_enter();
@@ -1732,6 +1739,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
 
+   trace_kvm_guest_exit(vcpu);
+
ret = RESUME_GUEST;
if (vcpu->arch.trap)
ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1757,6 +1766,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
wake_up(&vcpu->arch.cpu_run);
}
}
+
+   trace_kvmppc_run_core(vc, 1);
 }
 
 /*
@@ -1783,11 +1794,13 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore 
*vc)
 
prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
vc->vcore_state = VCORE_SLEEPING;
+   trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
finish_wait(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
+   trace_kvmppc_vcore_blocked(vc, 1);
 }
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1796,6 +1809,8 @@ static int kv