[PATCH 11/14] KVM: PPC: Book3S HV: Tunable to disable KVM IRQ bypass

2016-02-26 Thread Suresh Warrier
Add a module parameter kvm_irq_bypass for kvm_hv.ko to
disable IRQ bypass for passthrough interrupts. The default
value of this tunable is 1, i.e. the feature is enabled.

Since the tunable is used by built-in kernel code, we use
the module_param_cb macro to expose it as a module parameter.
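
Once kvm_hv.ko is loaded, the parameter shows up under sysfs and can
be flipped at runtime; this is standard module_param_cb behavior, so
the path below is shown for illustration:

    # cat /sys/module/kvm_hv/parameters/kvm_irq_bypass
    1
    # echo 0 > /sys/module/kvm_hv/parameters/kvm_irq_bypass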

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/include/asm/kvm_ppc.h|  2 +-
 arch/powerpc/kvm/book3s_hv.c  | 13 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c  |  2 ++
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 8f39796..8e5fac6 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -191,6 +191,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
   struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern int kvm_irq_bypass;
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 93531cc..a13fd2b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -465,7 +465,7 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
struct kvm_vcpu *vcpu)
 {
-   if (vcpu)
+   if (vcpu && kvm_irq_bypass)
return vcpu->kvm->arch.pimap;
else
return NULL;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 487657f..2d82c4d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -91,6 +91,10 @@ static struct kernel_param_ops module_param_ops = {
.get = param_get_int,
 };
 
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
+   S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
+
 module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
@@ -3317,6 +3321,9 @@ static int kvmppc_cache_passthru_irq_hv(struct kvm *kvm, int irq)
struct kvmppc_passthru_irqmap *pimap;
int cidx, midx;
 
+   if (!kvm_irq_bypass)
+   return 1;
+
mutex_lock(&kvm->lock);
 
if (kvm->arch.pimap == NULL)
@@ -3421,6 +3428,9 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
struct irq_chip *chip;
int i;
 
+   if (!kvm_irq_bypass)
+   return 0;
+
desc = irq_to_desc(host_irq);
if (!desc)
return -EIO;
@@ -3484,6 +3494,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
struct kvmppc_passthru_irqmap *pimap;
int i;
 
+   if (!kvm_irq_bypass)
+   return 0;
+
desc = irq_to_desc(host_irq);
if (!desc)
return -EIO;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 8390c50..97a09c2 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -27,6 +27,8 @@
 
 int h_ipi_redirect = 1;
 EXPORT_SYMBOL(h_ipi_redirect);
+int kvm_irq_bypass = 1;
+EXPORT_SYMBOL(kvm_irq_bypass);
 
 static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
-- 
1.8.3.4


[PATCH 14/14] KVM: PPC: Book3S HV: Counters for passthrough IRQ stats

2016-02-26 Thread Suresh Warrier
Add VCPU stat counters to track affinity for passthrough
interrupts.

pthru_all: Counts all passthrough interrupts whose IRQ mappings have
   been cached in the kvmppc_passthru_irq_map cache.
pthru_host: Counts all cached passthrough interrupts that were injected
from the host through kvm_set_irq.
pthru_bad_aff: Counts how many cached passthrough interrupts have
   bad affinity (the receiving CPU is not running the VCPU that is
   the target of the virtual interrupt in the guest).
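
The counters nest, so pthru_all >= pthru_host >= pthru_bad_aff for
any VCPU, since each successive counter is bumped in a strictly
narrower case. A condensed sketch of where each one is incremented,
taken from the ics_set_affinity_passthru() hunk below:

	vcpu->stat.pthru_all++;		/* every cached passthrough IRQ */
	if (state->intr_cpu == -1)
		return;
	vcpu->stat.pthru_host++;	/* was injected from the host */
	if (intr_cpu == pcpu)
		return;
	vcpu->stat.pthru_bad_aff++;	/* affinity needs correcting */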

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_host.h  | 3 +++
 arch/powerpc/kvm/book3s.c| 3 +++
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 7 +++
 3 files changed, 13 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 558d195..9230b1a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -128,6 +128,9 @@ struct kvm_vcpu_stat {
u32 ld_slow;
u32 st_slow;
 #endif
+   u32 pthru_all;
+   u32 pthru_host;
+   u32 pthru_bad_aff;
 };
 
 enum kvm_exit_types {
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 1b4f5bd..b3d44b1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -65,6 +65,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "ld_slow", VCPU_STAT(ld_slow) },
{ "st",  VCPU_STAT(st) },
{ "st_slow", VCPU_STAT(st_slow) },
+   { "pthru_all",   VCPU_STAT(pthru_all) },
+   { "pthru_host",  VCPU_STAT(pthru_host) },
+   { "pthru_bad_aff",   VCPU_STAT(pthru_bad_aff) },
{ NULL }
 };
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index e2bbfdf..4004a35 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -696,6 +696,7 @@ static struct kvmppc_irq_map *get_irqmap_gsi(
 unsigned long irq_map_err;
 
 /*
+ * Count affinity for passthrough IRQs.
  * Change affinity to CPU running the target VCPU.
  */
 static void ics_set_affinity_passthru(struct ics_irq_state *state,
@@ -708,17 +709,23 @@ static void ics_set_affinity_passthru(struct ics_irq_state *state,
s16 intr_cpu;
u32 pcpu;
 
+   vcpu->stat.pthru_all++;
+
intr_cpu = state->intr_cpu;
 
if  (intr_cpu == -1)
return;
 
+   vcpu->stat.pthru_host++;
+
state->intr_cpu = -1;
 
pcpu = cpu_first_thread_sibling(raw_smp_processor_id());
if (intr_cpu == pcpu)
return;
 
+   vcpu->stat.pthru_bad_aff++;
+
pimap = kvmppc_get_passthru_irqmap(vcpu);
if (likely(pimap)) {
irq_map = get_irqmap_gsi(pimap, irq);
-- 
1.8.3.4


[PATCH 13/14] KVM: PPC: Book3S HV: Change affinity for passthrough IRQ

2016-02-26 Thread Suresh Warrier
Change the affinity in the host for a passthrough interrupt
to the hardware thread running the VCPU which has affinity
to this interrupt in the guest. Since the cores run in single
threaded mode on a PowerKVM host, the affinity is actually
changed to the first sibling thread of the hardware thread's
core. This is only done for IRQs that have been mapped for
IRQ bypass, since in that case, if the interrupt occurs while
the core is in the guest, real mode KVM can simply redirect
the interrupt to the appropriate sibling hardware thread.
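
As a rough illustration of the sibling calculation (a sketch, not
the kernel's exact definition of cpu_first_thread_sibling()): with
8 hardware threads per core, a thread number is rounded down to the
first thread of its core:

	/* illustrative only; assumes threads_per_core is a power of 2 */
	static int first_sibling(int cpu, int threads_per_core)
	{
		return cpu & ~(threads_per_core - 1);	/* e.g. 11 -> 8 */
	}

so an interrupt whose target VCPU runs on hardware thread 11 has its
host affinity pointed at CPU 8.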

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 14 +--
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 78 
 arch/powerpc/kvm/book3s_xics.c   |  7 
 arch/powerpc/kvm/book3s_xics.h   |  3 +-
 4 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 62252da..f95aa63 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -288,8 +288,16 @@ void kvmhv_commence_exit(int trap)
 struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
 EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
 
-static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
-u32 xisr)
+/*
+ * This returns the v_hwirq -> r_hwirq mapping, if any,
+ * when the r_hwirq is passed in as input
+ * There is also the similar get_irqmap_gsi() routine
+ * defined elsewhere, which returns the mapping when passed
+ * the v_hwirq as input.
+ */
+static struct kvmppc_irq_map *get_irqmap_xisr(
+   struct kvmppc_passthru_irqmap *pimap,
+   u32 xisr)
 {
int i;
 
@@ -425,7 +433,7 @@ long kvmppc_read_intr(struct kvm_vcpu *vcpu, int path)
 */
pimap = kvmppc_get_passthru_irqmap(vcpu);
if (pimap) {
-   irq_map = get_irqmap(pimap, xisr);
+   irq_map = get_irqmap_xisr(pimap, xisr);
if (irq_map) {
r = kvmppc_deliver_irq_passthru(vcpu, xirr,
irq_map, pimap);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index f33c7cc..e2bbfdf 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -661,6 +661,80 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
return check_too_hard(xics, icp);
 }
 
+/*
+ * This returns the v_hwirq -> r_hwirq mapping, if any,
+ * when the v_hwirq is passed in as input
+ * There is also the similar get_irqmap_xisr() routine
+ * defined elsewhere, which returns the mapping when passed
+ * the r_hwirq as input.
+ */
+
+static struct kvmppc_irq_map *get_irqmap_gsi(
+   struct kvmppc_passthru_irqmap *pimap,
+   u32 gsi)
+{
+   int i;
+
+   /*
+* We access this array unsafely.
+* Read comments in get_irqmap_xisr for details of this
+* as well as the need for the memory barrier used below.
+*/
+   for (i = 0; i < pimap->n_cached; i++)  {
+   if (gsi == pimap->cached[i].v_hwirq) {
+   /*
+* Order subsequent reads in the caller to serialize
+* with the writer.
+*/
+   smp_rmb();
+   return &pimap->cached[i];
+   }
+   }
+   return NULL;
+}
+
+unsigned long irq_map_err;
+
+/*
+ * Change affinity to CPU running the target VCPU.
+ */
+static void ics_set_affinity_passthru(struct ics_irq_state *state,
+ struct kvm_vcpu *vcpu,
+ u32 irq)
+{
+   struct kvmppc_passthru_irqmap *pimap;
+   struct kvmppc_irq_map *irq_map;
+   struct irq_data *d;
+   s16 intr_cpu;
+   u32 pcpu;
+
+   intr_cpu = state->intr_cpu;
+
+   if  (intr_cpu == -1)
+   return;
+
+   state->intr_cpu = -1;
+
+   pcpu = cpu_first_thread_sibling(raw_smp_processor_id());
+   if (intr_cpu == pcpu)
+   return;
+
+   pimap = kvmppc_get_passthru_irqmap(vcpu);
+   if (likely(pimap)) {
+   irq_map = get_irqmap_gsi(pimap, irq);
+   if (unlikely(!irq_map)) {
+   irq_map_err++;
+   return;
+   }
+   d = irq_desc_get_irq_data(irq_map->desc);
+   if (unlikely(!d->chip->irq_set_affinity))
+   return;
+   d->chip->irq_set_affinity(d, cpumask_of(pcpu), false);
+   } else
+   irq_map_err++;
+
+}
+
 int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 {
struct kvmppc_xics

[PATCH 12/14] KVM: PPC: Book3S HV: Update irq stats for IRQs handled in real mode

2016-02-26 Thread Suresh Warrier
When a passthrough IRQ is handled completely within KVM real
mode code, it must also update the IRQ stats, since it does
not go through the generic IRQ handling code.

However, the per-CPU kstat_irqs field is an allocated (not static)
field and so cannot be safely accessed directly in real mode.

The function this_cpu_inc_rm() is introduced to safely increment
per-CPU fields (currently coded for unsigned integers only) that
are allocated and could thus also be vmalloc'ed.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 50 
 1 file changed, 50 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 97a09c2..f33c7cc 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -18,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -734,6 +736,53 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
_stwcix(xics_phys + XICS_XIRR, xirr);
 }
 
+/*
+ * Increment a per-CPU 32-bit unsigned integer variable.
+ * Safe to call in real-mode. Handles vmalloc'ed addresses
+ *
+ * ToDo: Make this work for any integral type
+ */
+
+static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
+{
+   unsigned long l;
+   unsigned int *raddr;
+   int cpu = smp_processor_id();
+
+   raddr = per_cpu_ptr(addr, cpu);
+   l = (unsigned long)raddr;
+
+   if (REGION_ID(l) == VMALLOC_REGION_ID) {
+   l = vmalloc_to_phys(raddr);
+   raddr = (unsigned int *)l;
+   }
+   ++*raddr;
+}
+
+/*
+ * We don't try to update the flags in the irq_desc 'istate' field in
+ * here as would happen in the normal IRQ handling path for several reasons:
+ *  - state flags represent internal IRQ state and are not expected to be
+ *updated outside the IRQ subsystem
+ *  - more importantly, these are useful for edge triggered interrupts,
+ *IRQ probing, etc., but we are only handling MSI/MSIx interrupts here
+ *and these states shouldn't apply to us.
+ *
+ * However, we do update irq_stats - we somewhat duplicate the code in
+ * kstat_incr_irqs_this_cpu() for this since this function is defined
+ * in irq/internal.h which we don't want to include here.
+ * The only difference is that desc->kstat_irqs is an allocated per CPU
+ * variable and could have been vmalloc'ed, so we can't directly
+ * call __this_cpu_inc() on it. The kstat structure is a static
+ * per CPU variable and it should be accessible by real-mode KVM.
+ *
+ */
+static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
+{
+   this_cpu_inc_rm(desc->kstat_irqs);
+   __this_cpu_inc(kstat.irqs_sum);
+}
+
 long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
 u32 xirr,
 struct kvmppc_irq_map *irq_map,
@@ -747,6 +796,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
xics = vcpu->kvm->arch.xics;
icp = vcpu->arch.icp;
 
+   kvmppc_rm_handle_irq_desc(irq_map->desc);
icp_rm_deliver_irq(xics, icp, irq);
 
/* EOI the interrupt */
-- 
1.8.3.4


[PATCH 10/14] KVM: PPC: Book3S HV: Dump irqmap in debugfs

2016-02-26 Thread Suresh Warrier
Dump the passthrough irqmap structure associated with a
guest as part of /sys/kernel/debug/powerpc/kvm-xics-*.
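
For illustration, the new section of the dump would look something
like this (hypothetical IRQ numbers; the format follows the
seq_printf() calls in the patch):

	PIRQMAP Cache: 2 maps
	===
	r_hwirq=1a, v_hwirq=1000
	r_hwirq=1b, v_hwirq=1001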

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_xics.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index b90570c..855d669 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -905,6 +905,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
 
 /* -- Initialisation code etc. -- */
 
+static void xics_debugfs_irqmap(struct seq_file *m,
+   struct kvmppc_passthru_irqmap *pimap)
+{
+   int i;
+
+   if (!pimap)
+   return;
+   seq_printf(m, "\nPIRQMAP Cache: %d maps\n===\n",
+   pimap->n_cached);
+   for (i = 0; i < pimap->n_cached; i++)  {
+   seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
+   pimap->cached[i].r_hwirq, pimap->cached[i].v_hwirq);
+   }
+}
+
 static int xics_debug_show(struct seq_file *m, void *private)
 {
struct kvmppc_xics *xics = m->private;
@@ -926,6 +941,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
t_check_resend = 0;
t_reject = 0;
 
+   xics_debugfs_irqmap(m, kvm->arch.pimap);
+
seq_printf(m, "=\nICP state\n=\n");
 
kvm_for_each_vcpu(i, vcpu, kvm) {
-- 
1.8.3.4


[PATCH 09/14] KVM: PPC: Book3S HV: Enable KVM real mode handling of passthrough IRQs

2016-02-26 Thread Suresh Warrier
The KVM real mode passthrough handling code only searches for
"cached" IRQ maps in the passthrough IRQ map when checking for
passthrough IRQs that can be redirected to the guest.
This patch enables KVM real mode handling of passthrough IRQs
by turning on caching of selected passthrough IRQs. Currently,
we follow a simple method and cache any passthrough IRQ when its
virtual IRQ is first injected into the guest.

Since we have a limit of 16 cache entries per guest, this will
limit passthrough IRQs that are handled in KVM real mode to 16.
This should work well for the general case of VMs with a small
number of passthrough adapters or SR-IOV VFs. In the future, we
can increase the number of cached entries, but we would then need
to come up with faster search/filtering mechanisms for an IRQ in
the map of cached passthrough IRQs.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/include/asm/kvm_ppc.h  |  2 ++
 arch/powerpc/kvm/book3s.c   | 10 +
 arch/powerpc/kvm/book3s_hv.c|  4 
 arch/powerpc/kvm/book3s_xics.c  | 41 +
 arch/powerpc/kvm/book3s_xics.h  |  2 ++
 6 files changed, 60 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index fc10248..558d195 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -63,6 +63,7 @@ extern int kvm_unmap_hva_range(struct kvm *kvm,
 extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+extern int kvmppc_cache_passthru_irq(struct kvm *kvm, int guest_gsi);
 
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 unsigned long address)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b19bb30..93531cc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -484,6 +484,8 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xics_ipi_action(void);
+extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq);
+extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq);
 extern int h_ipi_redirect;
 #else
 static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 2492b7e..1b4f5bd 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -953,6 +953,16 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
 }
 
+int kvmppc_cache_passthru_irq(struct kvm *kvm, int irq)
+{
+   int r = 0;
+
+   if (kvm->arch.kvm_ops->cache_passthru_irq)
+   r = kvm->arch.kvm_ops->cache_passthru_irq(kvm, irq);
+
+   return r;
+}
+
 static int kvmppc_book3s_init(void)
 {
int r;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cc5aea96..487657f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3468,6 +3468,8 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 
pimap->n_mapped++;
 
+   kvmppc_xics_set_mapped(kvm, guest_gsi);
+
if (!kvm->arch.pimap)
kvm->arch.pimap = pimap;
 
@@ -3522,6 +3524,8 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
if (i != pimap->n_mapped)
pimap->mapped[i] = pimap->mapped[pimap->n_mapped];
 
+   kvmppc_xics_clr_mapped(kvm, guest_gsi);
+
/*
 * We don't free this structure even when the count goes to
 * zero. The structure is freed when we destroy the VM.
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index be23f88..b90570c 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -88,6 +88,18 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
return -EINVAL;
 
/*
+* If this is a mapped passthrough IRQ that is not cached,
+* add this to the IRQ cached map so that real mode KVM
+* will redirect this directly to the guest where possible.
+* Currently, we will cache a passthrough IRQ the first time
+* we  inject it into the guest.
+*/
+   if (state->pmapped && !state->pcached) {
+   if (kvmppc_cache_passthru_irq(xics->kvm, irq) == 0)
+   state->pcached = 1;
+   }
+
+   /*

[PATCH 07/14] KVM: PPC: Book3S HV: Handle passthrough interrupts in guest

2016-02-26 Thread Suresh Warrier
Currently, KVM switches back to the host to handle any external
interrupt (when the interrupt is received while running in the
guest). This patch updates real-mode KVM to check if an interrupt
is generated by a passthrough adapter that is owned by this guest.
If so, the real mode KVM will directly inject the corresponding
virtual interrupt to the guest VCPU's ICS and also EOI the interrupt
in hardware. In short, the interrupt is handled entirely in real
mode in the guest context without switching back to the host.

In some rare cases, the interrupt cannot be completely handled in
real mode, for instance, a VCPU that is sleeping needs to be woken
up. In this case, KVM simply switches back to the host with trap
reason set to 0x500. This works, but it is clearly not very efficient.
A following patch will distinguish this case and handle it
correctly in the host. Note that we can use the existing
check_too_hard() routine even though we are not in a hypercall to
determine if there is unfinished business that needs to be
completed in host virtual mode.

The patch assumes that the mapping between the host hardware IRQ
and the virtual IRQ to be injected into the guest already exists for
the PCI passthrough interrupts that need to be handled in real mode.
If the mapping does not exist, KVM falls back to the existing
default behavior.

The KVM real mode code only reads mappings from the cached array
in the passthrough IRQ map. The caching code fails if there are
no more cache slots available, and the uncaching code is only
called when a mapping is removed. We also carefully order the
loads and stores of the fields in the kvmppc_irq_map data structure
using memory barriers to avoid an inconsistent mapping being seen
by the reader. Thus, although it is possible to miss a cache entry,
it is not possible to read a stale value.
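
Condensed, the publish/consume pairing used for a cache entry looks
like this (a sketch assembled from the hunks in this series):

	/* writer, virtual mode: */
	cached->v_hwirq = map->v_hwirq;
	cached->desc    = map->desc;
	smp_wmb();			/* publish fields before the key */
	cached->r_hwirq = map->r_hwirq;	/* readers match on this field */

	/* reader, real mode: */
	if (xisr == cached->r_hwirq) {
		smp_rmb();		/* pairs with the smp_wmb() above */
		/* v_hwirq and desc are now seen consistently */
	}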

One additional complication involves hot plugging of SR-IOV
functions. If an SR-IOV function gets removed and then re-added
through hot plug to the same guest, it is possible for the HW IRQ
to be assigned a new value for the guest GSI. To ensure that the KVM
real mode handlers do not read a stale value for this case, we
call kick_all_cpus_sync() after unmapping, which does not return
until every VCPU executing in the guest has come back to the host
at least once.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h|  3 ++
 arch/powerpc/include/asm/pnv-pci.h|  1 +
 arch/powerpc/kvm/book3s_hv.c  | 21 ++
 arch/powerpc/kvm/book3s_hv_builtin.c  | 64 +++
 arch/powerpc/kvm/book3s_hv_rm_xics.c  | 44 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 16 
 arch/powerpc/platforms/powernv/pci-ioda.c | 14 +--
 7 files changed, 160 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 4107f7f..c5c7386 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -226,6 +226,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
 extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
 extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
+struct kvmppc_irq_map *irq_map,
+struct kvmppc_passthru_irqmap *pimap);
 
 void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu);
 void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 6f77f71..f0564ee 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -20,6 +20,7 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
 void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
 int pnv_cxl_get_irq_count(struct pci_dev *dev);
 struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
+int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq);
 
 #ifdef CONFIG_CXL_BASE
 int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 97150f0..8504a5d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3343,6 +3343,12 @@ static int kvmppc_cache_passthru_irq_hv(struct kvm *kvm, int irq)
 
pimap->cached[cidx].v_hwirq = pimap->mapped[midx].v_hwirq;
pimap->cached[cidx].desc = pimap->mapped[midx].desc;
+
+   /*
+* Order the above two stores before the next to serialize with
+* the KVM real mode handler.
+*/
+   smp_wmb();
pimap->cached[cidx].r_hwirq = pimap->mapped[midx].r_hwirq;
 
if (cidx >= pimap->n_cached)
@@ -3369,6 +3375,10 @@ static void _uncache_passthru_irq(struct kvmppc_pa

[PATCH 08/14] KVM: PPC: Book3S HV: Complete passthrough interrupt in host

2016-02-26 Thread Suresh Warrier
In the existing real mode ICP code, when updating the virtual ICP
state, if there is a required action that cannot be completely
handled in real mode, for instance, waking up a sleeping VCPU,
flags are set in the ICP to indicate the required action.
This is checked when returning from hypercalls to decide whether
the call needs to switch back to the host, where the action can be
performed in virtual mode. Note that if h_ipi_redirect is enabled,
real mode code will first try to message a free host CPU to
complete this job instead of returning to the host to do it ourselves.

Currently, the real mode PCI passthrough interrupt handling code
checks if any of these flags are set and simply returns to the host.
This is not good enough as the trap value (0x500) is treated as an
external interrupt by the host code. It is only when the trap value
is a hypercall that the host code searches for and acts on unfinished
work by calling kvmppc_xics_rm_complete.

This patch introduces a special trap, BOOK3S_INTERRUPT_HV_RM_HARD,
which is returned by KVM if there is unfinished business to be
completed in host virtual mode after handling a PCI passthrough
interrupt. The host checks for this special interrupt condition
and calls into kvmppc_xics_rm_complete(), which is made an
exported function for this reason.
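
In outline, the host side looks roughly like this (a sketch; the
exact call sites live in the HV exit and VCPU run paths):

	case BOOK3S_INTERRUPT_HV_RM_HARD:
		r = RESUME_PASSTHROUGH;
		break;

	/* ... later, on the resume path ... */
	if (r == RESUME_PASSTHROUGH)
		r = kvmppc_xics_rm_complete(vcpu, 0);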

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_asm.h  | 10 ++
 arch/powerpc/include/asm/kvm_ppc.h  |  3 +++
 arch/powerpc/kvm/book3s_hv.c|  8 +++-
 arch/powerpc/kvm/book3s_hv_builtin.c|  1 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c|  2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 26 ++
 arch/powerpc/kvm/book3s_xics.c  |  3 ++-
 7 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 5bca220..05cabed 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -105,6 +105,15 @@
 #define BOOK3S_INTERRUPT_FAC_UNAVAIL   0xf60
 #define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
 
+/* book3s_hv */
+
+/*
+ * Special trap used to indicate to host that this is a
+ * passthrough interrupt that could not be handled
+ * completely in the guest.
+ */
+#define BOOK3S_INTERRUPT_HV_RM_HARD	0x5555
+
 #define BOOK3S_IRQPRIO_SYSTEM_RESET0
 #define BOOK3S_IRQPRIO_DATA_SEGMENT1
 #define BOOK3S_IRQPRIO_INST_SEGMENT2
@@ -136,6 +145,7 @@
 #define RESUME_FLAG_NV  (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST(1<<1)  /* Resume host? */
 #define RESUME_FLAG_ARCH1  (1<<2)
+#define RESUME_FLAG_ARCH2  (1<<3)
 
 #define RESUME_GUEST0
 #define RESUME_GUEST_NV RESUME_FLAG_NV
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c5c7386..b19bb30 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -474,6 +474,7 @@ static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
 extern void kvmppc_alloc_host_rm_ops(void);
 extern void kvmppc_free_host_rm_ops(void);
 extern void kvmppc_free_pimap(struct kvm *kvm);
+extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
 extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -491,6 +492,8 @@ static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
 static inline void kvmppc_free_pimap(struct kvm *kvm) {};
+static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+   { return 0; }
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8504a5d..cc5aea96 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -70,6 +70,8 @@
 
 /* Used to indicate that a guest page fault needs to be handled */
 #define RESUME_PAGE_FAULT  (RESUME_GUEST | RESUME_FLAG_ARCH1)
+/* Used to indicate that a guest passthrough interrupt needs to be handled */
+#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2)
 
 /* Used as a "null" value for timebase values */
 #define TB_NIL (~(u64)0)
@@ -991,6 +993,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
break;
+   case BOOK3S_INTERRUPT_HV_RM_HARD:
+   r = RESUME_PASSTHROUGH;
+   break;
default:
kvmppc_dump_regs(vcpu);
pr

[PATCH 03/14] KVM: PPC: select IRQ_BYPASS_MANAGER

2016-02-26 Thread Suresh Warrier
Select IRQ_BYPASS_MANAGER for PPC when CONFIG_KVM is set.
Add the PPC functions for adding and deleting an IRQ bypass
producer.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h |  4 
 arch/powerpc/kvm/Kconfig   |  2 ++
 arch/powerpc/kvm/book3s.c  | 32 
 3 files changed, 38 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 197a8ac..780a017 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -287,6 +287,10 @@ struct kvmppc_ops {
long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
  unsigned long arg);
int (*hcall_implemented)(unsigned long hcall);
+   int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
+  struct irq_bypass_producer *);
+   void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
+   struct irq_bypass_producer *);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c2024ac..7ac0569 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,6 +22,8 @@ config KVM
select ANON_INODES
select HAVE_KVM_EVENTFD
select SRCU
+   select IRQ_BYPASS_MANAGER
+   select HAVE_KVM_IRQ_BYPASS
 
 config KVM_BOOK3S_HANDLER
bool
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b34220d..2492b7e 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -35,6 +35,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "book3s.h"
 #include "trace.h"
@@ -921,6 +923,36 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
return kvm->arch.kvm_ops->hcall_implemented(hcall);
 }
 
+/*
+ * irq_bypass_add_producer and irq_bypass_del_producer are only
+ * useful if the architecture supports PCI passthrough.
+ * irq_bypass_stop and irq_bypass_start are not needed and so
+ * kvm_ops are not defined for them.
+ */
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+struct irq_bypass_producer *prod)
+{
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+   struct kvm *kvm = irqfd->kvm;
+
+   if (kvm->arch.kvm_ops->irq_bypass_add_producer)
+   return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
+
+   return 0;
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+   struct kvm *kvm = irqfd->kvm;
+
+   if (kvm->arch.kvm_ops->irq_bypass_del_producer)
+   kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
+}
+
 static int kvmppc_book3s_init(void)
 {
int r;
-- 
1.8.3.4


[PATCH 04/14] KVM: PPC: Book3S HV: Introduce kvmppc_passthru_irqmap

2016-02-26 Thread Suresh Warrier
This patch introduces an IRQ mapping structure, the
kvmppc_passthru_irqmap structure, that is used
to map the real hardware IRQ in the host to the virtual
hardware IRQ (gsi) that is injected into a guest by KVM for
passthrough adapters.

Currently, we assume a separate IRQ mapping structure for
each guest. Each kvmppc_passthru_irqmap has two mapping arrays:
the mapped array contains all defined real<->virtual IRQs, while
the cached array can be used to limit the real<->virtual IRQs to
a smaller subset, like a cache of the most frequently used
mappings.
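
The intent is that real-mode handlers only ever scan the small cached
array; a sketch of the lookup pattern used later in the series:

	/* at most KVMPPC_PIRQ_CACHED (16) iterations in real mode */
	for (i = 0; i < pimap->n_cached; i++)
		if (xisr == pimap->cached[i].r_hwirq)
			return &pimap->cached[i];
	return NULL;	/* not cached: fall back to host handling */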

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_host.h | 21 +
 arch/powerpc/include/asm/kvm_ppc.h  | 15 +++
 arch/powerpc/kvm/book3s_hv.c| 19 +++
 3 files changed, 55 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ffdbc2d..fc10248 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -191,6 +191,8 @@ struct kvmppc_spapr_tce_table {
 struct kvmppc_xics;
 struct kvmppc_icp;
 
+struct kvmppc_passthru_irqmap;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -261,6 +263,7 @@ struct kvm_arch {
 #endif
 #ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
+   struct kvmppc_passthru_irqmap *pimap;
 #endif
struct kvmppc_ops *kvm_ops;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -391,6 +394,24 @@ struct kvmhv_tb_accumulator {
u64 tb_max; /* max time */
 };
 
+#ifdef CONFIG_PPC_BOOK3S_64
+struct kvmppc_irq_map {
+   u32 r_hwirq;
+   u32 v_hwirq;
+   struct irq_desc *desc;
+};
+
+#define KVMPPC_PIRQ_CACHED  16
+#define KVMPPC_PIRQ_MAPPED  1024
+struct kvmppc_passthru_irqmap {
+   int n_cached;
+   int n_mapped;
+   struct irq_chip *irq_chip;
+   struct kvmppc_irq_map cached[KVMPPC_PIRQ_CACHED];
+   struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
+};
+#endif
+
 # ifdef CONFIG_PPC_FSL_BOOK3E
 #define KVMPPC_BOOKE_IAC_NUM   2
 #define KVMPPC_BOOKE_DAC_NUM   2
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 780a017..75d4c64 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -457,8 +457,19 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 {
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
 }
+
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+   struct kvm_vcpu *vcpu)
+{
+   if (vcpu)
+   return vcpu->kvm->arch.pimap;
+   else
+   return NULL;
+}
+
 extern void kvmppc_alloc_host_rm_ops(void);
 extern void kvmppc_free_host_rm_ops(void);
+extern void kvmppc_free_pimap(struct kvm *kvm);
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
 extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -470,8 +481,12 @@ extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
 extern void kvmppc_xics_ipi_action(void);
 extern int h_ipi_redirect;
 #else
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+   struct kvm_vcpu *vcpu)
+   { return NULL; }
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
+static inline void kvmppc_free_pimap(struct kvm *kvm) {};
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f47fffe..22d3054 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3275,6 +3275,25 @@ static int kvmppc_core_check_processor_compat_hv(void)
return 0;
 }
 
+#ifdef CONFIG_KVM_XICS
+
+void kvmppc_free_pimap(struct kvm *kvm)
+{
+   kfree(kvm->arch.pimap);
+}
+
+struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(struct irq_desc *desc)
+{
+   struct kvmppc_passthru_irqmap *pimap;
+
+   pimap = kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
+   if (pimap != NULL)
+   pimap->irq_chip = irq_data_get_irq_chip(&desc->irq_data);
+
+   return pimap;
+}
+#endif
+
 static long kvm_arch_vm_ioctl_hv(struct file *filp,
 unsigned int ioctl, unsigned long arg)
 {
-- 
1.8.3.4


[PATCH 02/14] KVM: PPC: Book3S HV: Convert kvmppc_read_intr to a C function

2016-02-26 Thread Suresh Warrier
Modify kvmppc_read_intr to make it a C function.

This also adds the optimization of clearing saved_xirr in the case
where we completely handle and EOI an IPI. Without this, the next
device interrupt would require two trips through the host interrupt
handling code.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv_builtin.c|  84 +++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 184 +---
 2 files changed, 179 insertions(+), 89 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 5f0380d..5db386a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define KVM_CMA_CHUNK_ORDER18
 
@@ -286,3 +287,86 @@ void kvmhv_commence_exit(int trap)
 
 struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
 EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
+
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ * 0 if no interrupt is pending
+ * 1 if an interrupt is pending that needs to be handled by the host
+ * -1 if there was a guest wakeup IPI (which has now been cleared)
+ */
+
+long kvmppc_read_intr(struct kvm_vcpu *vcpu, int path)
+{
+   unsigned long xics_phys;
+   u32 h_xirr;
+   __be32 xirr;
+   u32 xisr;
+   u8 host_ipi;
+
+   /* see if a host IPI is pending */
+   host_ipi = local_paca->kvm_hstate.host_ipi;
+   if (host_ipi)
+   return 1;
+
+   /* Now read the interrupt from the ICP */
+   xics_phys = local_paca->kvm_hstate.xics_phys;
+   if (unlikely(!xics_phys))
+   return 1;
+
+   /*
+* Save XIRR for later. Since we get control in reverse endian
+* on LE systems, save it byte reversed and fetch it back in
+* host endian. Note that xirr is the value read from the
+* XIRR register, while h_xirr is the host endian version.
+*/
+   xirr = _lwzcix(xics_phys + XICS_XIRR);
+   h_xirr = be32_to_cpu(xirr);
+   local_paca->kvm_hstate.saved_xirr = h_xirr;
+   xisr = h_xirr & 0xff;
+   /*
+* Ensure that the store/load complete to guarantee all side
+* effects of loading from XIRR has completed
+*/
+   smp_mb();
+
+   /* if nothing pending in the ICP */
+   if (!xisr)
+   return 0;
+
+   /* We found something in the ICP...
+*
+* If it is an IPI, clear the MFRR and EOI it.
+*/
+   if (xisr == XICS_IPI) {
+   _stbcix(xics_phys + XICS_MFRR, 0xff);
+   _stwcix(xics_phys + XICS_XIRR, xirr);
+   /*
+* Need to ensure side effects of above stores
+* complete before proceeding.
+*/
+   smp_mb();
+
+   /*
+* We need to re-check host IPI now in case it got set in the
+* meantime. If it's clear, we bounce the interrupt to the
+* guest
+*/
+   host_ipi = local_paca->kvm_hstate.host_ipi;
+   if (unlikely(host_ipi != 0)) {
+   /* We raced with the host,
+* we need to resend that IPI, bummer
+*/
+   _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
+   /* Let side effects complete */
+   smp_mb();
+   return 1;
+   }
+
+   /* OK, it's an IPI for us */
+   local_paca->kvm_hstate.saved_xirr = 0;
+   return -1;
+   }
+
+   return 1;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index ed16182..29e6a8a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -220,6 +220,13 @@ kvmppc_primary_no_guest:
li  r3, 0   /* Don't wake on privileged (OS) doorbell */
b   kvm_do_nap
 
+/*
+ * kvm_novcpu_wakeup
+ * Entered from kvm_start_guest if kvm_hstate.napping is set
+ * to NAPPING_NOVCPU
+ * r2 = kernel TOC
+ * r13 = paca
+ */
 kvm_novcpu_wakeup:
ld  r1, HSTATE_HOST_R1(r13)
ld  r5, HSTATE_KVM_VCORE(r13)
@@ -227,8 +234,18 @@ kvm_novcpu_wakeup:
stb r0, HSTATE_NAPPING(r13)
 
/* check the wake reason */
+   ld  r3, HSTATE_KVM_VCPU(r13)
bl  kvmppc_check_wake_reason
 
+   /*
+* Restore volatile registers since we could have called
+* a C routine in kvmppc_check_wake_reason.
+* Wake reason (trap) is returned through r31
+*  r5 = VCORE
+*/
+   ld  r5, HSTATE_KVM_VCORE(r13)
+   mr  r12, r31
+
/* see if any other thread is already exiting */
lwz r0, VCORE_ENTRY_

[PATCH 00/14] PCI Passthrough Interrupt Optimizations

2016-02-26 Thread Suresh Warrier
This patch set adds support for handling interrupts for PCI adapters
entirely in the guest under the right conditions. When an interrupt
is received by KVM in real mode, if the interrupt is from a PCI
passthrough adapter owned by the guest, KVM will update the virtual
ICP for the VCPU that is the target of the interrupt entirely in
real mode and generate the virtual interrupt. If the VCPU is not
running in the guest, it will wake up the VCPU.  It will also update
the affinity of the interrupt to directly target the CPU (core)
where this VCPU is being scheduled as an optimization. 

KVM needs the mapping between hardware interrupt numbers in the host
to the virtual hardware interrupt (GSI) that needs to get injected
into the guest. This patch set takes advantage of the IRQ bypass
manager feature to create this mapping. For now, we allocate and
manage a separate mapping structure per VM.

Although a mapping is created for every passthrough IRQ requested
in the guest, we also maintain a cache of mappings that is used to
speed up search. For now, KVM real mode code only looks in the cache for
a mapping. If no mapping is found, we fall back on the usual interrupt
routing mechanism - switch back to host and run the VFIO interrupt
handler.

This is based on 4.5-rc1 plus the patch set in
http://www.spinics.net/lists/kvm-ppc/msg11131.html since it has
dependencies on vmalloc_to_phys() being public.

Suresh Warrier (14):
  powerpc: Add simple cache inhibited MMIO accessors
  KVM: PPC: Book3S HV: Convert kvmppc_read_intr to a C function
  KVM: PPC: select IRQ_BYPASS_MANAGER
  KVM: PPC: Book3S HV: Introduce kvmppc_passthru_irqmap
  KVM: PPC: Book3S HV: Enable IRQ bypass
  KVM: PPC: Book3S HV: Caching for passthrough IRQ map
  KVM: PPC: Book3S HV: Handle passthrough interrupts in guest
  KVM: PPC: Book3S HV: Complete passthrough interrupt in host
  KVM: PPC: Book3S HV: Enable KVM real mode handling of passthrough IRQs
  KVM: PPC: Book3S HV: Dump irqmap in debugfs
  KVM: PPC: Book3S HV: Tunable to disable KVM IRQ bypass
  KVM: PPC: Book3S HV: Update irq stats for IRQs handled in real mode
  KVM: PPC: Book3S HV: Change affinity for passthrough IRQ
  KVM: PPC: Book3S HV: Counters for passthrough IRQ stats

 arch/powerpc/include/asm/io.h |  28 +++
 arch/powerpc/include/asm/kvm_asm.h|  10 +
 arch/powerpc/include/asm/kvm_book3s.h |   1 +
 arch/powerpc/include/asm/kvm_host.h   |  25 +++
 arch/powerpc/include/asm/kvm_ppc.h|  28 +++
 arch/powerpc/include/asm/pnv-pci.h|   1 +
 arch/powerpc/kvm/Kconfig  |   2 +
 arch/powerpc/kvm/book3s.c |  45 +
 arch/powerpc/kvm/book3s_hv.c  | 318 +-
 arch/powerpc/kvm/book3s_hv_builtin.c  | 157 +++
 arch/powerpc/kvm/book3s_hv_rm_xics.c  | 181 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 226 -
 arch/powerpc/kvm/book3s_xics.c|  68 ++-
 arch/powerpc/kvm/book3s_xics.h|   3 +
 arch/powerpc/platforms/powernv/pci-ioda.c |  14 +-
 15 files changed, 1013 insertions(+), 94 deletions(-)

-- 
1.8.3.4


[PATCH 06/14] KVM: PPC: Book3S HV: Caching for passthrough IRQ map

2016-02-26 Thread Suresh Warrier
Add the following functions to support caching of the
IRQ mapped entries:

kvmppc_cache_passthru_irq()
  Caches an existing mapping in the cached array.

_uncache_passthru_irq()
  Uncaches a cached entry. This is an internal function and
  is only invoked when unmapping a passthrough IRQ mapping;
  there is no support for uncaching an entry on its own.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h |  1 +
 arch/powerpc/kvm/book3s_hv.c   | 97 ++
 2 files changed, 98 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 75d4c64..4107f7f 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -291,6 +291,7 @@ struct kvmppc_ops {
   struct irq_bypass_producer *);
void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
+   int (*cache_passthru_irq)(struct kvm *kvm, int irq);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4d802b8..97150f0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3297,6 +3297,95 @@ static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(struct irq_desc *desc)
return pimap;
 }
 
+/*
+ * Cache a passthrough IRQ
+ * This is accomplished by copying the IRQ details from the
+ * mapped array to the cached array.
+ *
+ * Return:
+ * 0:  if this was accomplished successfully
+ * 1:  if the caching could not be done
+ */
+static int kvmppc_cache_passthru_irq_hv(struct kvm *kvm, int irq)
+{
+   struct kvmppc_passthru_irqmap *pimap;
+   int cidx, midx;
+
+   mutex_lock(&kvm->lock);
+
+   if (kvm->arch.pimap == NULL)
+   goto err_out;
+
+   pimap = kvm->arch.pimap;
+
+   /* Look for first empty slot */
+   for (cidx = 0; cidx < KVMPPC_PIRQ_CACHED; cidx++)
+   if (pimap->cached[cidx].r_hwirq == 0)
+   break;
+
+   /* Out of empty cache slots */
+   if (cidx == KVMPPC_PIRQ_CACHED)
+   goto err_out;
+
+   /* Find entry in the mapped array */
+   for (midx = 0; midx < pimap->n_mapped; midx++) {
+   if (irq == pimap->mapped[midx].v_hwirq)
+   break;
+   }
+
+   /* IRQ not found */
+   if (midx == pimap->n_mapped)
+   goto err_out;
+
+   if (pimap->mapped[midx].r_hwirq == 0)
+   /* Someone beat us to caching the IRQ */
+   goto err_out;
+
+   pimap->cached[cidx].v_hwirq = pimap->mapped[midx].v_hwirq;
+   pimap->cached[cidx].desc = pimap->mapped[midx].desc;
+   pimap->cached[cidx].r_hwirq = pimap->mapped[midx].r_hwirq;
+
+   if (cidx >= pimap->n_cached)
+   pimap->n_cached = cidx + 1;
+
+   /* r_hwirq == 0 in mapped array to indicate a cached IRQ */
+   pimap->mapped[midx].r_hwirq = 0;
+
+   mutex_unlock(&kvm->lock);
+   return 0;
+
+err_out:
+   mutex_unlock(&kvm->lock);
+   return 1;
+}
+
+/* Called with kvm->lock already acquired */
+static void _uncache_passthru_irq(struct kvmppc_passthru_irqmap *pimap, int irq)
+{
+   int i;
+
+   for (i = 0; i < pimap->n_cached; i++) {
+   if (irq == pimap->cached[i].v_hwirq) {
+
+   /*
+* Zero out the IRQ being uncached.
+*/
+   pimap->cached[i].r_hwirq = 0;
+   pimap->cached[i].v_hwirq = 0;
+   pimap->cached[i].desc = NULL;
+
+   /*
+* Only need to decrement maximum cached count if
+* this is the highest entry being uncached.
+*/
+   if (i + 1 == pimap->n_cached)
+   pimap->n_cached--;
+   return;
+   }
+   }
+
+}
+
 static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 {
struct irq_desc *desc;
@@ -3391,6 +3480,13 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
}
 
/*
+* If this is a cached IRQ, remove it from the cached array also.
+* mapped.r_hwirq is set to zero when we cache an entry.
+*/
+   if (!pimap->mapped[i].r_hwirq)
+   _uncache_passthru_irq(pimap, guest_gsi);
+
+   /*
 * Replace mapped entry to be cleared with highest entry (unless
 * this is already the highest) so as to not leave any holes in
 * the array of mapped.
@@ -3567,6 +3663,7 @@ static struct kvmppc_ops kvm_ops_hv = {
 #ifdef CONFIG_KVM_XICS
.irq_bypass_add_produc

[PATCH 05/14] KVM: PPC: Book3S HV: Enable IRQ bypass

2016-02-26 Thread Suresh Warrier
Add the irq_bypass_add_producer and irq_bypass_del_producer
functions. These functions get called whenever a GSI is being
defined for a guest. They create/remove the mapping between
host real IRQ numbers and the guest GSI.

Add the following helper functions to manage the
passthrough IRQ map.

kvmppc_set_passthru_irq()
  Creates a mapping in the passthrough IRQ map that maps a host
  IRQ to a guest GSI. It allocates the structure (one per guest VM)
  the first time it is called.

kvmppc_clr_passthru_irq()
  Removes the passthrough IRQ map entry given a guest GSI.
  The passthrough IRQ map structure is not freed even when the
  number of mapped entries goes to zero. It is only freed when
  the VM is destroyed.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv.c | 158 ++-
 1 file changed, 157 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -55,6 +55,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #include "book3s.h"
@@ -3246,6 +3248,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvmppc_free_vcores(kvm);
 
kvmppc_free_hpt(kvm);
+
+   kvmppc_free_pimap(kvm);
 }
 
 /* We don't need to emulate any privileged instructions or dcbz */
@@ -3282,7 +3286,7 @@ void kvmppc_free_pimap(struct kvm *kvm)
kfree(kvm->arch.pimap);
 }
 
-struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(struct irq_desc *desc)
+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(struct irq_desc *desc)
 {
struct kvmppc_passthru_irqmap *pimap;
 
@@ -3292,6 +3296,154 @@ struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(struct irq_desc *desc)
 
return pimap;
 }
+
+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+   struct irq_desc *desc;
+   struct kvmppc_irq_map *irq_map;
+   struct kvmppc_passthru_irqmap *pimap;
+   struct irq_chip *chip;
+   int i;
+
+   desc = irq_to_desc(host_irq);
+   if (!desc)
+   return -EIO;
+
+   mutex_lock(&kvm->lock);
+
+   if (kvm->arch.pimap == NULL) {
+   /* First call, allocate structure to hold IRQ map */
+   pimap = kvmppc_alloc_pimap(desc);
+   if (pimap == NULL) {
+   mutex_unlock(&kvm->lock);
+   return -ENOMEM;
+   }
+   } else
+   pimap = kvm->arch.pimap;
+
+   /*
+* For now, we support only a single IRQ chip
+*/
+   chip = irq_data_get_irq_chip(&desc->irq_data);
+   if (!chip || (strcmp(chip->name, pimap->irq_chip->name) != 0)) {
+   pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
+   host_irq, guest_gsi);
+   mutex_unlock(&kvm->lock);
+   return -ENOENT;
+   }
+
+   if (pimap->n_mapped == KVMPPC_PIRQ_MAPPED) {
+   mutex_unlock(&kvm->lock);
+   return -EAGAIN;
+   }
+
+   for (i = 0; i < pimap->n_mapped; i++) {
+   if (guest_gsi == pimap->mapped[i].v_hwirq) {
+   mutex_unlock(&kvm->lock);
+   return -EINVAL;
+   }
+   }
+
+   irq_map = &pimap->mapped[pimap->n_mapped];
+
+   irq_map->v_hwirq = guest_gsi;
+   irq_map->r_hwirq = desc->irq_data.hwirq;
+   irq_map->desc = desc;
+
+   pimap->n_mapped++;
+
+   if (!kvm->arch.pimap)
+   kvm->arch.pimap = pimap;
+
+   mutex_unlock(&kvm->lock);
+
+   return 0;
+}
+
+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+   struct irq_desc *desc;
+   struct kvmppc_passthru_irqmap *pimap;
+   int i;
+
+   desc = irq_to_desc(host_irq);
+   if (!desc)
+   return -EIO;
+
+   mutex_lock(&kvm->lock);
+
+   if (kvm->arch.pimap == NULL) {
+   mutex_unlock(&kvm->lock);
+   return 0;
+   }
+   pimap = kvm->arch.pimap;
+
+   WARN_ON(pimap->n_mapped < 1);
+
+   for (i = 0; i < pimap->n_mapped; i++) {
+   if (guest_gsi == pimap->mapped[i].v_hwirq)
+   break;
+   }
+
+   if (i == pimap->n_mapped) {
+   mutex_unlock(&kvm->lock);
+   return -ENODEV;
+   }
+
+   /*
+* Replace mapped entry to be cleared with highest entry (unless
+* this is already the highest) so as to not leave any holes in
+* the array of mapped.
+*/
+   pimap->n_mapped--;
+   if (i != pimap->n_mapped)
+   pimap->mapped[i] = pimap->mapped[pimap->n_mapped];

[PATCH 01/14] powerpc: Add simple cache inhibited MMIO accessors

2016-02-26 Thread Suresh Warrier
Add simple cache-inhibited accessors for memory mapped I/O.
Unlike the accessors built from the DEF_MMIO_* macros, these
don't include any hardware memory barriers; callers need to
manage memory barriers on their own. These can only be called
in hypervisor mode.
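
A typical use, taken from later in this series, pairs the raw
accessors with an explicit barrier:

	/* EOI an IPI on the XICS ICP; xics_phys is the real-mode base */
	_stbcix(xics_phys + XICS_MFRR, 0xff);	/* clear MFRR */
	_stwcix(xics_phys + XICS_XIRR, xirr);	/* EOI the interrupt */
	smp_mb();	/* ensure side effects of the stores complete */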

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/io.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 6c1297e..d329a01 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -241,6 +241,34 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
 #endif
 #endif /* __powerpc64__ */
 
+
+/*
+ * Simple Cache inhibited accessors
+ * Unlike the DEF_MMIO_* macros, these don't include any h/w memory
+ * barriers, callers need to manage memory barriers on their own.
+ */
+
+static inline u32 _lwzcix(unsigned long addr)
+{
+   u32 ret;
+
+   __asm__ __volatile__("lwzcix %0,0, %1"
+: "=r" (ret) : "r" (addr) : "memory");
+   return ret;
+}
+
+static inline void _stbcix(u64 addr, u8 val)
+{
+   __asm__ __volatile__("stbcix %0,0,%1"
+   : : "r" (val), "r" (addr) : "memory");
+}
+
+static inline void _stwcix(u64 addr, u32 val)
+{
+   __asm__ __volatile__("stwcix %0,0,%1"
+   : : "r" (val), "r" (addr) : "memory");
+}
+
 /*
  * Low level IO stream instructions are defined out of line for now
  */
-- 
1.8.3.4


[PATCH v3 2/9] powerpc/smp: Add smp_muxed_ipi_set_message

2015-12-17 Thread Suresh Warrier
smp_muxed_ipi_message_pass() invokes smp_ops->cause_ipi, which
uses an ioremapped address to access registers on the XICS
interrupt controller to cause the IPI. Because of this, real
mode callers cannot call smp_muxed_ipi_message_pass() for IPI
messaging.

This patch creates a separate function smp_muxed_ipi_set_message
just to set the IPI message without the cause_ipi routine.
After calling this function to set the IPI message, real
mode callers must cause the IPI by writing to the XICS registers
directly.

As part of this, we also change smp_muxed_ipi_message_pass
to call smp_muxed_ipi_set_message to set the message instead
of doing it directly inside the routine.
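
A real-mode caller would then look roughly like this (a sketch;
icp_native_cause_ipi_rm is added by a later patch in this series):

	smp_muxed_ipi_set_message(cpu, PPC_MSG_RM_HOST_ACTION);
	/* cause the IPI by writing the XICS MFRR directly */
	icp_native_cause_ipi_rm(cpu);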

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/smp.h | 1 +
 arch/powerpc/kernel/smp.c  | 9 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 9ef9c37..78083ed 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -124,6 +124,7 @@ extern const char *smp_ipi_name[];
 /* for irq controllers with only a single ipi */
 extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
 extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_set_message(int cpu, int msg);
 extern irqreturn_t smp_ipi_demux(void);
 
 void smp_init_pSeries(void);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a53a130..e222efc 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data;
 }
 
-void smp_muxed_ipi_message_pass(int cpu, int msg)
+void smp_muxed_ipi_set_message(int cpu, int msg)
 {
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
 */
smp_mb();
message[msg] = 1;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+   struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+   smp_muxed_ipi_set_message(cpu, msg);
/*
 * cause_ipi functions are required to include a full barrier
 * before doing whatever causes the IPI.
-- 
1.8.3.4


[PATCH v3 0/9] KVM: PPC: Book3S HV: Optimize wakeup VCPU from H_IPI

2015-12-17 Thread Suresh Warrier
When the VCPU target of an H_IPI hypercall is not running
in the guest, we need to kick the VCPU (wake the VCPU thread)
to make it runnable. The real-mode version of the H_IPI hypercall
cannot do this because it involves waking a sleeping thread.
Thus the hcall returns H_TOO_HARD, which forces a switch back
to the host so that the H_IPI call can be completed in virtual mode.
This has been found to cause a slowdown for many workloads, such as
YCSB/MongoDB and small-message networking.

One solution is to hand off this job of waking the VCPU to a CPU
that is running in the host by sending it a message through the 
IPI mechanism from the hypercall.

This patch set optimizes the wakeup of the target VCPU by posting
the wakeup to a free core already running in the host, thus
avoiding the switch to the host and back. It requires maintaining a
bitmask of all the available cores in the system to indicate if
they are in the host or running in some guest. It also requires
the H_IPI hypercall to search for a free host core and send it a
new IPI message PPC_MSG_RM_HOST_ACTION after stashing away some
parameters like the pointer to VCPU for the IPI handler. Locks
are avoided by using atomic operations to save core state, to
find and reserve a core in the host, etc.

Note that it is possible for a guest to be destroyed and its
VCPUs freed before the IPI handler gets to run. This case is
handled by ensuring that any pending PPC_MSG_RM_HOST_ACTION
IPIs are completed before proceeding with freeing the VCPUs.

Currently, powerpc only supports 4 IPI messages and all 4 are
already taken for other purposes. This patch set also increases
the number of supported IPI messages to 8. It also provides the
code to send an IPI from a hypercall running in real mode, since
the existing cause_ipi functions cannot be executed in real mode.

A tunable h_ipi_redirect is also included in the patch set to
disable the feature. 

v3:
* Updated/clarified commit logs.
* Fix build break when building without KVM.

v2:
* Complete patch set sent to both kvm and linuxppc mailing lists
  to avoid build-breaks.
* Broke up real mode IPI messaging function into two pieces - one
  to set the message and one to cause the IPI. New function
  icp_native_cause_ipi_rm added to arch/powerpc/sysdev/xics/icp-native.c


Suresh Warrier (9):
  powerpc/smp: Support more IPI messages
  powerpc/smp: Add smp_muxed_ipi_set_message
  powerpc/xics: Add icp_native_cause_ipi_rm
  KVM: PPC: Book3S HV: Host-side RM data structures
  KVM: PPC: Book3S HV: Manage core host state
  KVM: PPC: Book3S HV: kvmppc_host_rm_ops - handle offlining CPUs
  KVM: PPC: Book3S HV: Host side kick VCPU when poked by real-mode KVM
  KVM: PPC: Book3S HV: Send IPI to host core to wake VCPU
  KVM: PPC: Book3S HV: Add tunable to control H_IPI redirection

 arch/powerpc/include/asm/kvm_ppc.h|  33 +++
 arch/powerpc/include/asm/smp.h|   4 +
 arch/powerpc/include/asm/xics.h   |   1 +
 arch/powerpc/kernel/smp.c |  28 +-
 arch/powerpc/kvm/book3s_hv.c  | 166 ++
 arch/powerpc/kvm/book3s_hv_builtin.c  |   3 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c  | 120 +++-
 arch/powerpc/kvm/powerpc.c|  10 ++
 arch/powerpc/sysdev/xics/icp-native.c |  21 +
 9 files changed, 378 insertions(+), 8 deletions(-)

-- 
1.8.3.4


[PATCH v3 5/9] KVM: PPC: Book3S HV: Manage core host state

2015-12-17 Thread Suresh Warrier
Update the core host state in kvmppc_host_rm_ops whenever
the primary thread of the core enters the guest or returns
to the host.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv.c | 44 
 1 file changed, 44 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4042623..95a2ed3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2261,6 +2261,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 }
 
 /*
+ * Clear core from the list of active host cores as we are about to
+ * enter the guest. Only do this if it is the primary thread of the
+ * core (not if a subcore) that is entering the guest.
+ */
+static inline void kvmppc_clear_host_core(int cpu)
+{
+   int core;
+
+   if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+   return;
+   /*
+* Memory barrier can be omitted here as we will do a smp_wmb()
+* later in kvmppc_start_thread and we need to ensure that state is
+* visible to other CPUs only after we enter guest.
+*/
+   core = cpu >> threads_shift;
+   kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+}
+
+/*
+ * Advertise this core as an active host core since we exited the guest
+ * Only need to do this if it is the primary thread of the core that is
+ * exiting.
+ */
+static inline void kvmppc_set_host_core(int cpu)
+{
+   int core;
+
+   if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+   return;
+
+   /*
+* Memory barrier can be omitted here because we do a spin_unlock
+* immediately after this which provides the memory barrier.
+*/
+   core = cpu >> threads_shift;
+   kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+}
+
+/*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
  */
@@ -2372,6 +2412,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
 
+   kvmppc_clear_host_core(pcpu);
+
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2468,6 +2510,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i);
}
 
+   kvmppc_set_host_core(pcpu);
+
spin_unlock(&vc->lock);
 
/* make sure updates to secondary vcpu structs are visible now */
-- 
1.8.3.4


[PATCH v3 9/9] KVM: PPC: Book3S HV: Add tunable to control H_IPI redirection

2015-12-17 Thread Suresh Warrier
Redirecting the wakeup of a VCPU from the H_IPI hypercall to
a core running in the host is usually a good idea, most workloads
seemed to benefit. However, in one heavily interrupt-driven SMT1
workload, some regression was observed. This patch adds a kvm_hv
module parameter called h_ipi_redirect to control this feature.

The default value for this tunable is 1, i.e. the feature is enabled.
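
Since the parameter is created with S_IRUGO | S_IWUSR, it can also be
flipped at runtime through the usual module-parameter sysfs path
(assuming the standard /sys/module layout), e.g.
echo 0 > /sys/module/kvm_hv/parameters/h_ipi_redirect
in addition to being set at module load time.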

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c | 11 +++
 arch/powerpc/kvm/book3s_hv_rm_xics.c |  5 -
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 1b93519..29d1442 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -448,6 +448,7 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xics_ipi_action(void);
+extern int h_ipi_redirect;
 #else
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d6280ed..182ec84 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,17 @@ static int target_smt_mode;
 module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
+#ifdef CONFIG_KVM_XICS
+static struct kernel_param_ops module_param_ops = {
+   .set = param_set_int,
+   .get = param_get_int,
+};
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
+   S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+#endif
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index a8ca3ed..4c062e7 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -24,6 +24,9 @@
 
 #define DEBUG_PASSUP
 
+int h_ipi_redirect = 1;
+EXPORT_SYMBOL(h_ipi_redirect);
+
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
 
@@ -134,7 +137,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
hcore = -1;
-   if (kvmppc_host_rm_ops_hv)
+   if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
if (hcore != -1) {
hcpu = hcore << threads_shift;
-- 
1.8.3.4


[PATCH v3 8/9] KVM: PPC: Book3S HV: Send IPI to host core to wake VCPU

2015-12-17 Thread Suresh Warrier
This patch adds support to real-mode KVM to search for a core
running in the host partition and send it an IPI message with
the VCPU to be woken. This avoids having to switch to the host
partition to complete an H_IPI hypercall when the VCPU which
is the target of the H_IPI is not loaded (is not running
in the guest).

The patch also includes support in the IPI handler running
in the host to do the wakeup by calling kvmppc_xics_ipi_action
for the PPC_MSG_RM_HOST_ACTION message.

When a guest is being destroyed, we need to ensure that there
are no pending IPIs waiting to wake up a VCPU before we free
the VCPUs of the guest. This is accomplished by:
- Forcing a PPC_MSG_CALL_FUNCTION IPI to be completed by all CPUs
  before freeing any VCPUs in kvm_arch_destroy_vm()
- Executing any pending PPC_MSG_RM_HOST_ACTION messages before
  any PPC_MSG_CALL_FUNCTION messages

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kernel/smp.c| 11 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 81 ++--
 arch/powerpc/kvm/powerpc.c   | 10 +
 3 files changed, 99 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e222efc..cb8be5d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -257,6 +257,17 @@ irqreturn_t smp_ipi_demux(void)
 
do {
all = xchg(&info->messages, 0);
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+   /*
+* Must check for PPC_MSG_RM_HOST_ACTION messages
+* before PPC_MSG_CALL_FUNCTION messages because when
+* a VM is destroyed, we call kick_all_cpus_sync()
+* to ensure that any pending PPC_MSG_RM_HOST_ACTION
+* messages have completed before we free any VCPUs.
+*/
+   if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+   kvmppc_xics_ipi_action();
+#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 43ffbfe..a8ca3ed 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -51,11 +51,70 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
 
 /* -- ICP routines -- */
 
+/*
+ * We start the search from our current CPU Id in the core map
+ * and go in a circle until we get back to our ID looking for a
+ * core that is running in host context and that hasn't already
+ * been targeted for another rm_host_ops.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1 if no CPU could be found in the host
+ * Else, returns a CPU Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+   struct kvmppc_host_rm_core *rm_core, int max, int action)
+{
+   bool success;
+   int core;
+   union kvmppc_rm_state old, new;
+
+   for (core = start + 1; core < max; core++)  {
+   old = new = READ_ONCE(rm_core[core].rm_state);
+
+   if (!old.in_host || old.rm_action)
+   continue;
+
+   /* Try to grab this host core if not taken already. */
+   new.rm_action = action;
+
+   success = cmpxchg64(&rm_core[core].rm_state.raw,
+   old.raw, new.raw) == old.raw;
+   if (success) {
+   /*
+* Make sure that the store to the rm_action is made
+* visible before we return to caller (and the
+* subsequent store to rm_data) to synchronize with
+* the IPI handler.
+*/
+   smp_wmb();
+   return core;
+   }
+   }
+
+   return -1;
+}
+
+static inline int find_available_hostcore(int action)
+{
+   int core;
+   int my_core = smp_processor_id() >> threads_shift;
+   struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
+
+   core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
+   if (core == -1)
+   core = grab_next_hostcore(core, rm_core, my_core, action);
+
+   return core;
+}
+
 static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
 {
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu;
+   int hcore, hcpu;
 
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
@@ -67,11 +126,25 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 

[PATCH v3 7/9] KVM: PPC: Book3S HV: Host side kick VCPU when poked by real-mode KVM

2015-12-17 Thread Suresh Warrier
This patch adds support for the kick VCPU operation in
kvmppc_host_rm_ops. The kvmppc_xics_ipi_action() function
provides the routine to be invoked for a host-side operation
when poked by the real-mode KVM. This is initiated by KVM by
sending an IPI to any free host core.

KVM real mode must set the rm_action to XICS_RM_KICK_VCPU and
rm_data to point to the VCPU to be woken up before sending the IPI.
Note that we have allocated one kvmppc_host_rm_core structure
per core. The above values need to be set in the structure
corresponding to the core to which the IPI will be sent.
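
Putting the pieces together, the real-mode sender plausibly looks like
this sketch (names from patches 3/9 and 8/9 of this series; the exact
code lives in icp_rm_set_vcpu_irq() in book3s_hv_rm_xics.c, whose
locals hcore, hcpu and the vcpu argument are assumed here):

	/* sketch: reserve a host core, stash the VCPU, then poke it */
	hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
	if (hcore != -1) {
		hcpu = hcore << threads_shift;
		kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
		smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
		icp_native_cause_ipi_rm(hcpu);
	}

On hcpu, smp_ipi_demux() sees PPC_MSG_RM_HOST_ACTION and calls
kvmppc_xics_ipi_action(), which consumes rm_data and clears rm_action.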

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c |  2 ++
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 36 
 3 files changed, 39 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 47cd441..1b93519 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -447,6 +447,7 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xics_ipi_action(void);
 #else
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index da2cc56..d6280ed 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3085,6 +3085,8 @@ void kvmppc_alloc_host_rm_ops(void)
ops->rm_core[core].rm_state.in_host = 1;
}
 
+   ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
+
/*
 * Make the contents of the kvmppc_host_rm_ops structure visible
 * to other CPUs before we assign it to the global variable.
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 24f5807..43ffbfe 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "book3s_xics.h"
@@ -623,3 +624,38 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
  bail:
return check_too_hard(xics, icp);
 }
+
+/*  --- Non-real mode XICS-related built-in routines ---  */
+
+/**
+ * Host Operations poked by RM KVM
+ */
+static void rm_host_ipi_action(int action, void *data)
+{
+   switch (action) {
+   case XICS_RM_KICK_VCPU:
+   kvmppc_host_rm_ops_hv->vcpu_kick(data);
+   break;
+   default:
+   WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
+   break;
+   }
+
+}
+
+void kvmppc_xics_ipi_action(void)
+{
+   int core;
+   unsigned int cpu = smp_processor_id();
+   struct kvmppc_host_rm_core *rm_corep;
+
+   core = cpu >> threads_shift;
+   rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
+
+   if (rm_corep->rm_data) {
+   rm_host_ipi_action(rm_corep->rm_state.rm_action,
+   rm_corep->rm_data);
+   rm_corep->rm_data = NULL;
+   rm_corep->rm_state.rm_action = 0;
+   }
+}
-- 
1.8.3.4


[PATCH v3 6/9] KVM: PPC: Book3S HV: kvmppc_host_rm_ops - handle offlining CPUs

2015-12-17 Thread Suresh Warrier
The kvmppc_host_rm_ops structure keeps track of which cores
are in the host by maintaining a bitmask of active/runnable
online CPUs that have not entered the guest. This patch adds
support to manage the bitmask when a CPU is offlined or onlined
in the host.
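
One detail the log does not mention: the diff below brackets both the
initial core scan and the notifier registration with
get/put_online_cpus(), presumably so that no CPU can be onlined or
offlined between building the initial mask and the notifier taking
over (my reading of the code). It also makes the cmpxchg64 failure
path return early instead of falling through and registering a second
notifier.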

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 95a2ed3..da2cc56 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3012,6 +3012,36 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 }
 
 #ifdef CONFIG_KVM_XICS
+static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
+   void *hcpu)
+{
+   unsigned long cpu = (long)hcpu;
+
+   switch (action) {
+   case CPU_UP_PREPARE:
+   case CPU_UP_PREPARE_FROZEN:
+   kvmppc_set_host_core(cpu);
+   break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+   case CPU_DEAD:
+   case CPU_DEAD_FROZEN:
+   case CPU_UP_CANCELED:
+   case CPU_UP_CANCELED_FROZEN:
+   kvmppc_clear_host_core(cpu);
+   break;
+#endif
+   default:
+   break;
+   }
+
+   return NOTIFY_OK;
+}
+
+static struct notifier_block kvmppc_cpu_notifier = {
+   .notifier_call = kvmppc_cpu_notify,
+};
+
 /*
  * Allocate a per-core structure for managing state about which cores are
  * running in the host versus the guest and for exchanging data between
@@ -3045,6 +3075,8 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
 
+   get_online_cpus();
+
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
continue;
@@ -3063,14 +3095,21 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
 
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
+   put_online_cpus();
kfree(ops->rm_core);
kfree(ops);
+   return;
}
+
+   register_cpu_notifier(&kvmppc_cpu_notifier);
+
+   put_online_cpus();
 }
 
 void kvmppc_free_host_rm_ops(void)
 {
if (kvmppc_host_rm_ops_hv) {
+   unregister_cpu_notifier(&kvmppc_cpu_notifier);
kfree(kvmppc_host_rm_ops_hv->rm_core);
kfree(kvmppc_host_rm_ops_hv);
kvmppc_host_rm_ops_hv = NULL;
-- 
1.8.3.4


[PATCH v3 3/9] powerpc/xics: Add icp_native_cause_ipi_rm

2015-12-17 Thread Suresh Warrier
Function to cause an IPI by directly updating the MFRR register
in the XICS. The function is meant for real-mode callers since
they cannot use the smp_ops->cause_ipi function, which uses an
ioremapped address.

Normal usage is for the KVM real mode code to set the IPI message
using smp_muxed_ipi_set_message and then invoke icp_native_cause_ipi_rm
to cause the actual IPI.

The function requires kvm_hstate.xics_phys to have been initialized
with the physical address of the XICS.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/xics.h   |  1 +
 arch/powerpc/sysdev/xics/icp-native.c | 21 +
 2 files changed, 22 insertions(+)

diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0e25bdb..2546048 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -30,6 +30,7 @@
 #ifdef CONFIG_PPC_ICP_NATIVE
 extern int icp_native_init(void);
 extern void icp_native_flush_interrupt(void);
+extern void icp_native_cause_ipi_rm(int cpu);
 #else
 static inline int icp_native_init(void) { return -ENODEV; }
 #endif
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index eae3265..afdf62f 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void icp_native_cause_ipi_rm(int cpu)
+{
+   /*
+* Currently not used to send IPIs to another CPU
+* on the same core. Only caller is KVM real mode.
+* Need the physical address of the XICS to be
+* previously saved in kvm_hstate in the paca.
+*/
+   unsigned long xics_phys;
+
+   /*
+* Just like the cause_ipi functions, it is required to
+* include a full barrier (out8 includes a sync) before
+* causing the IPI.
+*/
+   xics_phys = paca[cpu].kvm_hstate.xics_phys;
+   out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
+}
+#endif
+
 /*
  * Called when an interrupt is received on an off-line CPU to
  * clear the interrupt, so that the CPU can go back to nap mode.
-- 
1.8.3.4


[PATCH v3 4/9] KVM: PPC: Book3S HV: Host-side RM data structures

2015-12-17 Thread Suresh Warrier
This patch defines the data structures to support the setting up
of host side operations while running in real mode in the guest,
and also the functions to allocate and free it.

The operations are for now limited to virtual XICS operations.
Currently, we have only defined one operation in the data
structure:
 - Wake up a VCPU sleeping in the host when it
   receives a virtual interrupt

The operations are assigned at the core level because PowerKVM
requires that the host run in SMT off mode. For each core,
we will need to manage its state atomically - where the state
is defined by:
1. Is the core running in the host?
2. Is there a Real Mode (RM) operation pending on the host?

Currently, core state is only managed at the whole-core level
even when the system is in split-core mode. This just limits
the number of free or "available" cores in the host to perform
any host-side operations.

The kvmppc_host_rm_core.rm_data allows any data to be passed by
KVM in real mode to the host core along with the operation to
be performed.

The kvmppc_host_rm_ops structure is allocated the very first time
a guest VM is started. Initial core state is also set - all online
cores are in the host. This structure is never deleted, not even
when there are no active guests. However, it needs to be freed
when the module is unloaded because kvmppc_host_rm_ops_hv
can contain function pointers to kvm-hv.ko functions for the
different supported host operations.
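
One layout detail worth noting: with an 8-byte rm_state and an 8-byte
rm_data pointer, the 112 bytes of padding bring each
struct kvmppc_host_rm_core to 128 bytes, presumably one POWER8 cache
line per core so that the lock-free state updates do not false-share
(my reading of the padding; the log does not say so). A compile-time
check of that assumption would be:

	BUILD_BUG_ON(sizeof(struct kvmppc_host_rm_core) != 128);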

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h   | 31 
 arch/powerpc/kvm/book3s_hv.c | 70 
 arch/powerpc/kvm/book3s_hv_builtin.c |  3 ++
 3 files changed, 104 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c6ef05b..47cd441 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -437,6 +437,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 {
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
 }
+extern void kvmppc_alloc_host_rm_ops(void);
+extern void kvmppc_free_host_rm_ops(void);
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
 extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -446,6 +448,8 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
 #else
+static inline void kvmppc_alloc_host_rm_ops(void) {};
+static inline void kvmppc_free_host_rm_ops(void) {};
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
@@ -459,6 +463,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
 #endif
 
+/*
+ * Host-side operations we want to set up while running in real
+ * mode in the guest, operating on the XICS.
+ * Currently only VCPU wakeup is supported.
+ */
+
+union kvmppc_rm_state {
+   unsigned long raw;
+   struct {
+   u32 in_host;
+   u32 rm_action;
+   };
+};
+
+struct kvmppc_host_rm_core {
+   union kvmppc_rm_state rm_state;
+   void *rm_data;
+   char pad[112];
+};
+
+struct kvmppc_host_rm_ops {
+   struct kvmppc_host_rm_core  *rm_core;
+   void(*vcpu_kick)(struct kvm_vcpu *vcpu);
+};
+
+extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+
 static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 54b45b7..4042623 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2967,6 +2967,73 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
 }
 
+#ifdef CONFIG_KVM_XICS
+/*
+ * Allocate a per-core structure for managing state about which cores are
+ * running in the host versus the guest and for exchanging data between
+ * real mode KVM and CPU running in the host.
+ * This is only done for the first VM.
+ * The allocated structure stays even if all VMs have stopped.
+ * It is only freed when the kvm-hv module is unloaded.
+ * It's OK for this routine to fail, we just don't support host
+ * core operations like redirecting H_IPI wakeups.
+ */
+void kvmppc_alloc_host_rm_ops(void)
+{
+   struct kvmppc_host_rm_ops *ops;
+   unsigned long l_ops;
+   int cpu, core;
+   int size;
+
+   /* Not the first time here ? */
+   if (kvmppc_host_rm_ops_hv != NULL)
+   return;
+
+   ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
+   if (!ops)
+   return;
+
+   size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
+   ops-

[PATCH v3 1/9] powerpc/smp: Support more IPI messages

2015-12-17 Thread Suresh Warrier
This patch increases the number of demuxed messages for a
controller with a single IPI to 8 for 64-bit systems.

This is required because we want to use the IPI mechanism
to send messages from a CPU running in KVM real mode in a
guest to a CPU in the host to take some action. Currently,
we only support 4 messages and all 4 are already taken.

Define a fifth message PPC_MSG_RM_HOST_ACTION for this
purpose.
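
To see how the new macro lines up with the per-byte message array, here
is a small userspace sketch (not kernel code, and it assumes a 64-bit
long as the log does) that mimics the message[msg] = 1 store and the
smp_ipi_demux() test on either endianness:

	#include <assert.h>

	#define BITS_PER_LONG (8 * sizeof(unsigned long))
	#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
	#else
	#define IPI_MESSAGE(A) (1uL << (8 * (A)))
	#endif

	int main(void)
	{
		for (int msg = 0; msg < 8; msg++) {
			unsigned long all = 0;
			char *message = (char *)&all;

			message[msg] = 1;		/* the setter's store */
			assert(all & IPI_MESSAGE(msg));	/* the demux test */
		}
		return 0;
	}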

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/smp.h | 3 +++
 arch/powerpc/kernel/smp.c  | 8 
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 825663c..9ef9c37 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
 #define PPC_MSG_TICK_BROADCAST 2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION 4
+
 /* for irq controllers that have dedicated ipis per message (4) */
 extern int smp_request_message_ipi(int virq, int message);
 extern const char *smp_ipi_name[];
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec20..a53a130 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
 
 #ifdef CONFIG_PPC_SMP_MUXED_IPI
 struct cpu_messages {
-   int messages;   /* current messages */
+   long messages;  /* current messages */
unsigned long data; /* data for cause ipi */
 };
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -236,15 +236,15 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
 }
 
 #ifdef __BIG_ENDIAN__
-#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
 #else
-#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))
 #endif
 
 irqreturn_t smp_ipi_demux(void)
 {
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
-   unsigned int all;
+   unsigned long all;
 
mb();   /* order any irq clear */
 
-- 
1.8.3.4


[PATCH v2 9/9] KVM: PPC: Book3S HV: Add tunable to control H_IPI redirection

2015-11-25 Thread Suresh Warrier
Redirecting the wakeup of a VCPU from the H_IPI hypercall to
a core running in the host is usually a good idea, most workloads
seemed to benefit. However, in one heavily interrupt-driven SMT1
workload, some regression was observed. This patch adds a kvm_hv
module parameter called h_ipi_redirect to control this feature.

The default value for this tunable is 1, i.e. the feature is enabled.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c | 11 +++
 arch/powerpc/kvm/book3s_hv_rm_xics.c |  5 -
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 1b93519..29d1442 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -448,6 +448,7 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xics_ipi_action(void);
+extern int h_ipi_redirect;
 #else
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d6280ed..182ec84 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,17 @@ static int target_smt_mode;
 module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
+#ifdef CONFIG_KVM_XICS
+static struct kernel_param_ops module_param_ops = {
+   .set = param_set_int,
+   .get = param_get_int,
+};
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
+   S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+#endif
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index a8ca3ed..4c062e7 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -24,6 +24,9 @@
 
 #define DEBUG_PASSUP
 
+int h_ipi_redirect = 1;
+EXPORT_SYMBOL(h_ipi_redirect);
+
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
 
@@ -134,7 +137,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
hcore = -1;
-   if (kvmppc_host_rm_ops_hv)
+   if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
if (hcore != -1) {
hcpu = hcore << threads_shift;
-- 
1.8.3.4


[PATCH v2 7/9] KVM: PPC: Book3S HV: Host side kick VCPU when poked by real-mode KVM

2015-11-25 Thread Suresh Warrier
This patch adds support for the kick VCPU operation in
kvmppc_host_rm_ops. The kvmppc_xics_ipi_action() function
provides the routine to be invoked for a host-side operation
when poked by the real-mode KVM. This is initiated by KVM by
sending an IPI to any free host core.

KVM real mode must set the rm_action to XICS_RM_KICK_VCPU and
rm_data to point to the VCPU to be woken up before sending the IPI.
Note that we have allocated one kvmppc_host_rm_core structure
per core. The above values need to be set in the structure
corresponding to the core to which the IPI will be sent.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c |  2 ++
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 36 
 3 files changed, 39 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 47cd441..1b93519 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -447,6 +447,7 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xics_ipi_action(void);
 #else
 static inline void kvmppc_alloc_host_rm_ops(void) {};
 static inline void kvmppc_free_host_rm_ops(void) {};
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index da2cc56..d6280ed 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3085,6 +3085,8 @@ void kvmppc_alloc_host_rm_ops(void)
ops->rm_core[core].rm_state.in_host = 1;
}
 
+   ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
+
/*
 * Make the contents of the kvmppc_host_rm_ops structure visible
 * to other CPUs before we assign it to the global variable.
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 24f5807..43ffbfe 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "book3s_xics.h"
@@ -623,3 +624,38 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
  bail:
return check_too_hard(xics, icp);
 }
+
+/*  --- Non-real mode XICS-related built-in routines ---  */
+
+/**
+ * Host Operations poked by RM KVM
+ */
+static void rm_host_ipi_action(int action, void *data)
+{
+   switch (action) {
+   case XICS_RM_KICK_VCPU:
+   kvmppc_host_rm_ops_hv->vcpu_kick(data);
+   break;
+   default:
+   WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
+   break;
+   }
+
+}
+
+void kvmppc_xics_ipi_action(void)
+{
+   int core;
+   unsigned int cpu = smp_processor_id();
+   struct kvmppc_host_rm_core *rm_corep;
+
+   core = cpu >> threads_shift;
+   rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
+
+   if (rm_corep->rm_data) {
+   rm_host_ipi_action(rm_corep->rm_state.rm_action,
+   rm_corep->rm_data);
+   rm_corep->rm_data = NULL;
+   rm_corep->rm_state.rm_action = 0;
+   }
+}
-- 
1.8.3.4


[PATCH v2 8/9] KVM: PPC: Book3S HV: Send IPI to host core to wake VCPU

2015-11-25 Thread Suresh Warrier
This patch adds support to real-mode KVM to search for a core
running in the host partition and send it an IPI message with
the VCPU to be woken. This avoids having to switch to the host
partition to complete an H_IPI hypercall when the VCPU which
is the target of the H_IPI is not loaded (is not running
in the guest).

The patch also includes support in the IPI handler running
in the host to do the wakeup by calling kvmppc_xics_ipi_action
for the PPC_MSG_RM_HOST_ACTION message.

When a guest is being destroyed, we need to ensure that there
are no pending IPIs waiting to wake up a VCPU before we free
the VCPUs of the guest. This is accomplished by:
- Forcing a PPC_MSG_CALL_FUNCTION IPI to be completed by all CPUs
  before freeing any VCPUs in kvm_arch_destroy_vm()
- Executing any pending PPC_MSG_RM_HOST_ACTION messages before
  any PPC_MSG_CALL_FUNCTION messages

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kernel/smp.c| 11 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 81 ++--
 arch/powerpc/kvm/powerpc.c   | 10 +
 3 files changed, 99 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e222efc..cb8be5d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -257,6 +257,17 @@ irqreturn_t smp_ipi_demux(void)
 
do {
all = xchg(&info->messages, 0);
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+   /*
+* Must check for PPC_MSG_RM_HOST_ACTION messages
+* before PPC_MSG_CALL_FUNCTION messages because when
+* a VM is destroyed, we call kick_all_cpus_sync()
+* to ensure that any pending PPC_MSG_RM_HOST_ACTION
+* messages have completed before we free any VCPUs.
+*/
+   if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+   kvmppc_xics_ipi_action();
+#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 43ffbfe..a8ca3ed 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -51,11 +51,70 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
 
 /* -- ICP routines -- */
 
+/*
+ * We start the search from our current CPU Id in the core map
+ * and go in a circle until we get back to our ID looking for a
+ * core that is running in host context and that hasn't already
+ * been targeted for another rm_host_ops.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1 if no CPU could be found in the host
+ * Else, returns a CPU Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+   struct kvmppc_host_rm_core *rm_core, int max, int action)
+{
+   bool success;
+   int core;
+   union kvmppc_rm_state old, new;
+
+   for (core = start + 1; core < max; core++)  {
+   old = new = READ_ONCE(rm_core[core].rm_state);
+
+   if (!old.in_host || old.rm_action)
+   continue;
+
+   /* Try to grab this host core if not taken already. */
+   new.rm_action = action;
+
+   success = cmpxchg64(&rm_core[core].rm_state.raw,
+   old.raw, new.raw) == old.raw;
+   if (success) {
+   /*
+* Make sure that the store to the rm_action is made
+* visible before we return to caller (and the
+* subsequent store to rm_data) to synchronize with
+* the IPI handler.
+*/
+   smp_wmb();
+   return core;
+   }
+   }
+
+   return -1;
+}
+
+static inline int find_available_hostcore(int action)
+{
+   int core;
+   int my_core = smp_processor_id() >> threads_shift;
+   struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
+
+   core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
+   if (core == -1)
+   core = grab_next_hostcore(core, rm_core, my_core, action);
+
+   return core;
+}
+
 static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
 {
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu;
+   int hcore, hcpu;
 
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
@@ -67,11 +126,25 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 

[PATCH v2 6/9] KVM: PPC: Book3S HV: kvmppc_host_rm_ops - handle offlining CPUs

2015-11-25 Thread Suresh Warrier
The kvmppc_host_rm_ops structure keeps track of which cores
are in the host by maintaining a bitmask of active/runnable
online CPUs that have not entered the guest. This patch adds
support to manage the bitmask when a CPU is offlined or onlined
in the host.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 95a2ed3..da2cc56 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3012,6 +3012,36 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 }
 
 #ifdef CONFIG_KVM_XICS
+static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
+   void *hcpu)
+{
+   unsigned long cpu = (long)hcpu;
+
+   switch (action) {
+   case CPU_UP_PREPARE:
+   case CPU_UP_PREPARE_FROZEN:
+   kvmppc_set_host_core(cpu);
+   break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+   case CPU_DEAD:
+   case CPU_DEAD_FROZEN:
+   case CPU_UP_CANCELED:
+   case CPU_UP_CANCELED_FROZEN:
+   kvmppc_clear_host_core(cpu);
+   break;
+#endif
+   default:
+   break;
+   }
+
+   return NOTIFY_OK;
+}
+
+static struct notifier_block kvmppc_cpu_notifier = {
+   .notifier_call = kvmppc_cpu_notify,
+};
+
 /*
  * Allocate a per-core structure for managing state about which cores are
  * running in the host versus the guest and for exchanging data between
@@ -3045,6 +3075,8 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
 
+   get_online_cpus();
+
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
continue;
@@ -3063,14 +3095,21 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
 
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
+   put_online_cpus();
kfree(ops->rm_core);
kfree(ops);
+   return;
}
+
+   register_cpu_notifier(&kvmppc_cpu_notifier);
+
+   put_online_cpus();
 }
 
 void kvmppc_free_host_rm_ops(void)
 {
if (kvmppc_host_rm_ops_hv) {
+   unregister_cpu_notifier(&kvmppc_cpu_notifier);
kfree(kvmppc_host_rm_ops_hv->rm_core);
kfree(kvmppc_host_rm_ops_hv);
kvmppc_host_rm_ops_hv = NULL;
-- 
1.8.3.4


[PATCH v2 5/9] KVM: PPC: Book3S HV: Manage core host state

2015-11-25 Thread Suresh Warrier
Update the core host state in kvmppc_host_rm_ops whenever
the primary thread of the core enters the guest or returns
to the host.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/kvm/book3s_hv.c | 44 
 1 file changed, 44 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4042623..95a2ed3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2261,6 +2261,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 }
 
 /*
+ * Clear core from the list of active host cores as we are about to
+ * enter the guest. Only do this if it is the primary thread of the
+ * core (not if a subcore) that is entering the guest.
+ */
+static inline void kvmppc_clear_host_core(int cpu)
+{
+   int core;
+
+   if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+   return;
+   /*
+* Memory barrier can be omitted here as we will do a smp_wmb()
+* later in kvmppc_start_thread and we need to ensure that state is
+* visible to other CPUs only after we enter guest.
+*/
+   core = cpu >> threads_shift;
+   kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+}
+
+/*
+ * Advertise this core as an active host core since we exited the guest
+ * Only need to do this if it is the primary thread of the core that is
+ * exiting.
+ */
+static inline void kvmppc_set_host_core(int cpu)
+{
+   int core;
+
+   if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+   return;
+
+   /*
+* Memory barrier can be omitted here because we do a spin_unlock
+* immediately after this which provides the memory barrier.
+*/
+   core = cpu >> threads_shift;
+   kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+}
+
+/*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
  */
@@ -2372,6 +2412,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
 
+   kvmppc_clear_host_core(pcpu);
+
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2468,6 +2510,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i);
}
 
+   kvmppc_set_host_core(pcpu);
+
spin_unlock(&vc->lock);
 
/* make sure updates to secondary vcpu structs are visible now */
-- 
1.8.3.4


[PATCH v2 4/9] KVM: PPC: Book3S HV: Host-side RM data structures

2015-11-25 Thread Suresh Warrier
This patch defines the data structures to support the setting up
of host side operations while running in real mode in the guest,
and also the functions to allocate and free it.

The operations are for now limited to virtual XICS operations.
Currently, we have only defined one operation in the data
structure:
 - Wake up a VCPU sleeping in the host when it
   receives a virtual interrupt

The operations are assigned at the core level because PowerKVM
requires that the host run in SMT off mode. For each core,
we will need to manage its state atomically - where the state
is defined by:
1. Is the core running in the host?
2. Is there a Real Mode (RM) operation pending on the host?

Currently, core state is only managed at the whole-core level
even when the system is in split-core mode. This just limits
the number of free or "available" cores in the host to perform
any host-side operations.

The kvmppc_host_rm_core.rm_data allows any data to be passed by
KVM in real mode to the host core along with the operation to
be performed.

The kvmppc_host_rm_ops structure is allocated the very first time
a guest VM is started. Initial core state is also set - all online
cores are in the host. This structure is never deleted, not even
when there are no active guests. However, it needs to be freed
when the module is unloaded because kvmppc_host_rm_ops_hv
can contain function pointers to kvm-hv.ko functions for the
different supported host operations.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/kvm_ppc.h   | 31 
 arch/powerpc/kvm/book3s_hv.c | 70 
 arch/powerpc/kvm/book3s_hv_builtin.c |  3 ++
 3 files changed, 104 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c6ef05b..47cd441 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -437,6 +437,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 {
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
 }
+extern void kvmppc_alloc_host_rm_ops(void);
+extern void kvmppc_free_host_rm_ops(void);
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
 extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -446,6 +448,8 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
 #else
+static inline void kvmppc_alloc_host_rm_ops(void) {};
+static inline void kvmppc_free_host_rm_ops(void) {};
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
@@ -459,6 +463,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
 #endif
 
+/*
+ * Host-side operations we want to set up while running in real
+ * mode in the guest, operating on the XICS.
+ * Currently only VCPU wakeup is supported.
+ */
+
+union kvmppc_rm_state {
+   unsigned long raw;
+   struct {
+   u32 in_host;
+   u32 rm_action;
+   };
+};
+
+struct kvmppc_host_rm_core {
+   union kvmppc_rm_state rm_state;
+   void *rm_data;
+   char pad[112];
+};
+
+struct kvmppc_host_rm_ops {
+   struct kvmppc_host_rm_core  *rm_core;
+   void(*vcpu_kick)(struct kvm_vcpu *vcpu);
+};
+
+extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+
 static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 54b45b7..4042623 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2967,6 +2967,73 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
 }
 
+#ifdef CONFIG_KVM_XICS
+/*
+ * Allocate a per-core structure for managing state about which cores are
+ * running in the host versus the guest and for exchanging data between
+ * real mode KVM and CPU running in the host.
+ * This is only done for the first VM.
+ * The allocated structure stays even if all VMs have stopped.
+ * It is only freed when the kvm-hv module is unloaded.
+ * It's OK for this routine to fail, we just don't support host
+ * core operations like redirecting H_IPI wakeups.
+ */
+void kvmppc_alloc_host_rm_ops(void)
+{
+   struct kvmppc_host_rm_ops *ops;
+   unsigned long l_ops;
+   int cpu, core;
+   int size;
+
+   /* Not the first time here ? */
+   if (kvmppc_host_rm_ops_hv != NULL)
+   return;
+
+   ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
+   if (!ops)
+   return;
+
+   size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
+   ops-

[PATCH v2 3/9] powerpc/powernv: Add icp_native_cause_ipi_rm

2015-11-25 Thread Suresh Warrier
Function to cause an IPI. Requires kvm_hstate.xics_phys to be
initialized with the physical address of the XICS.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/xics.h   |  1 +
 arch/powerpc/sysdev/xics/icp-native.c | 19 +++
 2 files changed, 20 insertions(+)

diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0e25bdb..2546048 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -30,6 +30,7 @@
 #ifdef CONFIG_PPC_ICP_NATIVE
 extern int icp_native_init(void);
 extern void icp_native_flush_interrupt(void);
+extern void icp_native_cause_ipi_rm(int cpu);
 #else
 static inline int icp_native_init(void) { return -ENODEV; }
 #endif
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index eae3265..e39b18a 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -159,6 +159,25 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
+void icp_native_cause_ipi_rm(int cpu)
+{
+   /*
+* Currently not used to send IPIs to another CPU
+* on the same core. Only caller is KVM real mode.
+* Need the physical address of the XICS to be
+* previously saved in kvm_hstate in the paca.
+*/
+   unsigned long xics_phys;
+
+   /*
+* Just like the cause_ipi functions, it is required to
+* include a full barrier (out8 includes a sync) before
+* causing the IPI.
+*/
+   xics_phys = paca[cpu].kvm_hstate.xics_phys;
+   out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
+}
+
 /*
  * Called when an interrupt is received on an off-line CPU to
  * clear the interrupt, so that the CPU can go back to nap mode.
-- 
1.8.3.4


[PATCH v2 2/9] powerpc/smp: Add smp_muxed_ipi_set_message

2015-11-25 Thread Suresh Warrier
smp_muxed_ipi_message_pass() invokes smp_ops->cause_ipi, which
updates the MFRR through an ioremapped address, to cause the
IPI. Because of this, real-mode callers cannot call
smp_muxed_ipi_message_pass() for IPI messaging.

This patch creates a separate function smp_muxed_ipi_set_message
just to set the IPI message without the cause_ipi routine.
After calling this function to set the IPI message, real
mode callers must cause the IPI directly.

As part of this, we also change smp_muxed_ipi_message_pass
to call smp_muxed_ipi_set_message to set the message instead
of doing it directly inside the routine.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/smp.h | 1 +
 arch/powerpc/kernel/smp.c  | 9 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 9ef9c37..78083ed 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -124,6 +124,7 @@ extern const char *smp_ipi_name[];
 /* for irq controllers with only a single ipi */
 extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
 extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_set_message(int cpu, int msg);
 extern irqreturn_t smp_ipi_demux(void);
 
 void smp_init_pSeries(void);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a53a130..e222efc 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data;
 }
 
-void smp_muxed_ipi_message_pass(int cpu, int msg)
+void smp_muxed_ipi_set_message(int cpu, int msg)
 {
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
 */
smp_mb();
message[msg] = 1;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+   struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+   smp_muxed_ipi_set_message(cpu, msg);
/*
 * cause_ipi functions are required to include a full barrier
 * before doing whatever causes the IPI.
-- 
1.8.3.4


[PATCH v2 1/9] powerpc/smp: Support more IPI messages

2015-11-25 Thread Suresh Warrier
This patch increases the number of demuxed messages for a
controller with a single IPI to 8 for 64-bit systems.

This is required because we want to use the IPI mechanism
to send messages from a CPU running in KVM real mode in a
guest to a CPU in the host to take some action. Currently,
we only support 4 messages and all 4 are already taken.

Define a fifth message PPC_MSG_RM_HOST_ACTION for this
purpose.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/smp.h | 3 +++
 arch/powerpc/kernel/smp.c  | 8 
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 825663c..9ef9c37 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
 #define PPC_MSG_TICK_BROADCAST 2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION 4
+
 /* for irq controllers that have dedicated ipis per message (4) */
 extern int smp_request_message_ipi(int virq, int message);
 extern const char *smp_ipi_name[];
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec20..a53a130 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
 
 #ifdef CONFIG_PPC_SMP_MUXED_IPI
 struct cpu_messages {
-   int messages;   /* current messages */
+   long messages;  /* current messages */
unsigned long data; /* data for cause ipi */
 };
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -236,15 +236,15 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
 }
 
 #ifdef __BIG_ENDIAN__
-#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
 #else
-#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))
 #endif
 
 irqreturn_t smp_ipi_demux(void)
 {
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
-   unsigned int all;
+   unsigned long all;
 
mb();   /* order any irq clear */
 
-- 
1.8.3.4


[PATCH v2 0/9] KVM: PPC: Book3S HV: Optimize wakeup VCPU from H_IPI

2015-11-25 Thread Suresh Warrier
When the VCPU target of an H_IPI hypercall is not running
in the guest, we need to kick the VCPU (wake the VCPU thread)
to make it runnable. The real-mode version of the H_IPI hypercall
cannot do this because it involves waking a sleeping thread.
Thus the hcall returns H_TOO_HARD, which forces a switch back
to the host so that the H_IPI call can be completed in virtual mode.
This has been found to cause a slowdown for many workloads, such as
YCSB/MongoDB and small-message networking.

One solution is to hand off this job of waking the VCPU to a CPU
that is running in the host by sending it a message through the 
IPI mechanism from the hypercall.

This patch set optimizes the wakeup of the target VCPU by posting
the wakeup to a free core already running in the host, thus
avoiding the switch to the host and back. It requires maintaining a
bitmask of all the available cores in the system to indicate if
they are in the host or running in some guest. It also requires
the H_IPI hypercall to search for a free host core and send it a
new IPI message PPC_MSG_RM_HOST_ACTION after stashing away some
parameters like the pointer to VCPU for the IPI handler. Locks
are avoided by using atomic operations to save core state, to
find and reserve a core in the host, etc.

Note that it is possible for a guest to be destroyed and its
VCPUs freed before the IPI handler gets to run. This case is
handled by ensuring that any pending PPC_MSG_RM_HOST_ACTION
IPIs are completed before proceeding with freeing the VCPUs.

Currently, powerpc only supports 4 IPI messages and all 4 are
already taken for other purposes. This patch set also increases
the number of supported IPI messages to 8. It also provides the
code to send an IPI from a hypercall running in real mode, since
the existing cause_ipi functions cannot be executed in real mode.

A tunable h_ipi_redirect is also included in the patch set to
disable the feature. 

v2:
* Complete patch set sent to both kvm and linuxppc mailing lists
  to avoid build-breaks.
* Broke up real mode IPI messaging function into two pieces - one
  to set the message and one to cause the IPI. New function
  icp_native_cause_ipi_rm added to arch/powerpc/sysdev/xics/icp-native.c

Suresh Warrier (9):
  powerpc/smp: Support more IPI messages
  powerpc/smp: Add smp_muxed_ipi_set_message
  powerpc/powernv: Add icp_native_cause_ipi_rm
  KVM: PPC: Book3S HV: Host-side RM data structures
  KVM: PPC: Book3S HV: Manage core host state
  KVM: PPC: Book3S HV: kvmppc_host_rm_ops - handle offlining CPUs
  KVM: PPC: Book3S HV: Host side kick VCPU when poked by real-mode KVM
  KVM: PPC: Book3S HV: Send IPI to host core to wake VCPU
  KVM: PPC: Book3S HV: Add tunable to control H_IPI redirection

 arch/powerpc/include/asm/kvm_ppc.h|  33 +++
 arch/powerpc/include/asm/smp.h|   4 +
 arch/powerpc/include/asm/xics.h   |   1 +
 arch/powerpc/kernel/smp.c |  28 +-
 arch/powerpc/kvm/book3s_hv.c  | 166 ++
 arch/powerpc/kvm/book3s_hv_builtin.c  |   3 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c  | 120 +++-
 arch/powerpc/kvm/powerpc.c|  10 ++
 arch/powerpc/sysdev/xics/icp-native.c |  19 
 9 files changed, 376 insertions(+), 8 deletions(-)

-- 
1.8.3.4


[PATCH 2/2] powerpc/smp: Add smp_muxed_ipi_rm_message_pass

2015-10-29 Thread Suresh Warrier
This function supports IPI message passing for real
mode callers.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/smp.h |  1 +
 arch/powerpc/kernel/smp.c  | 30 ++
 2 files changed, 31 insertions(+)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 9ef9c37..851a37a 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -124,6 +124,7 @@ extern const char *smp_ipi_name[];
 /* for irq controllers with only a single ipi */
 extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
 extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_rm_message_pass(int cpu, int msg);
 extern irqreturn_t smp_ipi_demux(void);
 
 void smp_init_pSeries(void);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a53a130..8c07bfad 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -53,6 +53,9 @@
 #include 
 #include 
 #include 
+#ifdef CONFIG_KVM_XICS
+#include 
+#endif
 
 #ifdef DEBUG
 #include 
@@ -235,6 +238,33 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
smp_ops->cause_ipi(cpu, info->data);
 }
 
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+/*
+ * Message passing code for real mode callers. It does not use the
+ * smp_ops->cause_ipi function to cause an IPI, because those functions
+ * access the MFRR through an ioremapped address.
+ */
+void smp_muxed_ipi_rm_message_pass(int cpu, int msg)
+{
+   struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+   char *message = (char *)&info->messages;
+   unsigned long xics_phys;
+
+   /*
+* Order previous accesses before accesses in the IPI handler.
+*/
+   smp_mb();
+   message[msg] = 1;
+
+   /*
+* cause_ipi functions are required to include a full barrier
+* before doing whatever causes the IPI.
+*/
+   xics_phys = paca[cpu].kvm_hstate.xics_phys;
+   out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
+}
+#endif
+
 #ifdef __BIG_ENDIAN__
 #define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
 #else
-- 
1.8.3.4
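
For context, a real-mode hcall path would invoke the new function
roughly as below; only smp_muxed_ipi_rm_message_pass() and
PPC_MSG_RM_HOST_ACTION come from this patch set, the wrapper is
hypothetical:

/* Ask a CPU known to be in the host to act, without leaving real mode. */
static void rm_poke_host_cpu(int host_cpu)
{
	smp_muxed_ipi_rm_message_pass(host_cpu, PPC_MSG_RM_HOST_ACTION);
}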


[PATCH 1/2] powerpc/smp: Support more IPI messages

2015-10-29 Thread Suresh Warrier
This patch increases the number of demuxed messages for a
controller with a single IPI to 8 on 64-bit systems.

This is required because we want to use the IPI mechanism
to send messages from a CPU running in KVM real mode in a
guest to a CPU in the host, asking it to take some action.
Currently, we only support 4 messages and all 4 are already taken.

Define a fifth message PPC_MSG_RM_HOST_ACTION for this
purpose.

Signed-off-by: Suresh Warrier 
---
 arch/powerpc/include/asm/smp.h | 3 +++
 arch/powerpc/kernel/smp.c  | 8 
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 825663c..9ef9c37 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
 #define PPC_MSG_TICK_BROADCAST 2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION 4
+
 /* for irq controllers that have dedicated ipis per message (4) */
 extern int smp_request_message_ipi(int virq, int message);
 extern const char *smp_ipi_name[];
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec20..a53a130 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
 
 #ifdef CONFIG_PPC_SMP_MUXED_IPI
 struct cpu_messages {
-   int messages;   /* current messages */
+   long messages;  /* current messages */
unsigned long data; /* data for cause ipi */
 };
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -236,15 +236,15 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
 }
 
 #ifdef __BIG_ENDIAN__
-#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
 #else
-#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))
 #endif
 
 irqreturn_t smp_ipi_demux(void)
 {
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
-   unsigned int all;
+   unsigned long all;
 
mb();   /* order any irq clear */
 
-- 
1.8.3.4
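
The encoding is worth spelling out: each message now owns one byte of
the (widened to long) messages word, so a sender sets message m with
a plain byte store while the demux side tests bit masks. On
big-endian, byte 0 is the most significant byte, hence the
(BITS_PER_LONG - 8) - 8*(A) shift. A small standalone illustration,
assuming a 64-bit long (plain userspace C, not kernel code):

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))
#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif

int main(void)
{
	unsigned long messages = 0;
	char *m = (char *)&messages;	/* byte view, as a sender uses */

	m[4] = 1;	/* set message 4, e.g. PPC_MSG_RM_HOST_ACTION */

	/* The demux side sees the same byte as a bit in the long. */
	printf("message 4 %s\n",
	       (messages & IPI_MESSAGE(4)) ? "pending" : "clear");
	return 0;
}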


[PATCH 0/2] Increase number of supported IPI messages

2015-10-29 Thread Suresh Warrier
When an H_IPI hypercall is made, we often need to switch to the
host to poke the VCPU to which the virtual IPI is targeted.
This is because we cannot do a thread wake-up in real mode in
the guest. Performance tests have shown that this impacts
several different workloads - from MongoDB to small message
networking.

One solution is to hand off this job of waking the VCPU to a CPU
that is running in the host by sending it a message through the 
IPI mechanism from the guest. 

Currently, we only support 4 IPI messages and all 4 are already 
used for other purposes. This patch set increases the number
of supported IPI messages to 8. It also provides the code to
send an IPI from KVM real-mode since the existing cause_ipi
functions cannot be executed in real-mode.

There is an associated patch set for "KVM: PPC: Book3S HV" 
that implements the actual solution to avoid the switch to
host to do the VCPU wakeup. 

Suresh Warrier (2):
  powerpc/smp: Support more IPI messages
  powerpc/smp: Add smp_muxed_ipi_rm_message_pass

 arch/powerpc/include/asm/smp.h |  4 
 arch/powerpc/kernel/smp.c  | 38 ++
 2 files changed, 38 insertions(+), 4 deletions(-)

-- 
1.8.3.4
