[PATCH v5 10/33] KVM: PPC: Book3S HV: Handle hypervisor instruction faults better

2018-10-07 Thread Paul Mackerras
Currently the code for handling hypervisor instruction page faults
passes 0 for the flags indicating the type of fault, which is OK in
the usual case that the page is not mapped in the partition-scoped
page tables.  However, there are other causes for hypervisor
instruction page faults, such as not being able to update a reference
(R) or change (C) bit.  The cause is indicated in bits in HSRR1,
including a bit which indicates that the fault is due to not being
able to write to a page (for example to update an R or C bit).
Not handling these other kinds of faults correctly can lead to a
loop of continual faults without forward progress in the guest.

In order to handle these faults better, this patch constructs a
"DSISR-like" value from the bits which DSISR and SRR1 (for a HISI)
have in common, and passes it to kvmppc_book3s_hv_page_fault() so
that it knows what caused the fault.
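
As a minimal illustration of the idea (illustrative only, using the mask
names from this patch; this helper is not part of the patch itself):

        /* Build a DSISR-like fault code from HSRR1 on a HISI */
        static unsigned long hisi_to_dsisr(unsigned long hsrr1)
        {
                /* bits that DSISR and (H)SRR1 have in common on 64-bit Book3S */
                unsigned long dsisr = hsrr1 & DSISR_SRR1_MATCH_64S;

                /* the fault was a failed write, e.g. updating an R or C bit */
                if (hsrr1 & HSRR1_HISI_WRITE)
                        dsisr |= DSISR_ISSTORE;
                return dsisr;
        }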

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/reg.h | 1 +
 arch/powerpc/kvm/book3s_hv.c   | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e5b314e..6fda746 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -766,6 +766,7 @@
 #define SPRN_HSRR0 0x13A   /* Save/Restore Register 0 */
 #define SPRN_HSRR1 0x13B   /* Save/Restore Register 1 */
 #define   HSRR1_DENORM 0x00100000 /* Denorm exception */
+#define   HSRR1_HISI_WRITE 0x00010000 /* HISI bcs couldn't update mem */
 
 #define SPRN_TBCTL 0x35f   /* PA6T Timebase control register */
 #define   TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0c1dd76..e310117 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1188,7 +1188,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
-   vcpu->arch.fault_dsisr = 0;
+   vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
+   DSISR_SRR1_MATCH_64S;
+   if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+   vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
r = RESUME_PAGE_FAULT;
break;
/*
-- 
2.7.4



[PATCH v5 09/33] KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests

2018-10-07 Thread Paul Mackerras
This creates an alternative guest entry/exit path which is used for
radix guests on POWER9 systems when we have indep_threads_mode=Y.  In
these circumstances there is exactly one vcpu per vcore and there is
no coordination required between vcpus or vcores; the vcpu can enter
the guest without needing to synchronize with anything else.

The new fast path is implemented almost entirely in C in book3s_hv.c
and runs with the MMU on until the guest is entered.  On guest exit
we use the existing path until the point where we are committed to
exiting the guest (as distinct from handling an interrupt in the
low-level code and returning to the guest) and we have pulled the
guest context from the XIVE.  At that point we check a flag in the
stack frame to see whether we came in via the old path or the new
path; if we came in via the new path then we go back to C code to do
the rest of the process of saving the guest context and restoring the
host context.

The C code is split into separate functions for handling the
OS-accessible state and the hypervisor state, with the idea that the
latter can be replaced by a hypercall when we implement nested
virtualization.
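
Schematically, the new path has roughly this shape (a sketch only:
kvmhv_load_hv_regs_and_go() is from this series, the other helpers are
hypothetical stand-ins, and nearly all of the state handling is elided):

        static int p9_radix_guest_run(struct kvm_vcpu *vcpu, u64 time_limit)
        {
                int trap;

                /* OS-accessible state: GPRs, FP/VMX/VSX, SPRs the guest OS sees */
                load_guest_os_state(vcpu);              /* hypothetical helper */

                /* Hypervisor state; this half is what can become a hypercall
                 * when we are ourselves a nested (L1) hypervisor. */
                trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit);

                save_guest_os_state(vcpu);              /* hypothetical helper */
                return trap;
        }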

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/asm-prototypes.h |   2 +
 arch/powerpc/include/asm/kvm_ppc.h|   2 +
 arch/powerpc/kvm/book3s_hv.c  | 429 +-
 arch/powerpc/kvm/book3s_hv_ras.c  |   2 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  95 ++-
 arch/powerpc/kvm/book3s_xive.c|  63 +
 6 files changed, 589 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 0c1a2b0..5c9b00c 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -165,4 +165,6 @@ void kvmhv_load_host_pmu(void);
 void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
 void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
 
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 83d61b8..245e564 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -585,6 +585,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 
 extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
   int level, bool line_status);
+extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
 #else
 static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
   u32 priority) { return -1; }
@@ -607,6 +608,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
 
 static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
  int level, bool line_status) { return -ENODEV; }
+static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
 #endif /* CONFIG_KVM_XIVE */
 
 /*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0e17593..0c1dd76 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3080,6 +3080,269 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 }
 
 /*
+ * Load up hypervisor-mode registers on P9.
+ */
+static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit)
+{
+   struct kvmppc_vcore *vc = vcpu->arch.vcore;
+   s64 hdec;
+   u64 tb, purr, spurr;
+   int trap;
+   unsigned long host_hfscr = mfspr(SPRN_HFSCR);
+   unsigned long host_ciabr = mfspr(SPRN_CIABR);
+   unsigned long host_dawr = mfspr(SPRN_DAWR);
+   unsigned long host_dawrx = mfspr(SPRN_DAWRX);
+   unsigned long host_psscr = mfspr(SPRN_PSSCR);
+   unsigned long host_pidr = mfspr(SPRN_PID);
+
+   hdec = time_limit - mftb();
+   if (hdec < 0)
+   return BOOK3S_INTERRUPT_HV_DECREMENTER;
+   mtspr(SPRN_HDEC, hdec);
+
+   if (vc->tb_offset) {
+   u64 new_tb = mftb() + vc->tb_offset;
+   mtspr(SPRN_TBU40, new_tb);
+   tb = mftb();
+   if ((tb & 0xff) < (new_tb & 0xff))
+   mtspr(SPRN_TBU40, new_tb + 0x100);
+   vc->tb_offset_applied = vc->tb_offset;
+   }
+
+   if (vc->pcr)
+   mtspr(SPRN_PCR, vc->pcr);
+   mtspr(SPRN_DPDES, vc->dpdes);
+   mtspr(SPRN_VTB, vc->vtb);
+
+   local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+   local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+   mtspr(SPRN_PURR, vcpu->arch.purr);
+   mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+   if (cpu_has_feature(CPU_FTR_DAWR)) {
+   mtspr(SPRN_DAWR, vcpu->arch.dawr);
+   mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
+   }
+   mtspr(SPRN_CIABR, 

[PATCH v5 08/33] KVM: PPC: Book3S HV: Call kvmppc_handle_exit_hv() with vcore unlocked

2018-10-07 Thread Paul Mackerras
Currently kvmppc_handle_exit_hv() is called with the vcore lock held
because it is called within a for_each_runnable_thread loop.
However, we already unlock the vcore within kvmppc_handle_exit_hv()
under certain circumstances, and this is safe because (a) any vcpus
that become runnable and are added to the runnable set by
kvmppc_run_vcpu() have their vcpu->arch.trap == 0 and can't actually
run in the guest (because the vcore state is VCORE_EXITING), and
(b) for_each_runnable_thread is safe against addition or removal
of vcpus from the runnable set.

Therefore, in order to simplify things for following patches, let's
drop the vcore lock in the for_each_runnable_thread loop, so
kvmppc_handle_exit_hv() gets called without the vcore lock held.

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_hv.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 49a686c..0e17593 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1084,7 +1084,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
-/* Called with vcpu->arch.vcore->lock held */
 static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 struct task_struct *tsk)
 {
@@ -1205,10 +1204,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
swab32(vcpu->arch.emul_inst) :
vcpu->arch.emul_inst;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
-   /* Need vcore unlocked to call kvmppc_get_last_inst */
-   spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_debug_inst(run, vcpu);
-   spin_lock(&vcpu->arch.vcore->lock);
} else {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1224,12 +1220,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
r = EMULATE_FAIL;
if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
-   cpu_has_feature(CPU_FTR_ARCH_300)) {
-   /* Need vcore unlocked to call kvmppc_get_last_inst */
-   spin_unlock(&vcpu->arch.vcore->lock);
+   cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmppc_emulate_doorbell_instr(vcpu);
-   spin_lock(&vcpu->arch.vcore->lock);
-   }
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -2599,6 +2591,14 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
spin_lock(&vc->lock);
now = get_tb();
for_each_runnable_thread(i, vcpu, vc) {
+   /*
+* It's safe to unlock the vcore in the loop here, because
+* for_each_runnable_thread() is safe against removal of
+* the vcpu, and the vcore state is VCORE_EXITING here,
+* so any vcpus becoming runnable will have their arch.trap
+* set to zero and can't actually run in the guest.
+*/
+   spin_unlock(&vc->lock);
/* cancel pending dec exception if dec is positive */
if (now < vcpu->arch.dec_expires &&
kvmppc_core_pending_dec(vcpu))
@@ -2614,6 +2614,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
vcpu->arch.ret = ret;
vcpu->arch.trap = 0;
 
+   spin_lock(&vc->lock);
if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
if (vcpu->arch.pending_exceptions)
kvmppc_core_prepare_to_enter(vcpu);
-- 
2.7.4



[PATCH v5 07/33] KVM: PPC: Book3S: Rework TM save/restore code and make it C-callable

2018-10-07 Thread Paul Mackerras
This adds a parameter to __kvmppc_save_tm and __kvmppc_restore_tm
which allows the caller to indicate whether it wants the nonvolatile
register state to be preserved across the call, as required by the C
calling conventions.  This parameter being non-zero also causes the
MSR bits that enable TM, FP, VMX and VSX to be preserved.  The
condition register and DSCR are now always preserved.

With this, kvmppc_save_tm_hv and kvmppc_restore_tm_hv can be called
from C code provided the 3rd parameter is non-zero.  So that these
functions can be called from modules, they now include code to set
the TOC pointer (r2) on entry, as they can call other built-in C
functions which will assume the TOC to have been set.

Also, the fake suspend code in kvmppc_save_tm_hv is modified here to
assume that treclaim in fake-suspend state does not modify any registers,
which is the case on POWER9.  This enables the code to be simplified
quite a bit.

_kvmppc_save_tm_pr and _kvmppc_restore_tm_pr become much simpler with
this change, since they now only need to save and restore TAR and pass
1 for the 3rd argument to __kvmppc_{save,restore}_tm.
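
For example, a C caller can now do something like this (illustrative
only, not a hunk from this patch):

        /* Third argument non-zero: the helper preserves the non-volatile
         * GPRs, CR and DSCR, and the MSR bits that enable TM, FP, VMX
         * and VSX, so it is safe to call from ordinary C code. */
        if (cpu_has_feature(CPU_FTR_TM))
                kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);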

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/asm-prototypes.h |  10 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  49 +++---
 arch/powerpc/kvm/tm.S | 250 --
 3 files changed, 169 insertions(+), 140 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 024e8fc..0c1a2b0 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -150,6 +150,16 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
 
 extern long flush_count_cache;
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+#else
+static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+bool preserve_nv) { }
+static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+   bool preserve_nv) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
 void kvmhv_save_host_pmu(void);
 void kvmhv_load_host_pmu(void);
 void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index fc360b5..45dd637 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -759,11 +759,13 @@ BEGIN_FTR_SECTION
b   91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
-* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 */
mr  r3, r4
ld  r4, VCPU_MSR(r3)
+   li  r5, 0   /* don't preserve non-vol regs */
bl  kvmppc_restore_tm_hv
+   nop
ld  r4, HSTATE_KVM_VCPU(r13)
 91:
 #endif
@@ -1603,11 +1605,13 @@ BEGIN_FTR_SECTION
b   91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
-* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 */
mr  r3, r9
ld  r4, VCPU_MSR(r3)
+   li  r5, 0   /* don't preserve non-vol regs */
bl  kvmppc_save_tm_hv
+   nop
ld  r9, HSTATE_KVM_VCPU(r13)
 91:
 #endif
@@ -2486,11 +2490,13 @@ BEGIN_FTR_SECTION
b   91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
-* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 */
ld  r3, HSTATE_KVM_VCPU(r13)
ld  r4, VCPU_MSR(r3)
+   li  r5, 0   /* don't preserve non-vol regs */
bl  kvmppc_save_tm_hv
+   nop
 91:
 #endif
 
@@ -2606,11 +2612,13 @@ BEGIN_FTR_SECTION
b   91f
 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
-* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 */
mr  r3, r4
ld  r4, VCPU_MSR(r3)
+   li  r5, 0   /* don't preserve non-vol regs */
bl  kvmppc_restore_tm_hv
+   nop
ld  r4, HSTATE_KVM_VCPU(r13)
 91:
 #endif
@@ -2943,10 +2951,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
  * Save transactional state and TM-related registers.
  * Called with r3 pointing to the vcpu struct and r4 containing
  * the guest MSR value.
- * This can modify all checkpointed registers, but
+ * r5 is non-zero iff 

[PATCH v5 06/33] KVM: PPC: Book3S HV: Simplify real-mode interrupt handling

2018-10-07 Thread Paul Mackerras
This streamlines the first part of the code that handles a hypervisor
interrupt that occurred in the guest.  With this, all of the real-mode
handling that occurs is done before the "guest_exit_cont" label; once
we get to that label we are committed to exiting to host virtual mode.
Thus the machine check and HMI real-mode handling is moved before that
label.

Also, the code to handle external interrupts is moved out of line, as
is the code that calls kvmppc_realmode_hmi_handler().

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_hv_ras.c|   8 ++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 220 
 2 files changed, 119 insertions(+), 109 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index b11043b..ee564b6 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -331,5 +331,13 @@ long kvmppc_realmode_hmi_handler(void)
} else {
wait_for_tb_resync();
}
+
+   /*
+* Reset tb_offset_applied so the guest exit code won't try
+* to subtract the previous timebase offset from the timebase.
+*/
+   if (local_paca->kvm_hstate.kvm_vcore)
+   local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
+
return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5b2ae34..fc360b5 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1018,8 +1018,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
-deliver_guest_interrupt:
-kvmppc_cede_reentry:   /* r4 = vcpu, r13 = paca */
+deliver_guest_interrupt:   /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld  r0, VCPU_PENDING_EXC(r4)
 BEGIN_FTR_SECTION
@@ -1269,18 +1268,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r3, VCPU_CTR(r9)
std r4, VCPU_XER(r9)
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-   /* For softpatch interrupt, go off and do TM instruction emulation */
-   cmpwi   r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-   beq kvmppc_tm_emul
-#endif
+   /* Save more register state  */
+   mfdar   r3
+   mfdsisr r4
+   std r3, VCPU_DAR(r9)
+   stw r4, VCPU_DSISR(r9)
 
/* If this is a page table miss then see if it's theirs or ours */
cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
beq kvmppc_hdsi
+   std r3, VCPU_FAULT_DAR(r9)
+   stw r4, VCPU_FAULT_DSISR(r9)
cmpwi   r12, BOOK3S_INTERRUPT_H_INST_STORAGE
beq kvmppc_hisi
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   /* For softpatch interrupt, go off and do TM instruction emulation */
+   cmpwi   r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+   beq kvmppc_tm_emul
+#endif
+
/* See if this is a leftover HDEC interrupt */
cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
@@ -1303,7 +1310,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi   r0, 0
-   beq 4f
+   beq maybe_reenter_guest
b   guest_exit_cont
 3:
/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
@@ -1315,82 +1322,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 14:
/* External interrupt ? */
cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-   bne+guest_exit_cont
-
-   /* External interrupt, first check for host_ipi. If this is
-* set, we know the host wants us out so let's do it now
-*/
-   bl  kvmppc_read_intr
-
-   /*
-* Restore the active volatile registers after returning from
-* a C function.
-*/
-   ld  r9, HSTATE_KVM_VCPU(r13)
-   li  r12, BOOK3S_INTERRUPT_EXTERNAL
-
-   /*
-* kvmppc_read_intr return codes:
-*
-* Exit to host (r3 > 0)
-*   1 An interrupt is pending that needs to be handled by the host
-* Exit guest and return to host by branching to guest_exit_cont
-*
-*   2 Passthrough that needs completion in the host
-* Exit guest and return to host by branching to guest_exit_cont
-* However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
-* to indicate to the host to complete handling the interrupt
-*
-* Before returning to guest, we check if any CPU is heading out
-* to the host and if so, we head out also. If no CPUs are heading
-* check return values <= 0.
-*
-* Return to guest (r3 <= 0)
-*  0 No external interrupt is pending
-* -1 A guest wakeup IPI (which has now been cleared)
-*In either case, we return to guest to deliver any pending
-*guest interrupts.
-*
-* -2 A PCI 

[PATCH v5 05/33] KVM: PPC: Book3S HV: Extract PMU save/restore operations as C-callable functions

2018-10-07 Thread Paul Mackerras
This pulls out the assembler code that is responsible for saving and
restoring the PMU state for the host and guest into separate functions
so they can be used from an alternate entry path.  The calling
convention is made compatible with C.
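
With these extracted, a C entry path can bracket a guest run roughly as
follows (ordering sketch only; error handling is elided, and pmu_in_use
stands for the host's PMU-in-use state):

        kvmhv_save_host_pmu();
        kvmhv_load_guest_pmu(vcpu);

        /* ... enter and run the guest; on the way out ... */

        kvmhv_save_guest_pmu(vcpu, pmu_in_use);
        kvmhv_load_host_pmu();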

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/asm-prototypes.h |   5 +
 arch/powerpc/kvm/book3s_hv_interrupts.S   |  95 
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 363 --
 3 files changed, 253 insertions(+), 210 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1f4691c..024e8fc 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -150,4 +150,9 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
 
 extern long flush_count_cache;
 
+void kvmhv_save_host_pmu(void);
+void kvmhv_load_host_pmu(void);
+void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
+void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 666b91c..a6d1001 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
/* Save host PMU registers */
-BEGIN_FTR_SECTION
-   /* Work around P8 PMAE bug */
-   li  r3, -1
-   clrrdi  r3, r3, 10
-   mfspr   r8, SPRN_MMCR2
-   mtspr   SPRN_MMCR2, r3  /* freeze all counters using MMCR2 */
-   isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-   li  r3, 1
-   sldir3, r3, 31  /* MMCR0_FC (freeze counters) bit */
-   mfspr   r7, SPRN_MMCR0  /* save MMCR0 */
-   mtspr   SPRN_MMCR0, r3  /* freeze all counters, disable interrupts */
-   mfspr   r6, SPRN_MMCRA
-   /* Clear MMCRA in order to disable SDAR updates */
-   li  r5, 0
-   mtspr   SPRN_MMCRA, r5
-   isync
-   lbz r5, PACA_PMCINUSE(r13)  /* is the host using the PMU? */
-   cmpwi   r5, 0
-   beq 31f /* skip if not */
-   mfspr   r5, SPRN_MMCR1
-   mfspr   r9, SPRN_SIAR
-   mfspr   r10, SPRN_SDAR
-   std r7, HSTATE_MMCR0(r13)
-   std r5, HSTATE_MMCR1(r13)
-   std r6, HSTATE_MMCRA(r13)
-   std r9, HSTATE_SIAR(r13)
-   std r10, HSTATE_SDAR(r13)
-BEGIN_FTR_SECTION
-   mfspr   r9, SPRN_SIER
-   std r8, HSTATE_MMCR2(r13)
-   std r9, HSTATE_SIER(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-   mfspr   r3, SPRN_PMC1
-   mfspr   r5, SPRN_PMC2
-   mfspr   r6, SPRN_PMC3
-   mfspr   r7, SPRN_PMC4
-   mfspr   r8, SPRN_PMC5
-   mfspr   r9, SPRN_PMC6
-   stw r3, HSTATE_PMC1(r13)
-   stw r5, HSTATE_PMC2(r13)
-   stw r6, HSTATE_PMC3(r13)
-   stw r7, HSTATE_PMC4(r13)
-   stw r8, HSTATE_PMC5(r13)
-   stw r9, HSTATE_PMC6(r13)
-31:
+   bl  kvmhv_save_host_pmu
 
/*
 * Put whatever is in the decrementer into the
@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld  r0, PPC_LR_STKOFF(r1)
mtlrr0
blr
+
+_GLOBAL(kvmhv_save_host_pmu)
+BEGIN_FTR_SECTION
+   /* Work around P8 PMAE bug */
+   li  r3, -1
+   clrrdi  r3, r3, 10
+   mfspr   r8, SPRN_MMCR2
+   mtspr   SPRN_MMCR2, r3  /* freeze all counters using MMCR2 */
+   isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+   li  r3, 1
+   sldir3, r3, 31  /* MMCR0_FC (freeze counters) bit */
+   mfspr   r7, SPRN_MMCR0  /* save MMCR0 */
+   mtspr   SPRN_MMCR0, r3  /* freeze all counters, disable interrupts */
+   mfspr   r6, SPRN_MMCRA
+   /* Clear MMCRA in order to disable SDAR updates */
+   li  r5, 0
+   mtspr   SPRN_MMCRA, r5
+   isync
+   lbz r5, PACA_PMCINUSE(r13)  /* is the host using the PMU? */
+   cmpwi   r5, 0
+   beq 31f /* skip if not */
+   mfspr   r5, SPRN_MMCR1
+   mfspr   r9, SPRN_SIAR
+   mfspr   r10, SPRN_SDAR
+   std r7, HSTATE_MMCR0(r13)
+   std r5, HSTATE_MMCR1(r13)
+   std r6, HSTATE_MMCRA(r13)
+   std r9, HSTATE_SIAR(r13)
+   std r10, HSTATE_SDAR(r13)
+BEGIN_FTR_SECTION
+   mfspr   r9, SPRN_SIER
+   std r8, HSTATE_MMCR2(r13)
+   std r9, HSTATE_SIER(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+   mfspr   r3, SPRN_PMC1
+   mfspr   r5, SPRN_PMC2
+   mfspr   r6, SPRN_PMC3
+   mfspr   r7, SPRN_PMC4
+   mfspr   r8, SPRN_PMC5
+   mfspr   r9, SPRN_PMC6
+   stw r3, HSTATE_PMC1(r13)
+   stw r5, HSTATE_PMC2(r13)
+   stw r6, HSTATE_PMC3(r13)
+   stw r7, 

[PATCH v5 04/33] KVM: PPC: Book3S HV: Move interrupt delivery on guest entry to C code

2018-10-07 Thread Paul Mackerras
This is based on a patch by Suraj Jitindar Singh.

This moves the code in book3s_hv_rmhandlers.S that generates an
external, decrementer or privileged doorbell interrupt just before
entering the guest to C code in book3s_hv_builtin.c.  This is to
make future maintenance and modification easier.  The algorithm
expressed in the C code is almost identical to the previous
algorithm.

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_ppc.h  |  1 +
 arch/powerpc/kvm/book3s_hv.c|  3 +-
 arch/powerpc/kvm/book3s_hv_builtin.c| 48 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 70 -
 4 files changed, 67 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e991821..83d61b8 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -652,6 +652,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 unsigned long mfrr);
 int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
 int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
 
 /*
  * Host-side operations we want to set up while running in real
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3e3a715..49a686c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -730,8 +730,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
/*
 * Ensure that the read of vcore->dpdes comes after the read
 * of vcpu->doorbell_request.  This barrier matches the
-* lwsync in book3s_hv_rmhandlers.S just before the
-* fast_guest_return label.
+* smp_wmb() in kvmppc_guest_entry_inject_int().
 */
smp_rmb();
vc = vcpu->arch.vcore;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fc6bb96..ccfea5b 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -729,3 +729,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
smp_mb();
local_paca->kvm_hstate.kvm_split_mode = NULL;
 }
+
+/*
+ * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
+ * Can we inject a Decrementer or an External interrupt?
+ */
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
+{
+   int ext;
+   unsigned long vec = 0;
+   unsigned long lpcr;
+
+   /* Insert EXTERNAL bit into LPCR at the MER bit position */
+   ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
+   lpcr = mfspr(SPRN_LPCR);
+   lpcr |= ext << LPCR_MER_SH;
+   mtspr(SPRN_LPCR, lpcr);
+   isync();
+
+   if (vcpu->arch.shregs.msr & MSR_EE) {
+   if (ext) {
+   vec = BOOK3S_INTERRUPT_EXTERNAL;
+   } else {
+   long int dec = mfspr(SPRN_DEC);
+   if (!(lpcr & LPCR_LD))
+   dec = (int) dec;
+   if (dec < 0)
+   vec = BOOK3S_INTERRUPT_DECREMENTER;
+   }
+   }
+   if (vec) {
+   unsigned long msr, old_msr = vcpu->arch.shregs.msr;
+
+   kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
+   kvmppc_set_srr1(vcpu, old_msr);
+   kvmppc_set_pc(vcpu, vec);
+   msr = vcpu->arch.intr_msr;
+   if (MSR_TM_ACTIVE(old_msr))
+   msr |= MSR_TS_S;
+   vcpu->arch.shregs.msr = msr;
+   }
+
+   if (vcpu->arch.doorbell_request) {
+   mtspr(SPRN_DPDES, 1);
+   vcpu->arch.vcore->dpdes = 1;
+   smp_wmb();
+   vcpu->arch.doorbell_request = 0;
+   }
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 77960e6..6752da1 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1101,13 +1101,20 @@ no_xive:
 #endif /* CONFIG_KVM_XICS */
 
 deliver_guest_interrupt:
-   ld  r6, VCPU_CTR(r4)
-   ld  r7, VCPU_XER(r4)
-
-   mtctr   r6
-   mtxer   r7
-
 kvmppc_cede_reentry:   /* r4 = vcpu, r13 = paca */
+   /* Check if we can deliver an external or decrementer interrupt now */
+   ld  r0, VCPU_PENDING_EXC(r4)
+BEGIN_FTR_SECTION
+   /* On POWER9, also check for emulated doorbell interrupt */
+   lbz r3, VCPU_DBELL_REQ(r4)
+   or  r0, r0, r3
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+   cmpdi   r0, 0
+   beq 71f
+   mr  r3, r4
+   bl  kvmppc_guest_entry_inject_int
+   ld  r4, HSTATE_KVM_VCPU(r13)
+71:
ld  r10, VCPU_PC(r4)
ld  r11, VCPU_MSR(r4)
ld  r6, VCPU_SRR0(r4)
@@ -1120,53 +1127,10 @@ 

[PATCH v5 03/33] KVM: PPC: Book3S HV: Remove left-over code in XICS-on-XIVE emulation

2018-10-07 Thread Paul Mackerras
This removes code that clears the external interrupt pending bit in
the pending_exceptions bitmap.  This is left over from an earlier
iteration of the code where this bit was set when an escalation
interrupt arrived in order to wake the vcpu from cede.  Currently
we set the vcpu->arch.irq_pending flag instead for this purpose.
Therefore there is no need to do anything with the pending_exceptions
bitmap.

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_xive_template.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 203ea65..033363d 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
/* First collect pending bits from HW */
GLUE(X_PFX,ack_pending)(xc);
 
-   /*
-* Cleanup the old-style bits if needed (they may have been
-* set by pull or an escalation interrupts).
-*/
-   if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
-   clear_bit(BOOK3S_IRQPRIO_EXTERNAL,
- &vcpu->arch.pending_exceptions);
-
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
 xc->pending, xc->hw_cppr, xc->cppr);
 
-- 
2.7.4



[PATCH v5 02/33] KVM: PPC: Book3S: Simplify external interrupt handling

2018-10-07 Thread Paul Mackerras
Currently we use two bits in the vcpu pending_exceptions bitmap to
indicate that an external interrupt is pending for the guest, one
for "one-shot" interrupts that are cleared when delivered, and one
for interrupts that persist until cleared by an explicit action of
the OS (e.g. an acknowledge to an interrupt controller).  The
BOOK3S_IRQPRIO_EXTERNAL bit is used for one-shot interrupt requests
and BOOK3S_IRQPRIO_EXTERNAL_LEVEL is used for persisting interrupts.

In practice BOOK3S_IRQPRIO_EXTERNAL never gets used, because our
Book3S platforms generally, and pseries in particular, expect
external interrupt requests to persist until they are acknowledged
at the interrupt controller.  That combined with the confusion
introduced by having two bits for what is essentially the same thing
makes it attractive to simplify things by only using one bit.  This
patch does that.

With this patch there is only BOOK3S_IRQPRIO_EXTERNAL, and by default
it has the semantics of a persisting interrupt.  In order to avoid
breaking the ABI, we introduce a new "external_oneshot" flag which
preserves the behaviour of the KVM_INTERRUPT ioctl with the
KVM_INTERRUPT_SET argument.
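
The queue side then amounts to something like this (a sketch, not the
literal patch hunk; kvmppc_book3s_queue_irqprio() is the existing
queueing helper):

        if (irq->irq == KVM_INTERRUPT_SET)
                vcpu->arch.external_oneshot = 1;   /* clear after delivery */
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);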

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_asm.h |  4 +---
 arch/powerpc/include/asm/kvm_host.h|  1 +
 arch/powerpc/kvm/book3s.c  | 43 --
 arch/powerpc/kvm/book3s_hv_rm_xics.c   |  5 ++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S|  4 +--
 arch/powerpc/kvm/book3s_pr.c   |  1 -
 arch/powerpc/kvm/book3s_xics.c | 11 +++
 arch/powerpc/kvm/book3s_xive_template.c|  2 +-
 arch/powerpc/kvm/trace_book3s.h|  1 -
 tools/perf/arch/powerpc/util/book3s_hv_exits.h |  1 -
 10 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a790d5c..1f32191 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -84,7 +84,6 @@
 #define BOOK3S_INTERRUPT_INST_STORAGE  0x400
 #define BOOK3S_INTERRUPT_INST_SEGMENT  0x480
 #define BOOK3S_INTERRUPT_EXTERNAL  0x500
-#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL0x501
 #define BOOK3S_INTERRUPT_EXTERNAL_HV   0x502
 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600
 #define BOOK3S_INTERRUPT_PROGRAM   0x700
@@ -134,8 +133,7 @@
 #define BOOK3S_IRQPRIO_EXTERNAL14
 #define BOOK3S_IRQPRIO_DECREMENTER 15
 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
-#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL  17
-#define BOOK3S_IRQPRIO_MAX 18
+#define BOOK3S_IRQPRIO_MAX 17
 
 #define BOOK3S_HFLAG_DCBZ320x1
 #define BOOK3S_HFLAG_SLB   0x2
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 906bcbdf..3cd0b9f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -707,6 +707,7 @@ struct kvm_vcpu_arch {
u8 hcall_needed;
u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed;
+   u8 external_oneshot;/* clear external irq after delivery */
 
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 87348e4..66a5521 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -150,7 +150,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
-   case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL;   break;
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;break;
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;  break;
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;   break;
@@ -236,18 +235,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
 struct kvm_interrupt *irq)
 {
-   unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
-
-   if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
-   vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+   /*
+* This case (KVM_INTERRUPT_SET) should never actually arise for
+* a pseries guest (because pseries guests expect their interrupt
+* controllers to continue asserting an external interrupt request
+* until it is acknowledged at the interrupt controller), but is
+* included to avoid ABI breakage and potentially for other
+* sorts of guest.
+*
+* There is a subtlety here: HV KVM does not test the
+* external_oneshot flag in the code that synthesizes
+* external interrupts for the guest 

[PATCH v5 01/33] powerpc: Turn off CPU_FTR_P9_TM_HV_ASSIST in non-hypervisor mode

2018-10-07 Thread Paul Mackerras
When doing nested virtualization, it is only necessary to do the
transactional memory hypervisor assist at level 0, that is, when
we are in hypervisor mode.  Nested hypervisors can just use the TM
facilities as architected.  Therefore we should clear the
CPU_FTR_P9_TM_HV_ASSIST bit when we are not in hypervisor mode,
along with the CPU_FTR_HVMODE bit.

Doing this will not change anything at this stage because the only
code that tests CPU_FTR_P9_TM_HV_ASSIST is in HV KVM, which currently
can only be used when CPU_FTR_HVMODE is set.

Reviewed-by: David Gibson 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/kernel/cpu_setup_power.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 458b928..c317080 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -147,8 +147,8 @@ __init_hvmode_206:
rldicl. r0,r3,4,63
bnelr
ld  r5,CPU_SPEC_FEATURES(r4)
-   LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
-   xor r5,r5,r6
+   LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
+   andcr5,r5,r6
std r5,CPU_SPEC_FEATURES(r4)
blr
 
-- 
2.7.4



[PATCH v5 00/33] KVM: PPC: Book3S HV: Nested HV virtualization

2018-10-07 Thread Paul Mackerras
This patch series implements nested virtualization in the KVM-HV
module for radix guests on POWER9 systems.  Unlike PR KVM, nested
guests are able to run in supervisor mode, meaning that performance is
much better than with PR KVM, and is very close to the performance of
a non-nested guest for most things.

The way this works is that each nested guest is also a guest of the
real hypervisor, also known as the level 0 or L0 hypervisor, which
runs in the CPU's hypervisor mode.  Its guests are at level 1, and
when an L1 system wants to run a nested guest, it performs hypercalls
to L0 to set up a virtual partition table in its (L1's) memory and to
enter the L2 guest.  The L0 hypervisor maintains a shadow
partition-scoped page table for the L2 guest and demand-faults entries
into it by translating the L1 real addresses in the partition-scoped
page table in L1 memory into L0 real addresses and puts them in the
shadow partition-scoped page table for L2.
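
Schematically, building one shadow PTE comes down to something like
this (a much-simplified sketch: l1_real_to_l0_real() is a hypothetical
stand-in for the translation through L0's map of L1 memory, and
permission intersection and page-size handling are ignored):

        static u64 make_shadow_pte(u64 l1_pte)
        {
                unsigned long l1_real = l1_pte & PTE_RPN_MASK;
                unsigned long l0_real = l1_real_to_l0_real(l1_real);

                return (l1_pte & ~PTE_RPN_MASK) | l0_real;
        }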

Essentially what this is doing is providing L1 with the ability to
perform (some) hypervisor functions using paravirtualization; optionally,
TLB invalidations can be done through emulation of the tlbie
instruction rather than a hypercall.

Along the way, this implements a new guest entry/exit path for radix
guests on POWER9 systems which is written almost entirely in C and
does not do any of the inter-thread coordination that the existing
entry/exit path does.  It is only used for radix guests and when
indep_threads_mode=Y (the default).

The limitations of this scheme are:

- Host and all nested hypervisors and their guests must be in radix
  mode.

- Nested hypervisors cannot use indep_threads_mode=N.

- If the host (i.e. the L0 hypervisor) has indep_threads_mode=N then
  only one nested vcpu can be run on any core at any given time; the
  secondary threads will do nothing.

- A nested hypervisor can't use a smaller page size than the base page
  size of the hypervisor(s) above it.

- A nested hypervisor is limited to having at most 1023 guests below
  it, each of which can have at most NR_CPUS virtual CPUs (and the
  virtual CPU ids have to be < NR_CPUS as well).

This patch series is against the kvm tree's next branch.

Changes in this version since version 4:

- Added KVM_PPC_NO_HASH to flags field of struct kvm_ppc_smmu_info rather
  than disabling the KVM_PPC_GET_SMMU_INFO ioctl entirely.

- Make sure the hypercalls for controlling nested guests will fail if
  the guest is in HPT mode.

- Made the KVM_CAP_PPC_MMU_HASH_V3 capability report false in a nested
  hypervisor.

- Fixed a bug causing HPT guests to fail to execute real-mode
  hypercalls correctly.

- Fix crashes seen on VM exit or when switching from HPT to radix,
  due to leftover rmap values.

- Removed enable/disable flag from KVM_ENABLE_CAP on the
  KVM_CAP_PPC_NESTED_HV capability; it always enables.

- Fixed a bug causing memory corruption on nested guest startup,
  and a bug where we were never clearing bits in the cpu_in_guest
  cpumask.

- Simplified some code in kvmhv_run_single_vcpu.

Paul.

 Documentation/virtual/kvm/api.txt  |   19 +
 arch/powerpc/include/asm/asm-prototypes.h  |   21 +
 arch/powerpc/include/asm/book3s/64/mmu-hash.h  |   12 +
 .../powerpc/include/asm/book3s/64/tlbflush-radix.h |1 +
 arch/powerpc/include/asm/hvcall.h  |   41 +
 arch/powerpc/include/asm/kvm_asm.h |4 +-
 arch/powerpc/include/asm/kvm_book3s.h  |   45 +-
 arch/powerpc/include/asm/kvm_book3s_64.h   |  118 +-
 arch/powerpc/include/asm/kvm_book3s_asm.h  |3 +
 arch/powerpc/include/asm/kvm_booke.h   |4 +-
 arch/powerpc/include/asm/kvm_host.h|   16 +-
 arch/powerpc/include/asm/kvm_ppc.h |4 +
 arch/powerpc/include/asm/ppc-opcode.h  |1 +
 arch/powerpc/include/asm/reg.h |2 +
 arch/powerpc/include/uapi/asm/kvm.h|1 +
 arch/powerpc/kernel/asm-offsets.c  |5 +-
 arch/powerpc/kernel/cpu_setup_power.S  |4 +-
 arch/powerpc/kvm/Makefile  |3 +-
 arch/powerpc/kvm/book3s.c  |   43 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c|7 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  718 ---
 arch/powerpc/kvm/book3s_emulate.c  |   13 +-
 arch/powerpc/kvm/book3s_hv.c   |  852 -
 arch/powerpc/kvm/book3s_hv_builtin.c   |   92 +-
 arch/powerpc/kvm/book3s_hv_interrupts.S|   95 +-
 arch/powerpc/kvm/book3s_hv_nested.c| 1291 
 arch/powerpc/kvm/book3s_hv_ras.c   |   10 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c   |   13 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S|  823 +++--
 arch/powerpc/kvm/book3s_hv_tm.c|6 +-
 arch/powerpc/kvm/book3s_hv_tm_builtin.c 

[PATCH] powerpc: Fix HMIs on big-endian with CONFIG_RELOCATABLE=y

2018-10-07 Thread Benjamin Herrenschmidt
HMIs will crash the kernel due to

BRANCH_LINK_TO_FAR(hmi_exception_realmode)

calling into the OPD (the ELFv1 function descriptor) instead of the actual code.

Signed-off-by: Benjamin Herrenschmidt 
---

This hack fixes it for me, but it's not great. Nick, any better idea ?

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ea04dfb..752709cc8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1119,7 +1119,11 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addir3,r1,STACK_FRAME_OVERHEAD
+#ifdef PPC64_ELF_ABI_v1
+   BRANCH_LINK_TO_FAR(.hmi_exception_realmode) /* Function call ABI */
+#else
BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
+#endif
cmpdi   cr0,r3,0
 
/* Windup the stack. */




Re: [PATCH v4 25/32] KVM: PPC: Book3S HV: Invalidate TLB when nested vcpu moves physical cpu

2018-10-07 Thread David Gibson
On Fri, Oct 05, 2018 at 03:32:26PM +1000, Paul Mackerras wrote:
> On Fri, Oct 05, 2018 at 02:54:28PM +1000, David Gibson wrote:
> > On Fri, Oct 05, 2018 at 02:23:50PM +1000, Paul Mackerras wrote:
> > > On Fri, Oct 05, 2018 at 02:09:08PM +1000, David Gibson wrote:
> > > > On Thu, Oct 04, 2018 at 09:56:02PM +1000, Paul Mackerras wrote:
> > > > > From: Suraj Jitindar Singh 
> > > > > 
> > > > > This is only done at level 0, since only level 0 knows which physical
> > > > > CPU a vcpu is running on.  This does for nested guests what L0 already
> > > > > did for its own guests, which is to flush the TLB on a pCPU when it
> > > > > goes to run a vCPU there, and there is another vCPU in the same VM
> > > > > which previously ran on this pCPU and has now started to run on 
> > > > > another
> > > > > pCPU.  This is to handle the situation where the other vCPU touched
> > > > > a mapping, moved to another pCPU and did a tlbiel (local-only tlbie)
> > > > > on that new pCPU and thus left behind a stale TLB entry on this pCPU.
> > > > > 
> > > > > This introduces a limit on the vcpu_token values used in the
> > > > > H_ENTER_NESTED hcall -- they must now be less than NR_CPUS.
> > > > 
> > > > This does make the vcpu tokens no longer entirely opaque to the L0.
> > > > It works for now, because the only L1 is Linux and we know basically
> > > > how it allocates those tokens.  Eventually we probably want some way
> > > > to either remove this restriction or to advertise the limit to the L1.
> > > 
> > > Right, we could use something like a hash table and have it be
> > > basically just as efficient as the array when the set of IDs is dense
> > > while also handling arbitrary ID values.  (We'd have to make sure that
> > > L1 couldn't trigger unbounded memory consumption in L0, though.)
> > 
> > Another approach would be to sacifice some performance for L0
> > simplicity:  when an L1 vCPU changes pCPU, flush all the nested LPIDs
> > associated with that L1.  When an L2 vCPU changes L1 vCPU (and
> > therefore, indirectly pCPU), the L1 would be responsible for flushing
> > it.
> 
> That was one of the approaches I considered initially, but it has
> complexities that aren't apparent, and it could be quite inefficient
> for a guest with a lot of nested guests.  For a start you have to
> provide a way for L1 to flush the TLB for another LPID, which guests
> can't do themselves (it's a hypervisor privileged operation).  Then
> there's the fact that it's not the pCPU where the moving vCPU has
> moved to that needs the flush, it's the pCPU that it moved from (where
> presumably something else is now running).  All in all, the simplest
> solution was to have L0 do it, because L0 knows unambiguously the real
> physical CPU where any given vCPU last ran.

Ah, I see.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson




Re: [PATCH 15/36] dt-bindings: arm: Convert Actions Semi bindings to jsonschema

2018-10-07 Thread Rob Herring
On Sat, Oct 6, 2018 at 5:40 AM Andreas Färber  wrote:
>
> Hi Rob,
>
> Am 05.10.18 um 18:58 schrieb Rob Herring:
> > Convert Actions Semi SoC bindings to DT schema format using json-schema.
>
> This sounds like the next Yanny vs. Laurel... I fail to see any json. ;)

Read the docs in patch 8.

> Also, it may help my understanding to be CC'ed on the cover letter, too?

Sorry, trying to avoid a huge Cc list.

> > Cc: "Andreas Färber" 
> > Cc: Mark Rutland 
> > Cc: linux-arm-ker...@lists.infradead.org
> > Cc: devicet...@vger.kernel.org
> > Signed-off-by: Rob Herring 
> > ---
> >  .../devicetree/bindings/arm/actions.txt   | 56 ---
> >  .../devicetree/bindings/arm/actions.yaml  | 34 +++
> >  2 files changed, 34 insertions(+), 56 deletions(-)
> >  delete mode 100644 Documentation/devicetree/bindings/arm/actions.txt
> >  create mode 100644 Documentation/devicetree/bindings/arm/actions.yaml
> >
> > diff --git a/Documentation/devicetree/bindings/arm/actions.txt b/Documentation/devicetree/bindings/arm/actions.txt
> > deleted file mode 100644
> > index d54f33c4e0da..000000000000
> > --- a/Documentation/devicetree/bindings/arm/actions.txt
> > +++ /dev/null
> > @@ -1,56 +0,0 @@
> > -Actions Semi platforms device tree bindings
> > 
> > -
> > -
> > -S500 SoC
> > -
> > -
> > -Required root node properties:
> > -
> > - - compatible :  must contain "actions,s500"
> > -
> > -
> > -Modules:
> > -
> > -Root node property compatible must contain, depending on module:
> > -
> > - - LeMaker Guitar: "lemaker,guitar"
> > -
> > -
> > -Boards:
> > -
> > -Root node property compatible must contain, depending on board:
> > -
> > - - Allo.com Sparky: "allo,sparky"
> > - - Cubietech CubieBoard6: "cubietech,cubieboard6"
> > - LeMaker Guitar Base Board rev. B: "lemaker,guitar-bb-rev-b", "lemaker,guitar"
> > -
> > -
> > -S700 SoC
> > -
> > -
> > -Required root node properties:
> > -
> > -- compatible :  must contain "actions,s700"
> > -
> > -
> > -Boards:
> > -
> > -Root node property compatible must contain, depending on board:
> > -
> > - - Cubietech CubieBoard7: "cubietech,cubieboard7"
> > -
> > -
> > -S900 SoC
> > -
> > -
> > -Required root node properties:
> > -
> > -- compatible :  must contain "actions,s900"
> > -
> > -
> > -Boards:
> > -
> > -Root node property compatible must contain, depending on board:
> > -
> > - - uCRobotics Bubblegum-96: "ucrobotics,bubblegum-96"
> > diff --git a/Documentation/devicetree/bindings/arm/actions.yaml b/Documentation/devicetree/bindings/arm/actions.yaml
> > new file mode 100644
> > index 000000000000..af9345a228b4
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/arm/actions.yaml
> > @@ -0,0 +1,34 @@
> > +%YAML 1.2
> > +---
> > +$id: http://devicetree.org/schemas/soc/arm/actions.yaml#
> > +$schema: http://devicetree.org/meta-schemas/core.yaml#
>
> 404 for the schema. Where does one find an explanation?

In the docs. This could someday actually be hosted on devicetree.org,
but for now only the path, not the host, matters.

>
> > +
> > +title: Actions Semi platforms device tree bindings
> > +
> > +maintainers:
> > +  - Andreas Färber 
>
> Mani is now officially reviewer and the closest I have to a
> co-maintainer.

Okay, NP.

> I suggest we add him here in some form. I assume this is
> independent of MAINTAINERS patterns though, or will get_maintainers.pl
> parse this, too?

It is independent. Not really ideal, but the bindings don't live just
in the kernel. (And lots are missing a maintainer anyways).

> > +
> > +description: |
>
> Does the | have any meaning, or a stray typo?

Explained in other patch.

> > +  The Actions Semi S500 is a quad-core ARM Cortex-A9 SoC. The Actions Semi
> > +  S900 is a quad-core ARM Cortex-A53 SoC.
>
> You forgot the S700 as another quad-core Cortex-A53 SoC.
> Also, arm or Arm rather than ARM these days?

I was going to say it's just copied out from the old file, but that
doesn't seem to be the case here. I am sure I didn't spend time on
nice descriptions. :)

> > +
> > +properties:
> > +  compatible:
> > +oneOf:
> > +  - items:
> > +  - enum:
> > +  - lemaker,guitar-bb-rev-b
> > +  - enum:
> > +  - lemaker,guitar
> > +  - allo,sparky
> > +  - cubietech,cubieboard6
> > +  - const: actions,s500
> > +minItems: 2
> > +maxItems: 3
> > +additionalItems: false
>
> Objection. You've managed to turn a perfectly human-comprehensible text
> into a machine-parseable representation incomprehensible for humans.
>
> First, there should remain some free-text explanation of the values
> defined here. Are comments allowed after the value or indented maybe?

Yes, both. Comments are the major reason for using YAML over JSON file format.

> Alternatively we could have a per-vendor file à la vendor-prefixes.txt,
> but that would seem inefficient.

You mean per 

Re: [PATCH 27/36] dt-bindings: arm: Convert Realtek board/soc bindings to json-schema

2018-10-07 Thread Rob Herring
On Sat, Oct 6, 2018 at 5:54 AM Andreas Färber  wrote:
>
> Am 05.10.18 um 18:58 schrieb Rob Herring:
> > Convert Realtek SoC bindings to DT schema format using json-schema.
>
> YAML (2x)

?

> > Cc: "Andreas Färber" 
> > Cc: Mark Rutland 
> > Cc: linux-arm-ker...@lists.infradead.org
> > Cc: devicet...@vger.kernel.org
> > Signed-off-by: Rob Herring 
> > ---
> >  .../devicetree/bindings/arm/realtek.txt   | 22 
> >  .../devicetree/bindings/arm/realtek.yaml  | 25 +++
> >  2 files changed, 25 insertions(+), 22 deletions(-)
> >  delete mode 100644 Documentation/devicetree/bindings/arm/realtek.txt
> >  create mode 100644 Documentation/devicetree/bindings/arm/realtek.yaml
> >
> > diff --git a/Documentation/devicetree/bindings/arm/realtek.txt b/Documentation/devicetree/bindings/arm/realtek.txt
> > deleted file mode 100644
> > index 95839e19ae92..000000000000
> > --- a/Documentation/devicetree/bindings/arm/realtek.txt
> > +++ /dev/null
> > @@ -1,22 +0,0 @@
> > -Realtek platforms device tree bindings
> > ---
> > -
> > -
> > -RTD1295 SoC
> > -===
> > -
> > -Required root node properties:
> > -
> > - - compatible :  must contain "realtek,rtd1295"
> > -
> > -
> > -Root node property compatible must contain, depending on board:
> > -
> > - - MeLE V9: "mele,v9"
> > - - ProBox2 AVA: "probox2,ava"
> > - - Zidoo X9S: "zidoo,x9s"
> > -
> > -
> > -Example:
> > -
> > -compatible = "zidoo,x9s", "realtek,rtd1295";
> > diff --git a/Documentation/devicetree/bindings/arm/realtek.yaml b/Documentation/devicetree/bindings/arm/realtek.yaml
> > new file mode 100644
> > index 000000000000..9e3bb3249c77
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/arm/realtek.yaml
> > @@ -0,0 +1,25 @@
> > +# SPDX-License-Identifier: None
>
> What is the expected license for such bindings?

Good question. I'd meant to figure something out for this placeholder.
The default would be GPL-2 inheriting from the old doc. My preference
would be to dual license these with BSD as they're not just kernel
files, but I don't really want to track down copyright holders (also
not explicitly declared) to do that.

> You did not add such a line for actions.yaml.
>
> > +%YAML 1.2
> > +---
> > +$id: http://devicetree.org/schemas/bindings/arm/realtek.yaml#
> > +$schema: http://devicetree.org/meta-schemas/core.yaml#
> > +
> > +title: Realtek platforms device tree bindings
> > +
> > +maintainers:
> > +  - Andreas Färber 
> > +
> > +description: |+
>
> "|+"?

The '|'  means a literal block. The '+' is a block chomping indicator:

http://yaml.org/spec/1.2/spec.html#id2794534

This was all converted using my doc2yaml script and ruamel.yaml
decided it needed the '+'. I'm not sure exactly why. It may be based
on how many trailing newlines the text had.

> > +  RTD1295 SoC

In this case, this isn't really useful and we should just remove
description unless you want to add something.

> > +
> > +properties:
> > +  $nodename:
> > +const: '/'
> > +  compatible:
> > +items:
> > +  - enum:
> > +  - mele,v9
> > +  - probox2,ava
> > +  - zidoo,x9s
> > +  - const: realtek,rtd1295
> > +...
>
> That does not look like a full "PATCH" yet? It also confuses me whether
> or when leading dashes are necessary - for Actions Semi "items" had one.

'...' is the end of YAML document marker.

'-' means a list item (a YAML/JSON list, not to be confused with
'items' the json-schema keyword). Actions uses 'oneOf' (which is a
list of schemas) because there are multiple SoCs.

And also 'items' itself can be a list or dict, but we restrict it to
lists for the DT meta-schema.

> I have preparations on my GitHub staging tree for three more SoCs, so we
> should prepare the structure to ease adding SoCs and avoid re-indenting
> patches - adding SoCs was much easier in the original flat text format.
> Please also consider for other vendors.

Good point.

One option is always use 'oneOf' even if just 1 item. The problem is
that the use of oneOf/allOf/anyOf makes the error reporting pretty
vague.

Another option is to make each SoC a separate schema which could be
either separate docs or multiple yaml docs within a file. The downside
is just repeating all the top-level properties.

>
> Same comment as for Actions: We're losing a human description of the
> enum values.

I kept those as comments when there was meaningful information. I did
not feel that "MeLE V9" as a description of "mele,v9" added any value.
If you want to add 'model' schema that would be better than having
just comments, but I'm not going to find all the values of model which
aren't documented.

Rob


Re: [PATCH v5 1/9] book3s/64: avoid circular header inclusion in mmu-hash.h

2018-10-07 Thread Christophe LEROY




On 06/10/2018 at 14:47, Michael Ellerman wrote:

Christophe LEROY  writes:


The series has been successfully compile-tested at
http://kisskb.ellerman.id.au/kisskb/head/358723b36b126a381d827c82d04ee226321416b2/


I guess we need to turn on BPF_JIT in some configs :)

This works (builds), but not runtime tested:

diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index 6f4daacad296..dc50a8d4b3b9 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -106,9 +106,8 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
} while (0)
  #else
  #define PPC_BPF_LOAD_CPU(r) \
-   do { BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);  \
-   PPC_LHZ_OFFS(r, (1 & ~(THREAD_SIZE - 1)),   \
-   offsetof(struct thread_info, cpu)); \
+   do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4);  \
+   PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu));  \
} while(0)
  #endif
  #else



Thanks, I'll take that.

Christophe


Re: [PATCH v5 3/9] powerpc: Prepare for moving thread_info into task_struct

2018-10-07 Thread Christophe Leroy




On 10/06/2018 12:40 PM, Michael Ellerman wrote:

Christophe Leroy  writes:


diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
index 47a03b9b528b..818451bf629c 100644
--- a/arch/powerpc/include/asm/livepatch.h
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -49,7 +49,7 @@ static inline void klp_init_thread_info(struct thread_info *ti)
ti->livepatch_sp = (unsigned long *)(ti + 1) + 1;


We need to do something about this.


Oops I missed that one.



Currently the thread_info sits at the low address of the stack, and we
use the space immediately above that as a miniature upward growing stack
for livepatching.

If we keep the livepatch_sp in the thread_info then we need to
initialise it somewhere when the task starts running on a stack. And I
don't know how that works if we end up running on the emergency stack
for example.

So I'm not sure that makes much sense.

Instead we might need to keep the livepatch_sp on the stack page at the
base, where thread_info currently lives.

That obviously sucks because you can still overflow into it and trash
it, but it's no worse than what we have now for livepatching.

Need to think about it some more.



I think for this series we can live with it in the stack, as it won't be 
worse than before. Then in a future patch we can change it. I'll open an 
issue for that on github.


Then for now, I'll add the following change in this patch.

diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
index 47a03b9b528b..8a81d10ccc82 100644
--- a/arch/powerpc/include/asm/livepatch.h
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -43,13 +43,14 @@ static inline unsigned long klp_get_ftrace_location(unsigned long faddr)

return ftrace_location_range(faddr, faddr + 16);
 }

-static inline void klp_init_thread_info(struct thread_info *ti)
+static inline void klp_init_thread_info(struct task_struct *p)
 {
+   struct thread_info *ti = task_thread_info(p);
/* + 1 to account for STACK_END_MAGIC */
-   ti->livepatch_sp = (unsigned long *)(ti + 1) + 1;
+   ti->livepatch_sp = end_of_stack(p) + 1;
 }
 #else
-static void klp_init_thread_info(struct thread_info *ti) { }
+static inline void klp_init_thread_info(struct task_struct *p) { }
 #endif /* CONFIG_LIVEPATCH */

 #endif /* _ASM_POWERPC_LIVEPATCH_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d9d4eb2ea6c9..a12307ebb7ef 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1632,7 +1632,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,

unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
struct thread_info *ti = task_thread_info(p);

-   klp_init_thread_info(ti);
+   klp_init_thread_info(p);

/* Copy registers */
sp -= sizeof(struct pt_regs);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 9ca9db707bcb..8054a7b9e026 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -940,7 +940,7 @@ void __init setup_arch(char **cmdline_p)
/* Reserve large chunks of memory for use by CMA for KVM. */
kvm_cma_reserve();

-   klp_init_thread_info(&init_thread_info);
+   klp_init_thread_info(&init_task);

init_mm.start_code = (unsigned long)_stext;
init_mm.end_code = (unsigned long) _etext;

Christophe