RE: [PATCH 3/3] KVM: PPC: booke: Added debug handler

2012-08-09 Thread Bhushan Bharat-R65777


> -Original Message-
> From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Wednesday, August 08, 2012 4:41 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; k...@vger.kernel.org
> Subject: Re: [PATCH 3/3] KVM: PPC: booke: Added debug handler
> 
> 
> On 08.08.2012, at 03:02, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -Original Message-
> >> From: Wood Scott-B07421
> >> Sent: Wednesday, August 08, 2012 2:15 AM
> >> To: Alexander Graf
> >> Cc: Bhushan Bharat-R65777; kvm-ppc@vger.kernel.org;
> >> k...@vger.kernel.org; Bhushan
> >> Bharat-R65777
> >> Subject: Re: [PATCH 3/3] KVM: PPC: booke: Added debug handler
> >>
> >> On 08/07/2012 05:47 AM, Alexander Graf wrote:
>  diff --git a/arch/powerpc/kvm/booke_interrupts.S
>  b/arch/powerpc/kvm/booke_interrupts.S
>  index 3539805..890673c 100644
>  --- a/arch/powerpc/kvm/booke_interrupts.S
>  +++ b/arch/powerpc/kvm/booke_interrupts.S
>  @@ -73,6 +73,51 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
>   bctr
>  .endm
> 
>  +.macro KVM_DBG_HANDLER ivor_nr scratch srr0
> >>>
> >>> This is a lot of asm code. Any chance to share a good share of it
> >>> with the
> >> generic handler?
> >>
> >> That entire file could use an update to look more like
> >> bookehv_interrupts.S and its use of asm macros.
> >
> > In booke there is an assumption that the size of KVM IVORs will not be more 
> > than host
> IVORs size so that only IVPR is changed.
> >
> > I tried to give it that shape of bookehv_interrupts.S  and found that size 
> > of
> some IVORs become more than host IVORs.
> 
> We can always jump off to another (more generic?) function and only have a 
> small
> stub in the IVOR referenced code.

What KVM_DBG_HANDLER has beyond KVM_HANDLER is the handling of debug single 
step (which is similar to host).
So do you want a jump in assembly for handling the debug single step? Or do you 
really think of moving something from the KVM_HANDLER to more generic?

Thanks
-Bharat

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/3] KVM: PPC: booke: Allow multiple exception types

2012-08-09 Thread Bhushan Bharat-R65777


> -Original Message-
> From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Tuesday, August 07, 2012 4:16 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 2/3] KVM: PPC: booke: Allow multiple exception types
> 
> 
> On 03.08.2012, at 09:08, Bharat Bhushan wrote:
> 
> > Current kvmppc_booke_handlers uses the same macro (KVM_HANDLER) and
> > all handlers are considered to be the same size. This will not be
> > the case if we want to use different macros for different handlers.
> >
> > This patch improves the kvmppc_booke_handler so that it can
> > support different macros for different handlers.
> >
> > Signed-off-by: Liu Yu 
> > [bharat.bhus...@freescale.com: Substantial changes]
> > Signed-off-by: Bharat Bhushan 
> > ---
> > arch/powerpc/include/asm/kvm_ppc.h  |2 -
> > arch/powerpc/kvm/booke.c|9 ---
> > arch/powerpc/kvm/booke.h|1 +
> > arch/powerpc/kvm/booke_interrupts.S |   37 
> > --
> > arch/powerpc/kvm/e500.c |   13 +++
> > 5 files changed, 48 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> > index 1438a5e..deb55bd 100644
> > --- a/arch/powerpc/include/asm/kvm_ppc.h
> > +++ b/arch/powerpc/include/asm/kvm_ppc.h
> > @@ -47,8 +47,6 @@ enum emulation_result {
> >
> > extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
> > extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu 
> > *vcpu);
> > -extern char kvmppc_handlers_start[];
> > -extern unsigned long kvmppc_handler_len;
> > extern void kvmppc_handler_highmem(void);
> >
> > extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> > index aebbb8b..1338233 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -1541,6 +1541,7 @@ int __init kvmppc_booke_init(void)
> > {
> > #ifndef CONFIG_KVM_BOOKE_HV
> > unsigned long ivor[16];
> > +   unsigned long *handler = kvmppc_booke_handler_addr;
> > unsigned long max_ivor = 0;
> > int i;
> >
> > @@ -1574,14 +1575,14 @@ int __init kvmppc_booke_init(void)
> >
> > for (i = 0; i < 16; i++) {
> > if (ivor[i] > max_ivor)
> > -   max_ivor = ivor[i];
> > +   max_ivor = i;
> >
> > memcpy((void *)kvmppc_booke_handlers + ivor[i],
> > -  kvmppc_handlers_start + i * kvmppc_handler_len,
> > -  kvmppc_handler_len);
> > +  (void *)handler[i], handler[i + 1] - handler[i]);
> > }
> > flush_icache_range(kvmppc_booke_handlers,
> > -  kvmppc_booke_handlers + max_ivor + 
> > kvmppc_handler_len);
> > +  kvmppc_booke_handlers + ivor[max_ivor] +
> > +   handler[max_ivor + 1] - handler[max_ivor]);
> > #endif /* !BOOKE_HV */
> > return 0;
> > }
> > diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
> > index ba61974..de9e526 100644
> > --- a/arch/powerpc/kvm/booke.h
> > +++ b/arch/powerpc/kvm/booke.h
> > @@ -65,6 +65,7 @@
> >   (1 << BOOKE_IRQPRIO_CRITICAL))
> >
> > extern unsigned long kvmppc_booke_handlers;
> > +extern unsigned long kvmppc_booke_handler_addr[];
> >
> > void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
> > void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
> > diff --git a/arch/powerpc/kvm/booke_interrupts.S
> b/arch/powerpc/kvm/booke_interrupts.S
> > index bb46b32..3539805 100644
> > --- a/arch/powerpc/kvm/booke_interrupts.S
> > +++ b/arch/powerpc/kvm/booke_interrupts.S
> > @@ -73,6 +73,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
> > bctr
> > .endm
> >
> > +.macro KVM_HANDLER_ADDR ivor_nr
> > +   .long   kvmppc_handler_\ivor_nr
> > +.endm
> > +
> > +.macro KVM_HANDLER_END
> > +   .long   kvmppc_handlers_end
> > +.endm
> > +
> > _GLOBAL(kvmppc_handlers_start)
> > KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
> > KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK  SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0
> > @@ -93,9 +101,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT
> SPRN_CSRR0
> > KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
> > KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
> > KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
> > -
> > -_GLOBAL(kvmppc_handler_len)
> > -   .long kvmppc_handler_1 - kvmppc_handler_0
> > +_GLOBAL(kvmppc_handlers_end)
> >
> > /* Registers:
> >  *  SPRG_SCRATCH0: guest r4
> > @@ -463,6 +469,31 @@ lightweight_exit:
> > lwz r4, VCPU_GPR(R4)(r4)
> > rfi
> >
> > +   .data
> > +   .align  4
> > +   .globl  kvmppc_booke_handler_addr
> > +kvmppc_booke_handler_addr:
> > +KVM_HANDLER_ADDR BOOKE_INT

Re: [PATCH 3/5] KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly

2012-08-09 Thread Takuya Yoshikawa
On Thu, 9 Aug 2012 22:25:32 -0300
Marcelo Tosatti  wrote:

> I'll send a patch to flush per memslot in the next days, you can work
> out the PPC details in the meantime.

Are you going to implement that using slot_bitmap?

Since I'm now converting kvm_mmu_slot_remove_write_access() to
rmap based protection, I'd like to hear your plan.

If you can use the stale memslot to restrict the flush, that
should live with rmap based protection.

Thanks,
Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly

2012-08-09 Thread Marcelo Tosatti
On Fri, Aug 10, 2012 at 10:34:39AM +1000, Paul Mackerras wrote:
> On Thu, Aug 09, 2012 at 03:16:12PM -0300, Marcelo Tosatti wrote:
> 
> > The !memslot->npages case is handled in __kvm_set_memory_region
> > (please read that part, before kvm_arch_prepare_memory_region() call).
> > 
> > kvm_arch_flush_shadow should be implemented.
> 
> Book3S HV doesn't have shadow page tables per se, rather the hardware
> page table is under the control of the hypervisor (i.e. KVM), and
> entries are added and removed by the guest using hypercalls.  On
> recent machines (POWER7) the hypervisor can choose whether or not to
> have the hardware PTE point to a real page of memory; if it doesn't,
> access by the guest will trap to the hypervisor.  On older machines
> (PPC970) we don't have that flexibility, and we have to provide a real
> page of memory (i.e. RAM or I/O) behind every hardware PTE.  (This is
> because PPC970 provides no way for page faults in the guest to go to
> the hypervisor.)
> 
> I could implement kvm_arch_flush_shadow to remove the backing pages
> behind every hardware PTE, but that would be very slow and inefficient
> on POWER7, and would break the guest on PPC970, particularly in the
> case where userspace is removing a small memory slot containing some
> I/O device and leaving the memory slot for system RAM untouched.
> 
> So the reason for unmapping the hardware PTEs in
> kvm_arch_prepare_memory_region rather than kvm_arch_flush_shadow is
> that that way we know which memslot is going away.
> 
> What exactly are the semantics of kvm_arch_flush_shadow?  

It removes all translations mapped via memslots. Its used in cases where
the translations become stale, or during shutdown.

> I presume that on x86 with NPT/EPT it basically does nothing - is that right?

It does, it removes all NPT/EPT ptes (named "sptes" in arch/x86/kvm/).
The translations are rebuilt on demand (when accesses by the guest fault
into the HV).

> > > + if (old->npages) {
> > > + /* modifying guest_phys or flags */
> > > + if (old->base_gfn != memslot->base_gfn)
> > > + kvmppc_unmap_memslot(kvm, old);
> > 
> > This case is also handled generically by the last kvm_arch_flush_shadow
> > call in __kvm_set_memory_region.
> 
> Again, to use this we would need to know which memslot we're
> flushing.  If we could change __kvm_set_memory_region to pass the
> memslot for these kvm_arch_flush_shadow calls, then I could do as you
> suggest.  (Though I would need to think carefully about what would
> happen with guest invalidations of hardware PTEs in the interval
> between the rcu_assign_pointer(kvm->memslots, slots) and the
> kvm_arch_flush_shadow, and whether the invalidation would find the
> correct location in the rmap array, given that we have updated the
> base_gfn in the memslot without first getting rid of any references to
> those pages in the hardware page table.)

That can be done.

I'll send a patch to flush per memslot in the next days, you can work
out the PPC details in the meantime.

To be clear: this is necessary to have consistent behaviour across
arches in the kvm_set_memory codepath which is tricky (not nitpicking).

Alternatively, kvm_arch_flush_shadow can be split into two methods (but
thats not necessary if memslot information is sufficient for PPC).

> > > + if (memslot->dirty_bitmap &&
> > > + old->dirty_bitmap != memslot->dirty_bitmap)
> > > + kvmppc_hv_get_dirty_log(kvm, old);
> > > + return 0;
> > > + }
> > 
> > Better clear dirty log unconditionally on kvm_arch_commit_memory_region,
> > similarly to x86 (just so its consistent).
> 
> OK.
> 
> > > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > > @@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long 
> > > pte_index,
> > >   ptel = rev->guest_rpte |= rcbits;
> > >   gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
> > >   memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
> > > - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
> > > + if (!memslot)
> > >   return;
> > 
> > Why remove this check? (i don't know why it was there in the first
> > place, just checking).
> 
> This is where we are removing the page backing a hardware PTE and thus
> removing the hardware PTE from the reverse-mapping list for the page.
> We want to be able to do that properly even if the memslot is in the
> process of going away.  I had the flags check in there originally
> because other places that used a memslot had that check, but when I
> read __kvm_set_memory_region more carefully I realized that the
> KVM_MEMSLOT_INVALID flag indicates that we should not create any more
> references to pages in the memslot, but we do still need to be able to
> handle references going away, i.e. pages in the memslot getting
> unmapped.
> 
> Paul.

Yes, thats it. kvm_arch_flush_shadow requires functional memslot lookup,
for example.

--
To unsubscribe

Re: [PATCH 3/5] KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly

2012-08-09 Thread Marcelo Tosatti
On Thu, Aug 09, 2012 at 10:25:32PM -0300, Marcelo Tosatti wrote:
> On Fri, Aug 10, 2012 at 10:34:39AM +1000, Paul Mackerras wrote:
> > On Thu, Aug 09, 2012 at 03:16:12PM -0300, Marcelo Tosatti wrote:
> > 
> > > The !memslot->npages case is handled in __kvm_set_memory_region
> > > (please read that part, before kvm_arch_prepare_memory_region() call).
> > > 
> > > kvm_arch_flush_shadow should be implemented.
> > 
> > Book3S HV doesn't have shadow page tables per se, rather the hardware
> > page table is under the control of the hypervisor (i.e. KVM), and
> > entries are added and removed by the guest using hypercalls.  On
> > recent machines (POWER7) the hypervisor can choose whether or not to
> > have the hardware PTE point to a real page of memory; if it doesn't,
> > access by the guest will trap to the hypervisor.  On older machines
> > (PPC970) we don't have that flexibility, and we have to provide a real
> > page of memory (i.e. RAM or I/O) behind every hardware PTE.  (This is
> > because PPC970 provides no way for page faults in the guest to go to
> > the hypervisor.)
> > 
> > I could implement kvm_arch_flush_shadow to remove the backing pages
> > behind every hardware PTE, but that would be very slow and inefficient
> > on POWER7, and would break the guest on PPC970, particularly in the
> > case where userspace is removing a small memory slot containing some
> > I/O device and leaving the memory slot for system RAM untouched.
> > 
> > So the reason for unmapping the hardware PTEs in
> > kvm_arch_prepare_memory_region rather than kvm_arch_flush_shadow is
> > that that way we know which memslot is going away.
> > 
> > What exactly are the semantics of kvm_arch_flush_shadow?  
> 
> It removes all translations mapped via memslots. Its used in cases where
> the translations become stale, or during shutdown.
> 
> > I presume that on x86 with NPT/EPT it basically does nothing - is that 
> > right?
> 
> It does, it removes all NPT/EPT ptes (named "sptes" in arch/x86/kvm/).
> The translations are rebuilt on demand (when accesses by the guest fault
> into the HV).
> 
> > > > +   if (old->npages) {
> > > > +   /* modifying guest_phys or flags */
> > > > +   if (old->base_gfn != memslot->base_gfn)
> > > > +   kvmppc_unmap_memslot(kvm, old);
> > > 
> > > This case is also handled generically by the last kvm_arch_flush_shadow
> > > call in __kvm_set_memory_region.
> > 
> > Again, to use this we would need to know which memslot we're
> > flushing.  If we could change __kvm_set_memory_region to pass the
> > memslot for these kvm_arch_flush_shadow calls, then I could do as you
> > suggest.  (Though I would need to think carefully about what would
> > happen with guest invalidations of hardware PTEs in the interval
> > between the rcu_assign_pointer(kvm->memslots, slots) and the
> > kvm_arch_flush_shadow, and whether the invalidation would find the
> > correct location in the rmap array, given that we have updated the
> > base_gfn in the memslot without first getting rid of any references to
> > those pages in the hardware page table.)
> 
> That can be done.
> 
> I'll send a patch to flush per memslot in the next days, you can work
> out the PPC details in the meantime.
> 
> To be clear: this is necessary to have consistent behaviour across
> arches in the kvm_set_memory codepath which is tricky (not nitpicking).
> 
> Alternatively, kvm_arch_flush_shadow can be split into two methods (but
> thats not necessary if memslot information is sufficient for PPC).

What kvm_set_memory requires is that there are no stale translations. On
x86, it is cheaper to nuke all translation entries and let them be rebuilt on
pagefaults.

But, if necessary we can introduce a new callback
"kvm_arch_sync_shadow", which can be used by PPC to verify translations 
are valid, if it makes sense.

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly

2012-08-09 Thread Paul Mackerras
On Thu, Aug 09, 2012 at 03:16:12PM -0300, Marcelo Tosatti wrote:

> The !memslot->npages case is handled in __kvm_set_memory_region
> (please read that part, before kvm_arch_prepare_memory_region() call).
> 
> kvm_arch_flush_shadow should be implemented.

Book3S HV doesn't have shadow page tables per se, rather the hardware
page table is under the control of the hypervisor (i.e. KVM), and
entries are added and removed by the guest using hypercalls.  On
recent machines (POWER7) the hypervisor can choose whether or not to
have the hardware PTE point to a real page of memory; if it doesn't,
access by the guest will trap to the hypervisor.  On older machines
(PPC970) we don't have that flexibility, and we have to provide a real
page of memory (i.e. RAM or I/O) behind every hardware PTE.  (This is
because PPC970 provides no way for page faults in the guest to go to
the hypervisor.)

I could implement kvm_arch_flush_shadow to remove the backing pages
behind every hardware PTE, but that would be very slow and inefficient
on POWER7, and would break the guest on PPC970, particularly in the
case where userspace is removing a small memory slot containing some
I/O device and leaving the memory slot for system RAM untouched.

So the reason for unmapping the hardware PTEs in
kvm_arch_prepare_memory_region rather than kvm_arch_flush_shadow is
that that way we know which memslot is going away.

What exactly are the semantics of kvm_arch_flush_shadow?  I presume
that on x86 with NPT/EPT it basically does nothing - is that right?

> > +   if (old->npages) {
> > +   /* modifying guest_phys or flags */
> > +   if (old->base_gfn != memslot->base_gfn)
> > +   kvmppc_unmap_memslot(kvm, old);
> 
> This case is also handled generically by the last kvm_arch_flush_shadow
> call in __kvm_set_memory_region.

Again, to use this we would need to know which memslot we're
flushing.  If we could change __kvm_set_memory_region to pass the
memslot for these kvm_arch_flush_shadow calls, then I could do as you
suggest.  (Though I would need to think carefully about what would
happen with guest invalidations of hardware PTEs in the interval
between the rcu_assign_pointer(kvm->memslots, slots) and the
kvm_arch_flush_shadow, and whether the invalidation would find the
correct location in the rmap array, given that we have updated the
base_gfn in the memslot without first getting rid of any references to
those pages in the hardware page table.)

> > +   if (memslot->dirty_bitmap &&
> > +   old->dirty_bitmap != memslot->dirty_bitmap)
> > +   kvmppc_hv_get_dirty_log(kvm, old);
> > +   return 0;
> > +   }
> 
> Better clear dirty log unconditionally on kvm_arch_commit_memory_region,
> similarly to x86 (just so its consistent).

OK.

> > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > @@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long 
> > pte_index,
> > ptel = rev->guest_rpte |= rcbits;
> > gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
> > memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
> > -   if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
> > +   if (!memslot)
> > return;
> 
> Why remove this check? (i don't know why it was there in the first
> place, just checking).

This is where we are removing the page backing a hardware PTE and thus
removing the hardware PTE from the reverse-mapping list for the page.
We want to be able to do that properly even if the memslot is in the
process of going away.  I had the flags check in there originally
because other places that used a memslot had that check, but when I
read __kvm_set_memory_region more carefully I realized that the
KVM_MEMSLOT_INVALID flag indicates that we should not create any more
references to pages in the memslot, but we do still need to be able to
handle references going away, i.e. pages in the memslot getting
unmapped.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 5/5] KVM: PPC: Take the SRCU lock around memslot use

2012-08-09 Thread Paul Mackerras
On Thu, Aug 09, 2012 at 03:27:17PM -0300, Marcelo Tosatti wrote:
> On Mon, Aug 06, 2012 at 08:08:16PM +1000, Paul Mackerras wrote:
> > The generic KVM code uses SRCU (sleeping RCU) to protect accesses
> > to the memslots data structures against updates due to userspace
> > adding, modifying or removing memory slots.  We need to do that too,
> > both to avoid accessing stale copies of the memslots and to avoid
> > lockdep warnings.  This therefore adds srcu_read_lock/unlock pairs
> > around code that accesses and uses memslots in the Book 3S PR code
> > and the Book E (44x and e500) code.
> > 
> > Signed-off-by: Paul Mackerras 
> > ---
> > Compile-tested only.
> > 
> >  arch/powerpc/kvm/44x_tlb.c   |6 ++
> >  arch/powerpc/kvm/book3s_pr.c |6 ++
> >  arch/powerpc/kvm/e500_tlb.c  |6 ++
> >  3 files changed, 18 insertions(+)
> 
> On top of the previous comment:
> 
> x86 calls srcu_read_lock at the beginning of the KVM_RUN ioctl handler
> (__vcpu_run in arch/x86/kvm/x86.c), unlocks srcu on guest entry, locks
> on guest exit before any potential use of memslots, and unlocks on
> exit to userspace.
> 
> This has the advantage of not sprinkling srcu lock/unlock calls all over
> (except from other ioctls, of course). Its low maintenance.
> 
> Perhaps doing the same on PPC is not a bad idea.

Perhaps... these changes are to areas of the PPC KVM code that I don't
use or maintain, so they're really more a suggestion of one way to fix
the problem than anything else.  That's why I put RFC in the subject
line.  It would be up to Alex whether he wants to fix it like this or
by taking the SRCU lock at a higher level.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] KVM: PPC: Book3S HV: Take the SRCU read lock before looking up memslots

2012-08-09 Thread Paul Mackerras
On Thu, Aug 09, 2012 at 03:22:38PM -0300, Marcelo Tosatti wrote:
> On Mon, Aug 06, 2012 at 08:06:55PM +1000, Paul Mackerras wrote:
> > The generic KVM code uses SRCU (sleeping RCU) to protect accesses
> > to the memslots data structures against updates due to userspace
> > adding, modifying or removing memory slots.  We need to do that too,
> > both to avoid accessing stale copies of the memslots and to avoid
> > lockdep warnings.  This therefore adds srcu_read_lock/unlock pairs
> > around code that accesses and uses memslots.
> > 
> > Since the real-mode handlers for H_ENTER, H_REMOVE and H_BULK_REMOVE
> > need to access the memslots, and we don't want to call the SRCU code
> > in real mode (since we have no assurance that it would only access
> > the linear mapping), we hold the SRCU read lock for the VM while
> > in the guest.  This does mean that adding or removing memory slots
> > while some vcpus are executing in the guest will block for up to
> > two jiffies.  This tradeoff is acceptable since adding/removing
> > memory slots only happens rarely, while H_ENTER/H_REMOVE/H_BULK_REMOVE
> > are performance-critical hot paths.
> 
> I would avoid doing this. What prevents the guest entry loop in the kernel to
> simply reenter without ever unlocking SRCU?

I take and release the SRCU lock inside the guest entry loop, so in
fact we will release the SRCU lock every time we pop out of the guest.

As for holding the SRCU read lock while we're in the guest - that is
what enables us to do H_ENTER etc. (the hypercalls that the guest uses
to update the hardware page table) in real mode, i.e. without
switching the MMU over to the kernel context.  In real mode we can
access the linear mapping but not vmalloc, ioremap or user space, so I
am nervous about doing srcu_read_lock/unlock in real mode.  I really
don't want to switch to kernel mode for those hypercalls because that
switch is relatively slow and requires pulling all of the SMT threads
in the core out of the guest (because of hardware restrictions).

So, holding the SRCU read lock while in the guest is the least ugly
approach AFAICS.  Yes it makes memslot addition/removal a bit slower,
but only for this VM, and it doesn't delay grace periods for other
forms of RCU, so its impact is pretty limited.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/5] KVM: PPC: BookE: Add some more trace points

2012-08-09 Thread Alexander Graf
Without trace points, debugging what exactly is going on inside guest
code can be very tricky. Add a few more trace points at places that
hopefully tell us more when things go wrong.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/booke.c|3 ++
 arch/powerpc/kvm/e500_tlb.c |3 ++
 arch/powerpc/kvm/trace.h|   71 +++
 3 files changed, 77 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 52f6cbb..00bcc57 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -143,6 +143,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
unsigned int priority)
 {
+   trace_kvm_booke_queue_irqprio(vcpu, priority);
set_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
@@ -457,6 +458,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
 {
if (vcpu->requests) {
+   trace_kvm_check_requests(vcpu);
+
if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
update_timer_ints(vcpu);
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 56bc92c..f5e531f 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -312,6 +312,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref 
*ref,
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
 {
if (ref->flags & E500_TLB_VALID) {
+   trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
ref->flags = 0;
}
 }
@@ -1076,6 +1077,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, 
gpa_t gpaddr,
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
+   trace_kvm_unmap_hva(hva);
+
/*
 * Flush all shadow tlb entries everywhere. This is slow, but
 * we are 100% sure that we catch the to be unmapped page
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 9fab6ed..cb2780a 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -82,6 +82,21 @@ TRACE_EVENT(kvm_exit,
)
 );
 
+TRACE_EVENT(kvm_unmap_hva,
+   TP_PROTO(unsigned long hva),
+   TP_ARGS(hva),
+
+   TP_STRUCT__entry(
+   __field(unsigned long,  hva )
+   ),
+
+   TP_fast_assign(
+   __entry->hva= hva;
+   ),
+
+   TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
 TRACE_EVENT(kvm_stlb_inval,
TP_PROTO(unsigned int stlb_index),
TP_ARGS(stlb_index),
@@ -149,6 +164,24 @@ TRACE_EVENT(kvm_gtlb_write,
__entry->word1, __entry->word2)
 );
 
+TRACE_EVENT(kvm_check_requests,
+   TP_PROTO(struct kvm_vcpu *vcpu),
+   TP_ARGS(vcpu),
+
+   TP_STRUCT__entry(
+   __field(__u32,  cpu_nr  )
+   __field(__u32,  requests)
+   ),
+
+   TP_fast_assign(
+   __entry->cpu_nr = vcpu->vcpu_id;
+   __entry->requests   = vcpu->requests;
+   ),
+
+   TP_printk("vcpu=%x requests=%x",
+   __entry->cpu_nr, __entry->requests)
+);
+
 
 /*
  * Book3S trace points   *
@@ -418,6 +451,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write,
__entry->mas2, __entry->mas7_3)
 );
 
+TRACE_EVENT(kvm_booke206_ref_release,
+   TP_PROTO(__u64 pfn, __u32 flags),
+   TP_ARGS(pfn, flags),
+
+   TP_STRUCT__entry(
+   __field(__u64,  pfn )
+   __field(__u32,  flags   )
+   ),
+
+   TP_fast_assign(
+   __entry->pfn= pfn;
+   __entry->flags  = flags;
+   ),
+
+   TP_printk("pfn=%llx flags=%x",
+   __entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+   TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+   TP_ARGS(vcpu, priority),
+
+   TP_STRUCT__entry(
+   __field(__u32,  cpu_nr  )
+   __field(__u32,  priority)
+   __field(unsigned long,  pending )
+   ),
+
+   TP_fast_assign(
+   __entry->cpu_nr = vcpu->vcpu_id;
+   __entry->priority   = priority;
+   __entry->pending= vcpu->arch.pending_exceptions;
+   ),
+
+   TP_printk("vcpu=%x prio=%x pending=%lx",
+   __entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
 #endif
 
 #endif /* _TRACE_KVM_H */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message

[PATCH 3/5] KVM: PPC: E500: Implement MMU notifiers

2012-08-09 Thread Alexander Graf
The e500 target has lived without mmu notifiers ever since it got
introduced, but fails for the user space check on them with hugetlbfs.

So in order to get that one working, implement mmu notifiers in a
reasonably dumb fashion and be happy. On embedded hardware, we almost
never end up with mmu notifier calls, since most people don't overcommit.

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - drop refcount for in-use pages
  - mark pages dirty at mapping time, not unmapping time
  - drop hva_to_memslot usage
---
 arch/powerpc/include/asm/kvm_host.h |3 +-
 arch/powerpc/include/asm/kvm_ppc.h  |1 +
 arch/powerpc/kvm/Kconfig|2 +
 arch/powerpc/kvm/booke.c|6 +++
 arch/powerpc/kvm/e500_tlb.c |   60 +++---
 5 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 572ad01..ed75bc9 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -45,7 +45,8 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \
+defined(CONFIG_KVM_E500MC)
 #include 
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..c38e824 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -104,6 +104,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu 
*vcpu,
struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
  struct kvm_interrupt *irq);
+extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
   unsigned int op, int *advance);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index f4dacb9..40cad8c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -123,6 +123,7 @@ config KVM_E500V2
depends on EXPERIMENTAL && E500 && !PPC_E500MC
select KVM
select KVM_MMIO
+   select MMU_NOTIFIER
---help---
  Support running unmodified E500 guest kernels in virtual machines on
  E500v2 host processors.
@@ -138,6 +139,7 @@ config KVM_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
+   select MMU_NOTIFIER
---help---
  Support running unmodified E500MC/E5500 (32-bit) guest kernels in
  virtual machines on E500MC/E5500 host processors.
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 70a86c0..52f6cbb 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -459,6 +459,10 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
if (vcpu->requests) {
if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
update_timer_ints(vcpu);
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+   if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+   kvmppc_core_flush_tlb(vcpu);
+#endif
}
 }
 
@@ -579,6 +583,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 #endif
 
kvm_guest_exit();
+   vcpu->mode = OUTSIDE_GUEST_MODE;
+   smp_wmb();
 
 out:
vcpu->mode = OUTSIDE_GUEST_MODE;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d26e705..c439754 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -303,18 +303,15 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref 
*ref,
ref->pfn = pfn;
ref->flags = E500_TLB_VALID;
 
-   if (tlbe_is_writable(gtlbe))
+   if (tlbe_is_writable(gtlbe)) {
ref->flags |= E500_TLB_DIRTY;
+   kvm_set_pfn_dirty(pfn);
+   }
 }
 
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
 {
if (ref->flags & E500_TLB_VALID) {
-   if (ref->flags & E500_TLB_DIRTY)
-   kvm_release_pfn_dirty(ref->pfn);
-   else
-   kvm_release_pfn_clean(ref->pfn);
-
ref->flags = 0;
}
 }
@@ -357,6 +354,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 
*vcpu_e500)
clear_tlb_privs(vcpu_e500);
 }
 
+void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
+{
+   struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+   clear_tlb_refs(vcpu_e500);
+   clear_tlb1_bitmap(vcpu_e500);
+}
+
 static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
unsigned int eaddr, int as)
 {
@@ -539,6 +543,9 @@ static inline void kvmppc_e500_shadow_map(struct 
kvmppc_vcpu_e500 *vcpu_e500,
 
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
  

[PATCH 1/5] KVM: PPC: BookE: Add check_requests helper function

2012-08-09 Thread Alexander Graf
We need a central place to check for pending requests in. Add one that
only does the timer check we already do in a different place.

Later, this central function can be extended by more checks.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/booke.c |   24 +---
 1 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1d4ce9a..bcf87fe 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -419,13 +419,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu 
*vcpu)
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int priority;
 
-   if (vcpu->requests) {
-   if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
-   smp_mb();
-   update_timer_ints(vcpu);
-   }
-   }
-
priority = __ffs(*pending);
while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -461,6 +454,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
return r;
 }
 
+static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
+{
+   if (vcpu->requests) {
+   if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
+   update_timer_ints(vcpu);
+   }
+}
+
 /*
  * Common checks before entering the guest world.  Call with interrupts
  * disabled.
@@ -485,6 +486,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
break;
}
 
+   smp_mb();
+   if (vcpu->requests) {
+   /* Make sure we process requests preemptable */
+   local_irq_enable();
+   kvmppc_check_requests(vcpu);
+   local_irq_disable();
+   continue;
+   }
+
if (kvmppc_core_prepare_to_enter(vcpu)) {
/* interrupts got enabled in between, so we
   are back at square 1 */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/5] KVM: PPC: Add cache flush on page map

2012-08-09 Thread Alexander Graf
When we map a page whose icache wasn't cleared before, clear it when first
mapping it in KVM, using the same information bits as the Linux mapping
logic. That way we are 100% sure that any page we map does not have stale
entries in the icache.

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - move code to arch/powerpc
---
 arch/powerpc/include/asm/kvm_host.h   |1 +
 arch/powerpc/include/asm/kvm_ppc.h|   12 
 arch/powerpc/kvm/book3s_32_mmu_host.c |3 +++
 arch/powerpc/kvm/book3s_64_mmu_host.c |2 ++
 arch/powerpc/kvm/e500_tlb.c   |3 +++
 arch/powerpc/mm/mem.c |1 +
 6 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index ed75bc9..f5c289b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define KVM_MAX_VCPUS  NR_CPUS
 #define KVM_MAX_VCORES NR_CPUS
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index c38e824..88de314 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -220,4 +220,16 @@ void kvmppc_claim_lpid(long lpid);
 void kvmppc_free_lpid(long lpid);
 void kvmppc_init_lpid(unsigned long nr_lpids);
 
+static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
+{
+   /* Clear i-cache for new pages */
+   struct page *page;
+   page = pfn_to_page(pfn);
+   if (!test_bit(PG_arch_1, &page->flags)) {
+   flush_dcache_icache_page(page);
+   set_bit(PG_arch_1, &page->flags);
+   }
+}
+
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c 
b/arch/powerpc/kvm/book3s_32_mmu_host.c
index f922c29..837f13e 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -211,6 +211,9 @@ next_pteg:
pteg1 |= PP_RWRX;
}
 
+   if (orig_pte->may_execute)
+   kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
+
local_irq_disable();
 
if (pteg[rr]) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 10fc8ec..0688b6b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -126,6 +126,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
kvmppc_pte *orig_pte)
 
if (!orig_pte->may_execute)
rflags |= HPTE_R_N;
+   else
+   kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
 
hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
 
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c439754..56bc92c 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -544,6 +544,9 @@ static inline void kvmppc_e500_shadow_map(struct 
kvmppc_vcpu_e500 *vcpu_e500,
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
 
+   /* Clear i-cache for new pages */
+   kvmppc_mmu_flush_icache(pfn);
+
/* Drop refcount on page, so that mmu notifiers can clear it */
kvm_release_pfn_clean(pfn);
 }
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index baaafde..fbdad0e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -469,6 +469,7 @@ void flush_dcache_icache_page(struct page *page)
__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
 #endif
 }
+EXPORT_SYMBOL(flush_dcache_icache_page);
 
 void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 {
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/5] KVM: PPC: BookE: Add support for vcpu->mode

2012-08-09 Thread Alexander Graf
Generic KVM code might want to know whether we are inside guest context
or outside. It also wants to be able to push us out of guest context.

Add support to the BookE code for the generic vcpu->mode field that describes
the above states.

Signed-off-by: Alexander Graf 
---
 arch/powerpc/kvm/booke.c |   11 +++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index bcf87fe..70a86c0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -501,6 +501,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
continue;
}
 
+   if (vcpu->mode == EXITING_GUEST_MODE) {
+   r = 1;
+   break;
+   }
+
+   /* Going into guest context! Yay! */
+   vcpu->mode = IN_GUEST_MODE;
+   smp_wmb();
+
break;
}
 
@@ -572,6 +581,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
kvm_guest_exit();
 
 out:
+   vcpu->mode = OUTSIDE_GUEST_MODE;
+   smp_wmb();
local_irq_enable();
return ret;
 }
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/5] KVM: PPC: E500: Implement MMU Notifiers

2012-08-09 Thread Alexander Graf
This patch set adds a very simple implementation of MMU notifiers for the
e500 target. Along the way, I stumbled over a few other things that I've
put into the same patch set, namely:

  * support remote vcpu requests
  * hva_to_memslot helper function
  * icache flushing on page map
  * additional trace points

v1 -> v2:

  - remove patches that are already merged into kvm-ppc-next
  - split up mmu notifier patch
  - remove hva_to_memslot
  - add trace points
  - change icache flush method to be in arch/powerpc
  - make request checks preemptible

Alexander Graf (5):
  KVM: PPC: BookE: Add check_requests helper function
  KVM: PPC: BookE: Add support for vcpu->mode
  KVM: PPC: E500: Implement MMU notifiers
  KVM: PPC: Add cache flush on page map
  KVM: PPC: BookE: Add some more trace points

 arch/powerpc/include/asm/kvm_host.h   |4 +-
 arch/powerpc/include/asm/kvm_ppc.h|   13 ++
 arch/powerpc/kvm/Kconfig  |2 +
 arch/powerpc/kvm/book3s_32_mmu_host.c |3 +
 arch/powerpc/kvm/book3s_64_mmu_host.c |2 +
 arch/powerpc/kvm/booke.c  |   44 +---
 arch/powerpc/kvm/e500_tlb.c   |   66 +++---
 arch/powerpc/kvm/trace.h  |   71 +
 arch/powerpc/mm/mem.c |1 +
 9 files changed, 192 insertions(+), 14 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] KVM: PPC: Book3S HV: Take the SRCU read lock before looking up memslots

2012-08-09 Thread Marcelo Tosatti
On Mon, Aug 06, 2012 at 08:06:55PM +1000, Paul Mackerras wrote:
> The generic KVM code uses SRCU (sleeping RCU) to protect accesses
> to the memslots data structures against updates due to userspace
> adding, modifying or removing memory slots.  We need to do that too,
> both to avoid accessing stale copies of the memslots and to avoid
> lockdep warnings.  This therefore adds srcu_read_lock/unlock pairs
> around code that accesses and uses memslots.
> 
> Since the real-mode handlers for H_ENTER, H_REMOVE and H_BULK_REMOVE
> need to access the memslots, and we don't want to call the SRCU code
> in real mode (since we have no assurance that it would only access
> the linear mapping), we hold the SRCU read lock for the VM while
> in the guest.  This does mean that adding or removing memory slots
> while some vcpus are executing in the guest will block for up to
> two jiffies.  This tradeoff is acceptable since adding/removing
> memory slots only happens rarely, while H_ENTER/H_REMOVE/H_BULK_REMOVE
> are performance-critical hot paths.

I would avoid doing this. What prevents the guest entry loop in the kernel to
simply reenter without ever unlocking SRCU?

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly

2012-08-09 Thread Marcelo Tosatti
On Mon, Aug 06, 2012 at 08:06:03PM +1000, Paul Mackerras wrote:
> >From 44067a8ee15021583636bea4cc1d47e5370b8397 Mon Sep 17 00:00:00 2001
> From: Paul Mackerras 
> Date: Mon, 30 Jul 2012 16:40:54 +1000
> Subject: 
> 
> At present the HV style of KVM doesn't handle deletion or modification
> of memory slots correctly.  Deletion occurs when userspace does the
> KVM_SET_USER_MEMORY_REGION with arguments specifying a new length of
> zero for a slot that contains memory.  Modification occurs when the
> arguments specify a new guest_phys_addr or flags.
> 
> To allow the HV code to know which operation (creation, deletion or
> modification) is being requested, it needs to see the old and new
> contents of the memslot.  kvm_arch_prepare_memory_region has this
> information, so we modify it to pass it down to
> kvmppc_core_prepare_memory_region.  We then modify the HV version
> of that to check which operation is being performed.  If it is a
> deletion, we call a new function kvmppc_unmap_memslot to remove any
> HPT (hashed page table) entries referencing the memory being removed.
> Similarly, if we are modifying the guest physical address we also
> remove any HPT entries.  If the KVM_MEM_LOG_DIRTY_PAGES flag is now
> set and wasn't before, we call kvmppc_hv_get_dirty_log to clear any
> dirty bits so we can detect all modifications from now on.
> 
> Signed-off-by: Paul Mackerras 
> ---
>  arch/powerpc/include/asm/kvm_ppc.h  |4 +++
>  arch/powerpc/kvm/book3s_64_mmu_hv.c |   27 ++--
>  arch/powerpc/kvm/book3s_hv.c|   61 
> +++
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c |2 +-
>  arch/powerpc/kvm/book3s_pr.c|2 ++
>  arch/powerpc/kvm/booke.c|2 ++
>  arch/powerpc/kvm/powerpc.c  |2 +-
>  7 files changed, 76 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 0124937..044c921 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -140,11 +140,15 @@ extern void kvm_release_hpt(struct kvmppc_linear_info 
> *li);
>  extern int kvmppc_core_init_vm(struct kvm *kvm);
>  extern void kvmppc_core_destroy_vm(struct kvm *kvm);
>  extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
> +struct kvm_memory_slot *memslot,
> +struct kvm_memory_slot *old,
>   struct kvm_userspace_memory_region *mem);
>  extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
>   struct kvm_userspace_memory_region *mem);
>  extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
> struct kvm_ppc_smmu_info *info);
> +extern void kvmppc_unmap_memslot(struct kvm *kvm,
> +  struct kvm_memory_slot *memslot);
>  
>  extern int kvmppc_bookehv_init(void);
>  extern void kvmppc_bookehv_exit(void);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 3c635c0..87735a7 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -850,7 +850,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long 
> *rmapp,
>   psize = hpte_page_size(hptep[0], ptel);
>   if ((hptep[0] & HPTE_V_VALID) &&
>   hpte_rpn(ptel, psize) == gfn) {
> - hptep[0] |= HPTE_V_ABSENT;
> + if (kvm->arch.using_mmu_notifiers)
> + hptep[0] |= HPTE_V_ABSENT;
>   kvmppc_invalidate_hpte(kvm, hptep, i);
>   /* Harvest R and C */
>   rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
> @@ -877,6 +878,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long 
> start, unsigned long end)
>   return 0;
>  }
>  
> +void kvmppc_unmap_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> + unsigned long *rmapp;
> + unsigned long gfn;
> + unsigned long n;
> +
> + rmapp = memslot->rmap;
> + gfn = memslot->base_gfn;
> + for (n = memslot->npages; n; --n) {
> + /*
> +  * Testing the present bit without locking is OK because
> +  * the memslot has been marked invalid already, and hence
> +  * no new HPTEs referencing this page can be created,
> +  * thus the present bit can't go from 0 to 1.
> +  */
> + if (*rmapp & KVMPPC_RMAP_PRESENT)
> + kvm_unmap_rmapp(kvm, rmapp, gfn);
> + ++rmapp;
> + ++gfn;
> + }
> +}
> +
>  static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
>unsigned long gfn)
>  {
> @@ -1039,7 +1062,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct 
> kvm_memory_slot *memslot)
>   rmapp = memslot->rmap;
>   map = memslot->dirty_bitmap;

Re: [RFC PATCH 5/5] KVM: PPC: Take the SRCU lock around memslot use

2012-08-09 Thread Marcelo Tosatti
On Mon, Aug 06, 2012 at 08:08:16PM +1000, Paul Mackerras wrote:
> The generic KVM code uses SRCU (sleeping RCU) to protect accesses
> to the memslots data structures against updates due to userspace
> adding, modifying or removing memory slots.  We need to do that too,
> both to avoid accessing stale copies of the memslots and to avoid
> lockdep warnings.  This therefore adds srcu_read_lock/unlock pairs
> around code that accesses and uses memslots in the Book 3S PR code
> and the Book E (44x and e500) code.
> 
> Signed-off-by: Paul Mackerras 
> ---
> Compile-tested only.
> 
>  arch/powerpc/kvm/44x_tlb.c   |6 ++
>  arch/powerpc/kvm/book3s_pr.c |6 ++
>  arch/powerpc/kvm/e500_tlb.c  |6 ++
>  3 files changed, 18 insertions(+)

On top of the previous comment:

x86 calls srcu_read_lock at the beginning of the KVM_RUN ioctl handler
(__vcpu_run in arch/x86/kvm/x86.c), unlocks srcu on guest entry, locks
on guest exit before any potential use of memslots, and unlocks on
exit to userspace.

This has the advantage of not sprinkling srcu lock/unlock calls all over
(except from other ioctls, of course). Its low maintenance.

Perhaps doing the same on PPC is not a bad idea.

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] KVM: Add hva_to_memslot

2012-08-09 Thread Alexander Graf


On 09.08.2012, at 12:36, Avi Kivity  wrote:

> On 08/09/2012 01:34 PM, Takuya Yoshikawa wrote:
>> On Tue,  7 Aug 2012 12:57:13 +0200
>> Alexander Graf  wrote:
>> 
>>> +struct kvm_memory_slot *hva_to_memslot(struct kvm *kvm, hva_t hva)
>>> +{
>>> +struct kvm_memslots *slots = kvm_memslots(kvm);
>>> +struct kvm_memory_slot *memslot;
>>> +
>>> +kvm_for_each_memslot(memslot, slots)
>>> +if (hva >= memslot->userspace_addr &&
>>> +  hva < memslot->userspace_addr + memslot->npages)
>>> +return memslot;
>>> +
>>> +return NULL;
>>> +}
>> 
>> Can't we have two memory slots which contain that hva?
>> I thought that's why hva handler had to check all slots.
> 
> We can and do.  Good catch.
> 

Hrm. So I guess we can only do an hva_is_guest_memory() helper? That's all I 
really need anyways :)

Alex


> 
> -- 
> error compiling committee.c: too many arguments to function
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] KVM: Add hva_to_memslot

2012-08-09 Thread Avi Kivity
On 08/09/2012 01:34 PM, Takuya Yoshikawa wrote:
> On Tue,  7 Aug 2012 12:57:13 +0200
> Alexander Graf  wrote:
> 
>> +struct kvm_memory_slot *hva_to_memslot(struct kvm *kvm, hva_t hva)
>> +{
>> +struct kvm_memslots *slots = kvm_memslots(kvm);
>> +struct kvm_memory_slot *memslot;
>> +
>> +kvm_for_each_memslot(memslot, slots)
>> +if (hva >= memslot->userspace_addr &&
>> +  hva < memslot->userspace_addr + memslot->npages)
>> +return memslot;
>> +
>> +return NULL;
>> +}
> 
> Can't we have two memory slots which contain that hva?
> I thought that's why hva handler had to check all slots.

We can and do.  Good catch.


-- 
error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] KVM: Add hva_to_memslot

2012-08-09 Thread Takuya Yoshikawa
On Tue,  7 Aug 2012 12:57:13 +0200
Alexander Graf  wrote:

> +struct kvm_memory_slot *hva_to_memslot(struct kvm *kvm, hva_t hva)
> +{
> + struct kvm_memslots *slots = kvm_memslots(kvm);
> + struct kvm_memory_slot *memslot;
> +
> + kvm_for_each_memslot(memslot, slots)
> + if (hva >= memslot->userspace_addr &&
> +   hva < memslot->userspace_addr + memslot->npages)
> + return memslot;
> +
> + return NULL;
> +}

Can't we have two memory slots which contain that hva?
I thought that's why hva handler had to check all slots.

Thanks,
Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4 v2] booke: Added ONE_REG interface for IAC/DAC debug registers

2012-08-09 Thread Bharat Bhushan
IAC/DAC are defined as 32 bit while they are actually 64 bits wide, so the
ONE_REG interface is added to set/get them.

Signed-off-by: Bharat Bhushan 
---
v2:
 - checkpatch error removed

 arch/powerpc/include/asm/kvm.h  |   12 
 arch/powerpc/include/asm/kvm_host.h |   24 -
 arch/powerpc/kvm/booke.c|   48 +-
 arch/powerpc/kvm/booke_emulate.c|8 +++---
 4 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 1bea4d8..3c14202 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -221,6 +221,12 @@ struct kvm_sregs {
 
__u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
__u32 dbcr[3];
+   /*
+* iac/dac registers are 64bit wide, while this API
+* interface provides only lower 32 bits on 64 bit
+* processors. ONE_REG interface is added for 64bit
+* iac/dac registers.
+*/
__u32 iac[4];
__u32 dac[2];
__u32 dvc[2];
@@ -326,5 +332,11 @@ struct kvm_book3e_206_tlb_params {
 };
 
 #define KVM_REG_PPC_HIOR   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+#define KVM_REG_PPC_IAC1   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_PPC_IAC2   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_PPC_IAC3   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_PPC_IAC4   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_PPC_DAC1   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_PPC_DAC2   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 873ec11..fe86477 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -344,6 +344,27 @@ struct kvmppc_slb {
bool class  : 1;
 };
 
+# ifdef CONFIG_PPC_FSL_BOOK3E
+#define KVMPPC_BOOKE_IAC_NUM   2
+#define KVMPPC_BOOKE_DAC_NUM   2
+# else
+#define KVMPPC_BOOKE_IAC_NUM   4
+#define KVMPPC_BOOKE_DAC_NUM   2
+# endif
+#define KVMPPC_BOOKE_MAX_IAC   4
+#define KVMPPC_BOOKE_MAX_DAC   2
+
+struct kvmppc_booke_debug_reg {
+   u32 dbcr0;
+   u32 dbcr1;
+   u32 dbcr2;
+#ifdef CONFIG_KVM_E500MC
+   u32 dbcr4;
+#endif
+   u64 iac[KVMPPC_BOOKE_MAX_IAC];
+   u64 dac[KVMPPC_BOOKE_MAX_DAC];
+};
+
 struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@@ -438,8 +459,6 @@ struct kvm_vcpu_arch {
 
u32 ccr0;
u32 ccr1;
-   u32 dbcr0;
-   u32 dbcr1;
u32 dbsr;
 
u64 mmcr[3];
@@ -474,6 +493,7 @@ struct kvm_vcpu_arch {
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
+   struct kvmppc_booke_debug_reg dbg_reg;
 #endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 10905ab..44266e0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1388,12 +1388,56 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-   return -EINVAL;
+   int r = -EINVAL;
+
+   switch (reg->id) {
+   case KVM_REG_PPC_IAC1:
+   case KVM_REG_PPC_IAC2:
+   case KVM_REG_PPC_IAC3:
+   case KVM_REG_PPC_IAC4: {
+   int iac = reg->id - KVM_REG_PPC_IAC1;
+   r = copy_to_user((u64 __user *)(long)reg->addr,
+&vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+   break;
+   }
+   case KVM_REG_PPC_DAC1:
+   case KVM_REG_PPC_DAC2: {
+   int dac = reg->id - KVM_REG_PPC_DAC1;
+   r = copy_to_user((u64 __user *)(long)reg->addr,
+&vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+   break;
+   }
+   default:
+   break;
+   }
+   return r;
 }
 
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-   return -EINVAL;
+   int r = -EINVAL;
+
+   switch (reg->id) {
+   case KVM_REG_PPC_IAC1:
+   case KVM_REG_PPC_IAC2:
+   case KVM_REG_PPC_IAC3:
+   case KVM_REG_PPC_IAC4: {
+   int iac = reg->id - KVM_REG_PPC_IAC1;
+   r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
+(u64 __user *)(long)reg->addr, sizeof(u64));
+   break;
+   }
+   case KVM_REG_PPC_DAC1:
+   case KVM_REG_PPC_DAC2: {
+   int dac = reg->id - KVM_REG_PPC_DAC1;
+   r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
+(u64 __user *)(long)reg->addr, sizeof(u64));
+   break;
+   }
+   default:
+   

[PATCH 1/4 v2] Introduce a common kvm requests handler

2012-08-09 Thread Bharat Bhushan
Added a common request handler which is called before returning to the guest.
It returns a non-zero value when some request demands an exit to userspace.

Signed-off-by: Bharat Bhushan 
---
v2:
 - checkpatch error removed

 arch/powerpc/kvm/booke.c |   46 ++
 1 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097..0e51102 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -412,18 +412,30 @@ static void update_timer_ints(struct kvm_vcpu *vcpu)
kvmppc_core_dequeue_dec(vcpu);
 }
 
+/*
+ * This function handle all requests when returning to guest. This return
+ * non zero value when some request demands exit to userspace.
+ */
+static int kvmppc_handle_requests(struct kvm_vcpu *vcpu)
+{
+   int ret = 0;
+
+   if (!vcpu->requests)
+   return 0;
+
+   if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
+   smp_mb();
+   update_timer_ints(vcpu);
+   }
+
+   return ret;
+}
+
 static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
 {
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int priority;
 
-   if (vcpu->requests) {
-   if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
-   smp_mb();
-   update_timer_ints(vcpu);
-   }
-   }
-
priority = __ffs(*pending);
while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -479,10 +491,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
}
 
if (signal_pending(current)) {
+   vcpu->run->exit_reason = KVM_EXIT_INTR;
r = 1;
break;
}
 
+   r = kvmppc_handle_requests(vcpu);
+   if (r)
+   break;
+
if (kvmppc_core_prepare_to_enter(vcpu)) {
/* interrupts got enabled in between, so we
   are back at square 1 */
@@ -511,8 +528,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 
local_irq_disable();
if (kvmppc_prepare_to_enter(vcpu)) {
-   kvm_run->exit_reason = KVM_EXIT_INTR;
-   ret = -EINTR;
+   if (kvm_run->exit_reason == KVM_EXIT_INTR)
+   ret = -EINTR;
+   else
+   ret = 0;
goto out;
}
 
@@ -972,9 +991,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
if (!(r & RESUME_HOST)) {
local_irq_disable();
if (kvmppc_prepare_to_enter(vcpu)) {
-   run->exit_reason = KVM_EXIT_INTR;
-   r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-   kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+   if (run->exit_reason == KVM_EXIT_INTR) {
+   r = (-EINTR << 2) | RESUME_HOST |
+(r & RESUME_FLAG_NV);
+   kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+   } else
+   r = RESUME_HOST | (r & RESUME_FLAG_NV);
}
}
 
-- 
1.7.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4 v2] KVM: PPC: booke: Add watchdog emulation

2012-08-09 Thread Bharat Bhushan
This patch adds watchdog emulation to KVM. The watchdog
emulation is enabled by the KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
A kernel timer is used for the watchdog emulation and emulates the
h/w watchdog state machine. On watchdog timer expiry, KVM exits to QEMU
if TCR.WRC is non-zero. QEMU can then reset/shutdown etc. depending upon how
it is configured.

Signed-off-by: Liu Yu 
Signed-off-by: Scott Wood 
[bharat.bhus...@freescale.com: reworked patch]
Signed-off-by: Bharat Bhushan 
---
v2:
 - checkpatch error removed
 arch/powerpc/include/asm/kvm_host.h  |3 +
 arch/powerpc/include/asm/kvm_ppc.h   |2 +
 arch/powerpc/include/asm/reg_booke.h |7 ++
 arch/powerpc/kvm/book3s.c|9 ++
 arch/powerpc/kvm/booke.c |  154 ++
 arch/powerpc/kvm/booke_emulate.c |8 ++
 arch/powerpc/kvm/powerpc.c   |   14 +++-
 include/linux/kvm.h  |2 +
 include/linux/kvm_host.h |1 +
 9 files changed, 198 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 572ad01..873ec11 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -469,6 +469,8 @@ struct kvm_vcpu_arch {
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
+   spinlock_t wdt_lock;
+   struct timer_list wdt_timer;
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
@@ -484,6 +486,7 @@ struct kvm_vcpu_arch {
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
+   u8 watchdog_enabled;
u8 sane;
u8 cpu_type;
u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..1438a5e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(unsigned long data);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 
diff --git a/arch/powerpc/include/asm/reg_booke.h 
b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4..e07e6af 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
 #define TCR_FIE0x0080  /* FIT Interrupt Enable */
 #define TCR_ARE0x0040  /* Auto Reload Enable */
 
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr)  tcr) & 0xC000) >> 30) | \
+ (((tcr) & 0x1E) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC000) >> 30)
+#endif
+
 /* Bit definitions for the TSR. */
 #define TSR_ENW0x8000  /* Enable Next Watchdog */
 #define TSR_WIS0x4000  /* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a836..e946665 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+   return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
int i;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0e51102..10905ab 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
 }
 
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+   kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+   clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
msr_mask = MSR_CE | MSR_ME | MSR_DE;
int_class = INT_CLASS_NONCRIT;
break;
+   case BOOKE_IRQPRIO_WATCHDOG:
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -404,12 +415,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
return allowed;
 }
 
+/*
+ * Return the number of jiffies until the next timeout.  If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigne

[PATCH 4/4 v2] KVM: PPC: booke: Allow multiple exception types

2012-08-09 Thread Bharat Bhushan
Current kvmppc_booke_handlers uses the same macro (KVM_HANDLER) and
all handlers are considered to be the same size. This will not be
the case if we want to use different macros for different handlers.

This patch improves the kvmppc_booke_handler so that it can
support different macros for different handlers.

Signed-off-by: Liu Yu 
[bharat.bhus...@freescale.com: Substantial changes]
Signed-off-by: Bharat Bhushan 
---
v2:
 - checkpatch error removed

 arch/powerpc/include/asm/kvm_ppc.h  |2 -
 arch/powerpc/kvm/booke.c|   13 ++-
 arch/powerpc/kvm/booke.h|1 +
 arch/powerpc/kvm/booke_interrupts.S |   37 --
 arch/powerpc/kvm/e500.c |   16 +-
 5 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 1438a5e..deb55bd 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -47,8 +47,6 @@ enum emulation_result {
 
 extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern char kvmppc_handlers_start[];
-extern unsigned long kvmppc_handler_len;
 extern void kvmppc_handler_highmem(void);
 
 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 44266e0..42bf8d2 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1530,7 +1530,9 @@ int __init kvmppc_booke_init(void)
 {
 #ifndef CONFIG_KVM_BOOKE_HV
unsigned long ivor[16];
-   unsigned long max_ivor = 0;
+   unsigned long *handler = kvmppc_booke_handler_addr;
+   unsigned long handler_len;
+   unsigned long handler_end = 0;
int i;
 
/* We install our own exception handlers by hijacking IVPR. IVPR must
@@ -1562,15 +1564,14 @@ int __init kvmppc_booke_init(void)
ivor[15] = mfspr(SPRN_IVOR15);
 
for (i = 0; i < 16; i++) {
-   if (ivor[i] > max_ivor)
-   max_ivor = ivor[i];
+   handler_len = handler[i + 1] - handler[i];
+   handler_end = max(handler_end, handler[i] + handler_len);
 
memcpy((void *)kvmppc_booke_handlers + ivor[i],
-  kvmppc_handlers_start + i * kvmppc_handler_len,
-  kvmppc_handler_len);
+  (void *)handler[i], handler_len);
}
flush_icache_range(kvmppc_booke_handlers,
-  kvmppc_booke_handlers + max_ivor + 
kvmppc_handler_len);
+  kvmppc_booke_handlers + handler_end);
 #endif /* !BOOKE_HV */
return 0;
 }
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index ba61974..de9e526 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -65,6 +65,7 @@
  (1 << BOOKE_IRQPRIO_CRITICAL))
 
 extern unsigned long kvmppc_booke_handlers;
+extern unsigned long kvmppc_booke_handler_addr[];
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
 void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
diff --git a/arch/powerpc/kvm/booke_interrupts.S 
b/arch/powerpc/kvm/booke_interrupts.S
index bb46b32..3539805 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -73,6 +73,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
bctr
 .endm
 
+.macro KVM_HANDLER_ADDR ivor_nr
+   .long   kvmppc_handler_\ivor_nr
+.endm
+
+.macro KVM_HANDLER_END
+   .long   kvmppc_handlers_end
+.endm
+
 _GLOBAL(kvmppc_handlers_start)
 KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK  SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0
@@ -93,9 +101,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT 
SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
-
-_GLOBAL(kvmppc_handler_len)
-   .long kvmppc_handler_1 - kvmppc_handler_0
+_GLOBAL(kvmppc_handlers_end)
 
 /* Registers:
  *  SPRG_SCRATCH0: guest r4
@@ -463,6 +469,31 @@ lightweight_exit:
lwz r4, VCPU_GPR(R4)(r4)
rfi
 
+   .data
+   .align  4
+   .globl  kvmppc_booke_handler_addr
+kvmppc_booke_handler_addr:
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DECREMENTER
+KVM_HANDLER_ADDR BOOKE_INTERRU