Re: [PATCH 2/6] KVM: PPC: Add support for multiple-TCE hcalls

2013-05-10 Thread Alexey Kardashevskiy
On 05/10/2013 04:51 PM, David Gibson wrote:
> On Mon, May 06, 2013 at 05:25:53PM +1000, Alexey Kardashevskiy wrote:
>> This adds real mode handlers for the H_PUT_TCE_INDIRECT and
>> H_STUFF_TCE hypercalls for QEMU emulated devices such as virtio
>> devices or emulated PCI.  These calls allow adding multiple entries
>> (up to 512) into the TCE table in one call which saves time on
>> transition to/from real mode.
>>
>> This adds a guest physical to host real address converter
>> and calls the existing H_PUT_TCE handler. The converting function
>> is going to be fully utilized by upcoming VFIO supporting patches.
>>
>> This also implements the KVM_CAP_PPC_MULTITCE capability,
>> so in order to support the functionality of this patch, QEMU
>> needs to query for this capability and set the "hcall-multi-tce"
>> hypertas property only if the capability is present, otherwise
>> there will be serious performance degradation.
> 
> 
> Hrm.  Clearly I didn't read this carefully enough before.  There are
> some problems here.

?


> [snip]
>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c 
>> b/arch/powerpc/kvm/book3s_64_vio.c
>> index 72ffc89..643ac1e 100644
>> --- a/arch/powerpc/kvm/book3s_64_vio.c
>> +++ b/arch/powerpc/kvm/book3s_64_vio.c
>> @@ -14,6 +14,7 @@
>>   *
>>   * Copyright 2010 Paul Mackerras, IBM Corp. 
>>   * Copyright 2011 David Gibson, IBM Corporation 
>> + * Copyright 2013 Alexey Kardashevskiy, IBM Corporation 
>>   */
>>  
>>  #include 
>> @@ -36,9 +37,14 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  #define TCES_PER_PAGE   (PAGE_SIZE / sizeof(u64))
>> +#define ERROR_ADDR  (~(unsigned long)0x0)
>>  
>> +/*
>> + * TCE tables handlers.
>> + */
>>  static long kvmppc_stt_npages(unsigned long window_size)
>>  {
>>  return ALIGN((window_size >> SPAPR_TCE_SHIFT)
>> @@ -148,3 +154,111 @@ fail:
>>  }
>>  return ret;
>>  }
>> +
>> +/*
>> + * Virtual mode handling of IOMMU map/unmap.
>> + */
>> +/* Converts guest physical address into host virtual */
>> +static unsigned long get_virt_address(struct kvm_vcpu *vcpu,
>> +unsigned long gpa)
> 
> This should probably return a void * rather than an unsigned long.
> Well, actually a void __user *.
> 
>> +{
>> +unsigned long hva, gfn = gpa >> PAGE_SHIFT;
>> +struct kvm_memory_slot *memslot;
>> +
>> +memslot = search_memslots(kvm_memslots(vcpu->kvm), gfn);
>> +if (!memslot)
>> +return ERROR_ADDR;
>> +
>> +/*
>> + * Convert gfn to hva preserving flags and an offset
>> + * within a system page
>> + */
>> +hva = __gfn_to_hva_memslot(memslot, gfn) + (gpa & ~PAGE_MASK);
>> +return hva;
>> +}
>> +
>> +long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu,
>> +unsigned long liobn, unsigned long ioba,
>> +unsigned long tce)
>> +{
>> +struct kvmppc_spapr_tce_table *tt;
>> +
>> +tt = kvmppc_find_tce_table(vcpu, liobn);
>> +/* Didn't find the liobn, put it to userspace */
>> +if (!tt)
>> +return H_TOO_HARD;
>> +
>> +/* Emulated IO */
>> +return kvmppc_emulated_h_put_tce(tt, ioba, tce);
>> +}
>> +
>> +long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu,
>> +unsigned long liobn, unsigned long ioba,
>> +unsigned long tce_list, unsigned long npages)
>> +{
>> +struct kvmppc_spapr_tce_table *tt;
>> +long i;
>> +unsigned long tces;
>> +
>> +/* The whole table addressed by tce_list resides in 4K page */
>> +if (npages > 512)
>> +return H_PARAMETER;
> 
> So, that doesn't actually verify what the comment says it does - only
> that the list is < 4K in total.  You need to check the alignment of
> tce_list as well.



The spec says to return H_PARAMETER if >512. I.e. it takes just 1 page and
I do not need to bother if pages may not lay continuously in RAM (matters
for real mode).

/*
 * As the spec is saying that maximum possible number of TCEs is 512,
 * the whole TCE page is no more than 4K. Therefore we do not have to
 * worry if pages do not lie continuously in the RAM
 */
Any better?...


>> +
>> +tt = kvmppc_find_tce_table(vcpu, liobn);
>> +/* Didn't find the liobn, put it to userspace */
>> +if (!tt)
>> +return H_TOO_HARD;
>> +
>> +tces = get_virt_address(vcpu, tce_list);
>> +if (tces == ERROR_ADDR)
>> +return H_TOO_HARD;
>> +
>> +/* Emulated IO */
> 
> This comment doesn't seem to have any bearing on the test which
> follows it.
> 
>> +if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size)
>> +return H_PARAMETER;
>> +
>> +for (i = 0; i < npages; ++i) {
>> +unsigned long tce;
>> +unsigned long ptce = tces + i * sizeof(unsigned long);
>> +
>> +if (get_user(tce, (unsigned long __user *)ptce))
>> +break;
>> +
>> +if (kvmppc_emulated_h_put_tce(tt,
>> +ioba + (i << IOMMU_PAGE_SHIFT), tce))
>> +   

Re: [PATCH 2/6] KVM: PPC: Add support for multiple-TCE hcalls

2013-05-09 Thread David Gibson
On Mon, May 06, 2013 at 05:25:53PM +1000, Alexey Kardashevskiy wrote:
> This adds real mode handlers for the H_PUT_TCE_INDIRECT and
> H_STUFF_TCE hypercalls for QEMU emulated devices such as virtio
> devices or emulated PCI.  These calls allow adding multiple entries
> (up to 512) into the TCE table in one call which saves time on
> transition to/from real mode.
> 
> This adds a guest physical to host real address converter
> and calls the existing H_PUT_TCE handler. The converting function
> is going to be fully utilized by upcoming VFIO supporting patches.
> 
> This also implements the KVM_CAP_PPC_MULTITCE capability,
> so in order to support the functionality of this patch, QEMU
> needs to query for this capability and set the "hcall-multi-tce"
> hypertas property only if the capability is present, otherwise
> there will be serious performance degradation.


Hrm.  Clearly I didn't read this carefully enough before.  There are
some problems here.

[snip]
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c 
> b/arch/powerpc/kvm/book3s_64_vio.c
> index 72ffc89..643ac1e 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -14,6 +14,7 @@
>   *
>   * Copyright 2010 Paul Mackerras, IBM Corp. 
>   * Copyright 2011 David Gibson, IBM Corporation 
> + * Copyright 2013 Alexey Kardashevskiy, IBM Corporation 
>   */
>  
>  #include 
> @@ -36,9 +37,14 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define TCES_PER_PAGE(PAGE_SIZE / sizeof(u64))
> +#define ERROR_ADDR  (~(unsigned long)0x0)
>  
> +/*
> + * TCE tables handlers.
> + */
>  static long kvmppc_stt_npages(unsigned long window_size)
>  {
>   return ALIGN((window_size >> SPAPR_TCE_SHIFT)
> @@ -148,3 +154,111 @@ fail:
>   }
>   return ret;
>  }
> +
> +/*
> + * Virtual mode handling of IOMMU map/unmap.
> + */
> +/* Converts guest physical address into host virtual */
> +static unsigned long get_virt_address(struct kvm_vcpu *vcpu,
> + unsigned long gpa)

This should probably return a void * rather than an unsigned long.
Well, actually a void __user *.

> +{
> + unsigned long hva, gfn = gpa >> PAGE_SHIFT;
> + struct kvm_memory_slot *memslot;
> +
> + memslot = search_memslots(kvm_memslots(vcpu->kvm), gfn);
> + if (!memslot)
> + return ERROR_ADDR;
> +
> + /*
> +  * Convert gfn to hva preserving flags and an offset
> +  * within a system page
> +  */
> + hva = __gfn_to_hva_memslot(memslot, gfn) + (gpa & ~PAGE_MASK);
> + return hva;
> +}
> +
> +long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu,
> + unsigned long liobn, unsigned long ioba,
> + unsigned long tce)
> +{
> + struct kvmppc_spapr_tce_table *tt;
> +
> + tt = kvmppc_find_tce_table(vcpu, liobn);
> + /* Didn't find the liobn, put it to userspace */
> + if (!tt)
> + return H_TOO_HARD;
> +
> + /* Emulated IO */
> + return kvmppc_emulated_h_put_tce(tt, ioba, tce);
> +}
> +
> +long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu,
> + unsigned long liobn, unsigned long ioba,
> + unsigned long tce_list, unsigned long npages)
> +{
> + struct kvmppc_spapr_tce_table *tt;
> + long i;
> + unsigned long tces;
> +
> + /* The whole table addressed by tce_list resides in 4K page */
> + if (npages > 512)
> + return H_PARAMETER;

So, that doesn't actually verify what the comment says it does - only
that the list is < 4K in total.  You need to check the alignment of
tce_list as well.

> +
> + tt = kvmppc_find_tce_table(vcpu, liobn);
> + /* Didn't find the liobn, put it to userspace */
> + if (!tt)
> + return H_TOO_HARD;
> +
> + tces = get_virt_address(vcpu, tce_list);
> + if (tces == ERROR_ADDR)
> + return H_TOO_HARD;
> +
> + /* Emulated IO */

This comment doesn't seem to have any bearing on the test which
follows it.

> + if ((ioba + (npages << IOMMU_PAGE_SHIFT)) > tt->window_size)
> + return H_PARAMETER;
> +
> + for (i = 0; i < npages; ++i) {
> + unsigned long tce;
> + unsigned long ptce = tces + i * sizeof(unsigned long);
> +
> + if (get_user(tce, (unsigned long __user *)ptce))
> + break;
> +
> + if (kvmppc_emulated_h_put_tce(tt,
> + ioba + (i << IOMMU_PAGE_SHIFT), tce))
> + break;
> + }
> + if (i == npages)
> + return H_SUCCESS;
> +
> + /* Failed, do cleanup */
> + do {
> + --i;
> + kvmppc_emulated_h_put_tce(tt, ioba + (i << IOMMU_PAGE_SHIFT),
> + 0);
> + } while (i);

Hrm, so, actually PAPR specifies that this hcall is supposed to first
copy the given tces to hypervisor memory, then translate (and
validate) them all, and only then touch the actual TCE table.  Rather
more complicated to do, but I gues

Re: [PATCH 2/6] KVM: PPC: Add support for multiple-TCE hcalls

2013-05-06 Thread David Gibson
On Mon, May 06, 2013 at 05:25:53PM +1000, Alexey Kardashevskiy wrote:
> This adds real mode handlers for the H_PUT_TCE_INDIRECT and
> H_STUFF_TCE hypercalls for QEMU emulated devices such as virtio
> devices or emulated PCI.  These calls allow adding multiple entries
> (up to 512) into the TCE table in one call which saves time on
> transition to/from real mode.
> 
> This adds a guest physical to host real address converter
> and calls the existing H_PUT_TCE handler. The converting function
> is going to be fully utilized by upcoming VFIO supporting patches.
> 
> This also implements the KVM_CAP_PPC_MULTITCE capability,
> so in order to support the functionality of this patch, QEMU
> needs to query for this capability and set the "hcall-multi-tce"
> hypertas property only if the capability is present, otherwise
> there will be serious performance degradation.
> 
> Cc: David Gibson 
> Signed-off-by: Alexey Kardashevskiy 
> Signed-off-by: Paul Mackerras 

Fwiw, it would be nice to get this patch merged, regardless of the
rest of the VFIO/powerpc patches.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: Digital signature


[PATCH 2/6] KVM: PPC: Add support for multiple-TCE hcalls

2013-05-06 Thread Alexey Kardashevskiy
This adds real mode handlers for the H_PUT_TCE_INDIRECT and
H_STUFF_TCE hypercalls for QEMU emulated devices such as virtio
devices or emulated PCI.  These calls allow adding multiple entries
(up to 512) into the TCE table in one call which saves time on
transition to/from real mode.

This adds a guest physical to host real address converter
and calls the existing H_PUT_TCE handler. The converting function
is going to be fully utilized by upcoming VFIO supporting patches.

This also implements the KVM_CAP_PPC_MULTITCE capability,
so in order to support the functionality of this patch, QEMU
needs to query for this capability and set the "hcall-multi-tce"
hypertas property only if the capability is present, otherwise
there will be serious performance degradation.

Cc: David Gibson 
Signed-off-by: Alexey Kardashevskiy 
Signed-off-by: Paul Mackerras 
---
 Documentation/virtual/kvm/api.txt   |   15 ++
 arch/powerpc/include/asm/kvm_ppc.h  |   15 +-
 arch/powerpc/kvm/book3s_64_vio.c|  114 +++
 arch/powerpc/kvm/book3s_64_vio_hv.c |  231 +++
 arch/powerpc/kvm/book3s_hv.c|   23 +++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |6 +
 arch/powerpc/kvm/book3s_pr_papr.c   |   37 -
 arch/powerpc/kvm/powerpc.c  |3 +
 include/uapi/linux/kvm.h|1 +
 9 files changed, 413 insertions(+), 32 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index a4df553..f621cd6 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2463,3 +2463,18 @@ For mmu types KVM_MMU_FSL_BOOKE_NOHV and 
KVM_MMU_FSL_BOOKE_HV:
where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
  - The tsize field of mas1 shall be set to 4K on TLB0, even though the
hardware ignores this value for TLB0.
+
+
+6.4 KVM_CAP_PPC_MULTITCE
+
+Architectures: ppc
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables the guest to put/remove multiple TCE entries
+per hypercall which significanly accelerates DMA operations for PPC KVM
+guests.
+
+When this capability is enabled, H_PUT_TCE_INDIRECT and H_STUFF_TCE are
+expected to occur rather than H_PUT_TCE which supports only one TCE entry
+per call.
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 99da298..d501246 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -139,8 +139,19 @@ extern void kvmppc_xics_free(struct kvm *kvm);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
-extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
-unsigned long ioba, unsigned long tce);
+extern struct kvmppc_spapr_tce_table *kvmppc_find_tce_table(
+   struct kvm_vcpu *vcpu, unsigned long liobn);
+extern long kvmppc_emulated_h_put_tce(struct kvmppc_spapr_tce_table *stt,
+   unsigned long ioba, unsigned long tce);
+extern long kvmppc_virtmode_h_put_tce(struct kvm_vcpu *vcpu,
+   unsigned long liobn, unsigned long ioba,
+   unsigned long tce);
+extern long kvmppc_virtmode_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+   unsigned long liobn, unsigned long ioba,
+   unsigned long tce_list, unsigned long npages);
+extern long kvmppc_virtmode_h_stuff_tce(struct kvm_vcpu *vcpu,
+   unsigned long liobn, unsigned long ioba,
+   unsigned long tce_value, unsigned long npages);
 extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
struct kvm_allocate_rma *rma);
 extern struct kvmppc_linear_info *kvm_alloc_rma(void);
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 72ffc89..643ac1e 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -14,6 +14,7 @@
  *
  * Copyright 2010 Paul Mackerras, IBM Corp. 
  * Copyright 2011 David Gibson, IBM Corporation 
+ * Copyright 2013 Alexey Kardashevskiy, IBM Corporation 
  */
 
 #include 
@@ -36,9 +37,14 @@
 #include 
 #include 
 #include 
+#include 
 
 #define TCES_PER_PAGE  (PAGE_SIZE / sizeof(u64))
+#define ERROR_ADDR  (~(unsigned long)0x0)
 
+/*
+ * TCE tables handlers.
+ */
 static long kvmppc_stt_npages(unsigned long window_size)
 {
return ALIGN((window_size >> SPAPR_TCE_SHIFT)
@@ -148,3 +154,111 @@ fail:
}
return ret;
 }
+
+/*
+ * Virtual mode handling of IOMMU map/unmap.
+ */
+/* Converts guest physical address into host virtual */
+static unsigned long get_virt_address(struct kvm_vcpu *vcpu,
+   unsigned long gpa)
+{
+   unsigned long hva, gfn = gpa >> PAGE_SHIFT;
+   struct kvm_memory_slot *memslot;
+
+   memslot = search_memslots(kvm_memslots(vcpu->kvm), gfn);
+   if (!memslot)
+   return ERROR_ADDR