Re: [PATCH v5 22/33] KVM: PPC: Book3S HV: Introduce rmap to track nested guest mappings

2018-10-08 Thread David Gibson
On Mon, Oct 08, 2018 at 04:31:08PM +1100, Paul Mackerras wrote:
> From: Suraj Jitindar Singh 
> 
> When a host (L0) page which is mapped into a (L1) guest is in turn
> mapped through to a nested (L2) guest we keep a reverse mapping (rmap)
> so that these mappings can be retrieved later.
> 
> Whenever we create an entry in a shadow_pgtable for a nested guest we
> create a corresponding rmap entry and add it to the list for the
> L1 guest memslot at the index of the L1 guest page it maps. This means
> at the L1 guest memslot we end up with lists of rmaps.
> 
> When we are notified of a host page being invalidated which has been
> mapped through to a (L1) guest, we can then walk the rmap list for that
> guest page, and find and invalidate all of the corresponding
> shadow_pgtable entries.
> 
> In order to reduce memory consumption, we compress the information for
> each rmap entry down to 52 bits -- 12 bits for the LPID and 40 bits
> for the guest real page frame number -- which will fit in a single
> unsigned long.  To avoid a scenario where a guest can trigger
> unbounded memory allocations, we scan the list when adding an entry to
> see if there is already an entry with the contents we need.  This can
> occur because we don't ever remove entries from the middle of a list.
> 
> A struct nested guest rmap is a list pointer and an rmap entry;
> 
> ----------------
> | next pointer |
> ----------------
> | rmap entry   |
> ----------------
> 
> Thus the rmap pointer for each guest frame number in the memslot can be
> either NULL, a single entry, or a pointer to a list of nested rmap entries.
> 
> gfn   memslot rmap array
>       ---------------------
>  0    | NULL              |    (no rmap entry)
>       ---------------------
>  1    | single rmap entry |    (rmap entry with low bit set)
>       ---------------------
>  2    | list head pointer |    (list of rmap entries)
>       ---------------------
> 
> The final entry always has the lowest bit set and is stored in the next
> pointer of the last list entry, or as a single rmap entry.
> With a list of rmap entries looking like;
> 
> -----------------    -----------------    ---------------------
> | list head ptr | -> | next pointer  | -> | single rmap entry |
> -----------------    -----------------    ---------------------
>                      | rmap entry    |    | rmap entry        |
>                      -----------------    ---------------------
> 
> Signed-off-by: Suraj Jitindar Singh 
> Signed-off-by: Paul Mackerras 

Reviewed-by: David Gibson 

> ---
>  arch/powerpc/include/asm/kvm_book3s.h    |   3 +
>  arch/powerpc/include/asm/kvm_book3s_64.h |  69 +++-
>  arch/powerpc/kvm/book3s_64_mmu_radix.c   |  44 +++---
>  arch/powerpc/kvm/book3s_hv.c              |   1 +
>  arch/powerpc/kvm/book3s_hv_nested.c       | 138 ++-
>  5 files changed, 240 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 63f7ccf..d7aeb6f 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -196,6 +196,9 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
>   int table_index, u64 *pte_ret_p);
>  extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
>   struct kvmppc_pte *gpte, bool data, bool iswrite);
> +extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
> + unsigned int shift, struct kvm_memory_slot *memslot,
> + unsigned int lpid);
>  extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
>   bool writing, unsigned long gpa,
>   unsigned int lpid);
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 5496152..c2a9146 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -53,6 +53,66 @@ struct kvm_nested_guest {
>   struct kvm_nested_guest *next;
>  };
>  
> +/*
> + * We define a nested rmap entry as a single 64-bit quantity
> + * 0xFFF0000000000000   12-bit lpid field
> + * 0x000FFFFFFFFFF000   40-bit guest 4k page frame number
> + * 0x0000000000000001   1-bit  single entry flag
> + */
> +#define RMAP_NESTED_LPID_MASK          0xFFF0000000000000UL
> +#define RMAP_NESTED_LPID_SHIFT         (52)
> +#define RMAP_NESTED_GPA_MASK           0x000FFFFFFFFFF000UL
> +#define RMAP_NESTED_IS_SINGLE_ENTRY    0x0000000000000001UL
> +
> +/* Structure for a nested guest rmap entry */
> +struct rmap_nested {
> +        struct llist_node list;
> +        u64 rmap;
> +};
> +
> +/*
> + * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
> + *                           safe against removal of the list entry or NULL list

[PATCH v5 22/33] KVM: PPC: Book3S HV: Introduce rmap to track nested guest mappings

2018-10-07 Thread Paul Mackerras
From: Suraj Jitindar Singh 

When a host (L0) page which is mapped into a (L1) guest is in turn
mapped through to a nested (L2) guest we keep a reverse mapping (rmap)
so that these mappings can be retrieved later.

Whenever we create an entry in a shadow_pgtable for a nested guest we
create a corresponding rmap entry and add it to the list for the
L1 guest memslot at the index of the L1 guest page it maps. This means
at the L1 guest memslot we end up with lists of rmaps.

When we are notified of a host page being invalidated which has been
mapped through to a (L1) guest, we can then walk the rmap list for that
guest page, and find and invalidate all of the corresponding
shadow_pgtable entries.
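
To make that walk concrete, here is a minimal illustrative sketch written
against the rmap encoding and struct rmap_nested added in the diff below.
This is not the patch's code: kvmhv_invalidate_shadow_pte() is a made-up
stand-in for the real shadow-PTE invalidation helper.

/* Illustrative only: consume every nested mapping of one L1 guest page. */
static void example_walk_nest_rmap(struct kvm *kvm, unsigned long *rmapp)
{
        unsigned long entry = *rmapp;

        while (entry) {
                struct rmap_nested *n_rmap = NULL;
                u64 rmap;

                if (entry & RMAP_NESTED_IS_SINGLE_ENTRY) {
                        /* A packed entry (low bit set): the only or final one. */
                        rmap = entry;
                        entry = 0;
                } else {
                        /* A pointer to a list element; 'list' is the first
                         * member of struct rmap_nested, so the cast is valid. */
                        n_rmap = (struct rmap_nested *) entry;
                        rmap = n_rmap->rmap;
                        entry = (unsigned long) n_rmap->list.next;
                }

                /* Made-up helper: clear the shadow-pgtable entry for this
                 * (lpid, guest real page) pair. */
                kvmhv_invalidate_shadow_pte(kvm,
                        (rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT,
                        rmap & RMAP_NESTED_GPA_MASK);
                kfree(n_rmap);          /* kfree(NULL) is a no-op */
        }
        *rmapp = 0;                     /* all entries consumed */
}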

In order to reduce memory consumption, we compress the information for
each rmap entry down to 52 bits -- 12 bits for the LPID and 40 bits
for the guest real page frame number -- which will fit in a single
unsigned long.  To avoid a scenario where a guest can trigger
unbounded memory allocations, we scan the list when adding an entry to
see if there is already an entry with the contents we need.  This can
occur because we don't ever remove entries from the middle of a list.
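
The packing itself is simple masking and shifting. As a sketch, using the
mask definitions the patch adds below (the example_* helpers are invented
for illustration and are not part of the patch):

/* Pack a 12-bit lpid and a 4k-aligned guest real address into one u64. */
static inline u64 example_pack_nest_rmap(unsigned int lpid, unsigned long n_gpa)
{
        return (((u64) lpid << RMAP_NESTED_LPID_SHIFT) & RMAP_NESTED_LPID_MASK) |
               (n_gpa & RMAP_NESTED_GPA_MASK);
}

/* Recover the two fields when walking the list. */
static inline unsigned int example_nest_rmap_lpid(u64 rmap)
{
        return (rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
}

static inline unsigned long example_nest_rmap_gpa(u64 rmap)
{
        return rmap & RMAP_NESTED_GPA_MASK;
}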

A struct nested guest rmap is a list pointer and an rmap entry;

----------------
| next pointer |
----------------
| rmap entry   |
----------------

Thus the rmap pointer for each guest frame number in the memslot can be
either NULL, a single entry, or a pointer to a list of nested rmap entries.

gfn   memslot rmap array
      ---------------------
 0    | NULL              |    (no rmap entry)
      ---------------------
 1    | single rmap entry |    (rmap entry with low bit set)
      ---------------------
 2    | list head pointer |    (list of rmap entries)
      ---------------------

The final entry always has the lowest bit set and is stored in the next
pointer of the last list entry, or as a single rmap entry.
With a list of rmap entries looking like;

-----------------    -----------------    ---------------------
| list head ptr | -> | next pointer  | -> | single rmap entry |
-----------------    -----------------    ---------------------
                     | rmap entry    |    | rmap entry        |
                     -----------------    ---------------------
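
As a sketch of how insertion can preserve this encoding (illustrative code
with made-up names, not the patch's insertion helper; the duplicate-entry
scan described earlier is omitted, and the caller is assumed to hold the
appropriate lock):

static void example_insert_nest_rmap(unsigned long *rmapp,
                                     struct rmap_nested *n_rmap)
{
        unsigned long old = *rmapp;

        if (!old) {
                /* Empty slot: store the packed value directly, flagged as
                 * a single entry; no list element is needed. */
                *rmapp = n_rmap->rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
                kfree(n_rmap);
                return;
        }

        /*
         * Whether 'old' is a list head pointer or a packed single entry
         * (low bit set), it becomes the new element's next value: a packed
         * entry sitting in a next pointer doubles as the list terminator.
         */
        n_rmap->list.next = (struct llist_node *) old;
        *rmapp = (unsigned long) &n_rmap->list;
}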

Signed-off-by: Suraj Jitindar Singh 
Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_book3s.h|   3 +
 arch/powerpc/include/asm/kvm_book3s_64.h |  69 +++-
 arch/powerpc/kvm/book3s_64_mmu_radix.c   |  44 +++---
 arch/powerpc/kvm/book3s_hv.c |   1 +
 arch/powerpc/kvm/book3s_hv_nested.c  | 138 ++-
 5 files changed, 240 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 63f7ccf..d7aeb6f 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -196,6 +196,9 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
int table_index, u64 *pte_ret_p);
 extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+   unsigned int shift, struct kvm_memory_slot *memslot,
+   unsigned int lpid);
 extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
bool writing, unsigned long gpa,
unsigned int lpid);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 5496152..c2a9146 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -53,6 +53,66 @@ struct kvm_nested_guest {
struct kvm_nested_guest *next;
 };
 
+/*
+ * We define a nested rmap entry as a single 64-bit quantity
+ * 0xFFF0000000000000   12-bit lpid field
+ * 0x000FFFFFFFFFF000   40-bit guest 4k page frame number
+ * 0x0000000000000001   1-bit  single entry flag
+ */
+#define RMAP_NESTED_LPID_MASK          0xFFF0000000000000UL
+#define RMAP_NESTED_LPID_SHIFT         (52)
+#define RMAP_NESTED_GPA_MASK           0x000FFFFFFFFFF000UL
+#define RMAP_NESTED_IS_SINGLE_ENTRY    0x0000000000000001UL
+
+/* Structure for a nested guest rmap entry */
+struct rmap_nested {
+        struct llist_node list;
+        u64 rmap;
+};
+
+/*
+ * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
+ *                           safe against removal of the list entry or NULL list
+ * @pos:   a (struct rmap_nested *) to use as a loop cursor
+ * @node:  pointer to the first entry
+ *         NOTE: this can be NULL
+ * @rmapp: an (unsigned long *) in which to return the rmap entries on each
+ *         iteration