Add a new stage2 function, stage2_create_removed(), for creating removed
tables (the opposite of kvm_pgtable_stage2_free_removed()).  Creating a
removed table is useful for splitting block PTEs into tables.  For example,
a 1G block PTE can be split into 4K PTEs by first creating a fully
populated tree, and then using it to replace the 1G PTE in a single step.
This will be used in a subsequent commit for eager huge page splitting.

No functional change intended. This new function will be used in a
subsequent commit.

Signed-off-by: Ricardo Koller <ricar...@google.com>
---
 arch/arm64/kvm/hyp/pgtable.c | 93 ++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 1b371f6dbac2..d1f309128118 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1173,6 +1173,99 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, 
u64 addr, u64 size)
        return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
+struct stage2_create_removed_data {
+       void                            *memcache; /* passed to mm_ops->zalloc_page() */
+       struct kvm_pgtable_mm_ops       *mm_ops; /* provides zalloc_page() and get_page() */
+       u64                             phys; /* phys for the next leaf PTE, advanced per leaf */
+       kvm_pte_t                       attr; /* attr passed to kvm_init_valid_leaf_pte() */
+};
+
+/*
+ * This flag should only be used by the create_removed walker, as it would
+ * be misinterpreted in an installed PTE.
+ */
+#define KVM_INVALID_PTE_NO_PAGE                BIT(9)
+
+/*
+ * Write a table PTE above the last level, and a leaf PTE at the last level.
+ * A failed table allocation yields a valid block PTE instead, and -ENOMEM.
+ */
+static int stage2_create_removed_walker(const struct kvm_pgtable_visit_ctx 
*ctx,
+                                       enum kvm_pgtable_walk_flags visit)
+{
+       struct stage2_create_removed_data *data = ctx->arg;
+       struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+       u64 granule = kvm_granule_size(ctx->level);
+       kvm_pte_t attr = data->attr;
+       kvm_pte_t *childp = NULL;
+       u32 level = ctx->level;
+       int ret = 0;
+
+       if (level < KVM_PGTABLE_MAX_LEVELS - 1) {
+               childp = mm_ops->zalloc_page(data->memcache);
+               ret = childp ? 0 : -ENOMEM;
+       }
+
+       if (childp)
+               *ctx->ptep = kvm_init_table_pte(childp, mm_ops);
+
+       /*
+        * Create a leaf PTE if we are at the max level, or if we failed
+        * to create a table (we are not at max level).
+        */
+       if (level == KVM_PGTABLE_MAX_LEVELS - 1 || !childp) {
+               *ctx->ptep = kvm_init_valid_leaf_pte(data->phys, attr, level);
+               data->phys += granule; /* next leaf starts where this one ends */
+       }
+
+       if (ctx->old != KVM_INVALID_PTE_NO_PAGE) /* root ptep is not in a page */
+               mm_ops->get_page(ctx->ptep);
+
+       return ret;
+}
+
+/*
+ * Create a removed page-table tree of PAGE_SIZE leaf PTEs under *ptep. The
+ * tree is not reachable (i.e., it is removed) from the root (the pgd).
+ *
+ * This function will try to create as many entries in the tree as allowed
+ * by the memcache capacity. It always writes a valid PTE into *ptep. In
+ * the best case, it returns 0 and a fully populated tree under *ptep. In
+ * the worst case, it returns -ENOMEM and *ptep will contain a valid block
+ * PTE covering the expected level, or any other valid combination (e.g., a
+ * 1G table PTE pointing to half 2M block PTEs and half 2M table PTEs).
+ * NOTE(review): confirm the walk keeps going after the walker's -ENOMEM.
+ */
+static int stage2_create_removed(kvm_pte_t *ptep, u64 phys, u32 level,
+                                kvm_pte_t attr, void *memcache,
+                                struct kvm_pgtable_mm_ops *mm_ops)
+{
+       struct stage2_create_removed_data alloc_data = {
+               .phys           = phys,
+               .memcache       = memcache,
+               .mm_ops         = mm_ops,
+               .attr           = attr,
+       };
+       struct kvm_pgtable_walker walker = {
+               .cb     = stage2_create_removed_walker,
+               .flags  = KVM_PGTABLE_WALK_LEAF,
+               .arg    = &alloc_data, /* read back as ctx->arg by the walker */
+       };
+       struct kvm_pgtable_walk_data data = {
+               .walker = &walker,
+
+               /* The IPA is irrelevant for a removed table. */
+               .addr   = 0,
+               .end    = kvm_granule_size(level),
+       };
+
+       /*
+        * The walker should not try to get a reference to the memory
+        * holding this ptep (it's not a page).
+        */
+       *ptep = KVM_INVALID_PTE_NO_PAGE;
+       return __kvm_pgtable_visit(&data, mm_ops, ptep, level);
+}
 
 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                              struct kvm_pgtable_mm_ops *mm_ops,
-- 
2.38.1.431.g37b22c650d-goog

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to