Re: [RFC PATCH] kvm: arm64: Try stage2 block mapping for host device MMIO

2021-03-11 Thread Marc Zyngier
Digging this patch back from my Inbox...

On Fri, 22 Jan 2021 08:36:50 +,
Keqian Zhu  wrote:
> 
> The MMIO region of a device maybe huge (GB level), try to use block
> mapping in stage2 to speedup both map and unmap.
> 
> Especially for unmap, it performs TLBI right after each invalidation
> of PTE. If all mapping is of PAGE_SIZE, it takes much time to handle
> GB level range.
> 
> Signed-off-by: Keqian Zhu 
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 11 +++
>  arch/arm64/kvm/hyp/pgtable.c | 15 +++
>  arch/arm64/kvm/mmu.c | 12 
>  3 files changed, 34 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> b/arch/arm64/include/asm/kvm_pgtable.h
> index 52ab38db04c7..2266ac45f10c 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -82,6 +82,17 @@ struct kvm_pgtable_walker {
>   const enum kvm_pgtable_walk_flags   flags;
>  };
>  
> +/**
> + * kvm_supported_pgsize() - Get the max supported page size of a mapping.
> + * @pgt: Initialised page-table structure.
> + * @addr:Virtual address at which to place the mapping.
> + * @end: End virtual address of the mapping.
> + * @phys:Physical address of the memory to map.
> + *
> + * The smallest return value is PAGE_SIZE.
> + */
> +u64 kvm_supported_pgsize(struct kvm_pgtable *pgt, u64 addr, u64 end, u64 
> phys);
> +
>  /**
>   * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
>   * @pgt: Uninitialised page-table structure to initialise.
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index bdf8e55ed308..ab11609b9b13 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -81,6 +81,21 @@ static bool kvm_block_mapping_supported(u64 addr, u64 end, 
> u64 phys, u32 level)
>   return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
>  }
>  
> +u64 kvm_supported_pgsize(struct kvm_pgtable *pgt, u64 addr, u64 end, u64 
> phys)
> +{
> + u32 lvl;
> + u64 pgsize = PAGE_SIZE;
> +
> + for (lvl = pgt->start_level; lvl < KVM_PGTABLE_MAX_LEVELS; lvl++) {
> + if (kvm_block_mapping_supported(addr, end, phys, lvl)) {
> + pgsize = kvm_granule_size(lvl);
> + break;
> + }
> + }
> +
> + return pgsize;
> +}
> +
>  static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
>  {
>   u64 shift = kvm_granule_shift(level);
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 7d2257cc5438..80b403fc8e64 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -499,7 +499,8 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>  int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
> phys_addr_t pa, unsigned long size, bool writable)
>  {
> - phys_addr_t addr;
> + phys_addr_t addr, end;
> + unsigned long pgsize;
>   int ret = 0;
>   struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
>   struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> @@ -509,21 +510,24 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t 
> guest_ipa,
>  
>   size += offset_in_page(guest_ipa);
>   guest_ipa &= PAGE_MASK;
> + end = guest_ipa + size;
>  
> - for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
> + for (addr = guest_ipa; addr < end; addr += pgsize) {
>   ret = kvm_mmu_topup_memory_cache(&cache,
>kvm_mmu_cache_min_pages(kvm));
>   if (ret)
>   break;
>  
> + pgsize = kvm_supported_pgsize(pgt, addr, end, pa);
> +
>   spin_lock(&kvm->mmu_lock);
> - ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
> + ret = kvm_pgtable_stage2_map(pgt, addr, pgsize, pa, prot,
>&cache);
>   spin_unlock(&kvm->mmu_lock);
>   if (ret)
>   break;
>  
> - pa += PAGE_SIZE;
> + pa += pgsize;
>   }
>  
>   kvm_mmu_free_memory_cache(&cache);

There is one issue with this patch, which is that it only does half
the job. A VM_PFNMAP VMA can definitely be faulted in dynamically, and
in that case we force this to be a page mapping. This conflicts with
what you are doing here.
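(For reference, the dynamic fault path does something like the following
for such VMAs -- a paraphrase, not the exact mainline code:

	/* user_mem_abort(), heavily simplified: PFNMAP VMAs only get pages */
	if (vma->vm_flags & VM_PFNMAP) {
		force_pte = true;
		vma_shift = PAGE_SHIFT;
	}

so a block mapping installed ahead of time by kvm_phys_addr_ioremap() is
not what a later fault on the same region would install.)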

There is also a broader question: if we can map things on demand, why are
we still mapping these MMIO regions ahead of time?

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.


Re: [PATCH v3 1/4] KVM: arm64: GICv4.1: Add function to get VLPI state

2021-03-11 Thread Marc Zyngier
On Wed, 27 Jan 2021 12:13:34 +,
Shenming Lu  wrote:
> 
> With GICv4.1 and the vPE unmapped, which indicates the invalidation
> of any VPT caches associated with the vPE, we can get the VLPI state
> by peeking at the VPT. So we add a function for this.
> 
> Signed-off-by: Shenming Lu 
> ---
>  arch/arm64/kvm/vgic/vgic-v4.c | 19 +++
>  arch/arm64/kvm/vgic/vgic.h|  1 +
>  2 files changed, 20 insertions(+)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
> index 66508b03094f..ac029ba3d337 100644
> --- a/arch/arm64/kvm/vgic/vgic-v4.c
> +++ b/arch/arm64/kvm/vgic/vgic-v4.c
> @@ -203,6 +203,25 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
>   kvm_arm_resume_guest(kvm);
>  }
>  
> +/*
> + * Must be called with GICv4.1 and the vPE unmapped, which
> + * indicates the invalidation of any VPT caches associated
> + * with the vPE, thus we can get the VLPI state by peeking
> + * at the VPT.
> + */
> +void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
> +{
> + struct its_vpe *vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
> + int mask = BIT(irq->intid % BITS_PER_BYTE);
> + void *va;
> + u8 *ptr;
> +
> + va = page_address(vpe->vpt_page);
> + ptr = va + irq->intid / BITS_PER_BYTE;
> +
> + *val = !!(*ptr & mask);

What guarantees that you can actually read anything valid? Yes, the
VPT caches are clean. But that doesn't make them coherent with CPU
caches.

You need some level of cache maintenance here.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.


Re: [PATCH v3 2/4] KVM: arm64: GICv4.1: Try to save hw pending state in save_pending_tables

2021-03-11 Thread Marc Zyngier
On Wed, 27 Jan 2021 12:13:35 +,
Shenming Lu  wrote:
> 
> After pausing all vCPUs and devices capable of interrupting, in order
> to save the information of all interrupts, besides flushing the pending
> states in kvm’s vgic, we also try to flush the states of VLPIs in the
> virtual pending tables into guest RAM, but we need to have GICv4.1 and
> safely unmap the vPEs first.
> 
> As for the saving of VSGIs, which needs the vPEs to be mapped and might
> conflict with the saving of VLPIs, but since we will map the vPEs back
> at the end of save_pending_tables and both savings require the kvm->lock
> to be held (only happen serially), it will work fine.
> 
> Signed-off-by: Shenming Lu 
> ---
>  arch/arm64/kvm/vgic/vgic-v3.c | 61 +++
>  1 file changed, 55 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
> index 52915b342351..06b1162b7a0a 100644
> --- a/arch/arm64/kvm/vgic/vgic-v3.c
> +++ b/arch/arm64/kvm/vgic/vgic-v3.c
> @@ -1,6 +1,8 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -356,6 +358,32 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, 
> struct vgic_irq *irq)
>   return 0;
>  }
>  
> +/*
> + * The deactivation of the doorbell interrupt will trigger the
> + * unmapping of the associated vPE.
> + */
> +static void unmap_all_vpes(struct vgic_dist *dist)
> +{
> + struct irq_desc *desc;
> + int i;
> +
> + for (i = 0; i < dist->its_vm.nr_vpes; i++) {
> + desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
> + irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
> + }
> +}
> +
> +static void map_all_vpes(struct vgic_dist *dist)
> +{
> + struct irq_desc *desc;
> + int i;
> +
> + for (i = 0; i < dist->its_vm.nr_vpes; i++) {
> + desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
> + irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
> + }
> +}
> +
>  /**
>   * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
>   * kvm lock and all vcpu lock must be held
> @@ -365,14 +393,26 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>   struct vgic_dist *dist = &kvm->arch.vgic;
>   struct vgic_irq *irq;
>   gpa_t last_ptr = ~(gpa_t)0;
> - int ret;
> + bool vlpi_avail = false;
> + int ret = 0;
>   u8 val;
>  
> + /*
> +  * As a preparation for getting any VLPI states.
> +  * The vgic initialized check ensures that the allocation and
> +  * enabling of the doorbells have already been done.
> +  */
> + if (kvm_vgic_global_state.has_gicv4_1 && 
> !WARN_ON(!vgic_initialized(kvm))) {

Should we bail out if we ever spot !vgic_initialized()? In general, I
find the double negation horrible to read.

> + unmap_all_vpes(dist);
> + vlpi_avail = true;
> + }
> +
>   list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
>   int byte_offset, bit_nr;
>   struct kvm_vcpu *vcpu;
>   gpa_t pendbase, ptr;
>   bool stored;
> + bool is_pending = irq->pending_latch;
>  
>   vcpu = irq->target_vcpu;
>   if (!vcpu)
> @@ -387,24 +427,33 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>   if (ptr != last_ptr) {
>   ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
>   if (ret)
> - return ret;
> + goto out;
>   last_ptr = ptr;
>   }
>  
>   stored = val & (1U << bit_nr);
> - if (stored == irq->pending_latch)
> +
> + if (irq->hw && vlpi_avail)
> + vgic_v4_get_vlpi_state(irq, &is_pending);

Keep the 'is_pending = irq->pending_latch;' statement close to the VPT
read, since they represent the same state.
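For illustration, that would make the loop body look something like this
(sketch only):

		bool is_pending;
		...
		is_pending = irq->pending_latch;
		if (irq->hw && vlpi_avail)
			vgic_v4_get_vlpi_state(irq, &is_pending);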

> +
> + if (stored == is_pending)
>   continue;
>  
> - if (irq->pending_latch)
> + if (is_pending)
>   val |= 1 << bit_nr;
>   else
>   val &= ~(1 << bit_nr);
>  
>   ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
>   if (ret)
> - return ret;
> + goto out;
>   }
> - return 0;
> +
> +out:
> + if (vlpi_avail)
> + map_all_vpes(dist);

I have asked that question in the past: is it actually safe to remap
the vPEs and expect them to be runnable?

Also, the current code assumes that VMAPP.PTZ can be advertised if a
VPT is mapped for the first time. Clearly, it is unlikely that the VPT
will be only populated with 0s, so you'll end up with state corruption
on the first remap.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.

Re: [PATCH v3 3/4] KVM: arm64: GICv4.1: Restore VLPI's pending state to physical side

2021-03-11 Thread Marc Zyngier
On Wed, 27 Jan 2021 12:13:36 +,
Shenming Lu  wrote:
> 
> From: Zenghui Yu 
> 
> When setting the forwarding path of a VLPI (switch to the HW mode),
> we could also transfer the pending state from irq->pending_latch to
> VPT (especially in migration, the pending states of VLPIs are restored
> into kvm’s vgic first). And we currently send "INT+VSYNC" to trigger
> a VLPI to pending.
> 
> Signed-off-by: Zenghui Yu 
> Signed-off-by: Shenming Lu 
> ---
>  arch/arm64/kvm/vgic/vgic-v4.c | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
> index ac029ba3d337..a3542af6f04a 100644
> --- a/arch/arm64/kvm/vgic/vgic-v4.c
> +++ b/arch/arm64/kvm/vgic/vgic-v4.c
> @@ -449,6 +449,20 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
>   irq->host_irq   = virq;
>   atomic_inc(&map.vpe->vlpi_count);
>  
> + /* Transfer pending state */
> + if (irq->pending_latch) {
> + ret = irq_set_irqchip_state(irq->host_irq,
> + IRQCHIP_STATE_PENDING,
> + irq->pending_latch);
> + WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);
> +
> + /*
> +  * Let it be pruned from ap_list later and don't bother
> +  * the List Register.
> +  */
> + irq->pending_latch = false;

NAK. If the interrupt is on the AP list, it must be pruned from it
*immediately*. The only case where it can be !pending and still on the
AP list is in interval between sync and prune. If we start messing
with this, we can't reason about the state of this list anymore.

Consider calling vgic_queue_irq_unlock() here.
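For illustration, the shape being suggested might be roughly as follows
(a sketch only -- note that vgic_queue_irq_unlock() expects irq->irq_lock
to be held on entry and drops it, so the locking around this hunk would
need adjusting; see also the follow-up below questioning whether this is
the right primitive):

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->pending_latch) {
		ret = irq_set_irqchip_state(irq->host_irq,
					    IRQCHIP_STATE_PENDING, true);
		WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);

		/* Clear the latch and hand the irq back to
		 * vgic_queue_irq_unlock(), which drops the irq_lock. */
		irq->pending_latch = false;
		vgic_queue_irq_unlock(kvm, irq, flags);
	} else {
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	}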

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.


Re: [PATCH v3 0/4] KVM: arm64: Add VLPI migration support on GICv4.1

2021-03-11 Thread Marc Zyngier
On Thu, 11 Mar 2021 07:03:21 +,
Shenming Lu  wrote:
> 
> Hi,
> 
> Sorry to bother again, I am really hoping a response for this series. :-)

Done, and sorry for the delay. There are a number of issues that need
fixing, I'm afraid.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.


[PATCH v3 2/2] KVM: arm64: Fix exclusive limit for IPA size

2021-03-11 Thread Marc Zyngier
When registering a memslot, we check the size and location of that
memslot against the IPA size to ensure that we can provide guest
access to the whole of the memory.

Unfortunately, this check rejects memslots that end up at the exact
limit of the addressing capability for a given IPA size. For example,
it refuses the creation of a 2GB memslot at 0x800 with a 32bit
IPA space.

Fix it by relaxing the check to accept a memslot reaching the
limit of the IPA space.

Fixes: e55cac5bf2a9 ("kvm: arm/arm64: Prepare for VM specific stage2 
translations")
Reviewed-by: Eric Auger 
Signed-off-by: Marc Zyngier 
Cc: sta...@vger.kernel.org
---
 arch/arm64/kvm/mmu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 77cb2d28f2a4..8711894db8c2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 * Prevent userspace from creating a memory region outside of the IPA
 * space addressable by the KVM guest IPA space.
 */
-   if (memslot->base_gfn + memslot->npages >=
-   (kvm_phys_size(kvm) >> PAGE_SHIFT))
+   if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> 
PAGE_SHIFT))
return -EFAULT;
 
mmap_read_lock(current->mm);
-- 
2.29.2



[PATCH v3 0/2] KVM: arm64: Assorted IPA size fixes

2021-03-11 Thread Marc Zyngier
This is a rework of an initial patch posted a couple of days back [1].

While working on enabling KVM on "reduced IPA size" systems, I realised
we have a couple of issues, some of which do impact userspace.

The first issue is that we accept the creation of a "default IPA size"
VM (40 bits) even when the HW doesn't support it. Not good.

The second one is that we disallow a memslot to end right where the
IPA limit is. One page less and you're good, but that's not quite what
it should be.

I intend for both patches to be backported to -stable.

Thanks,

M.

* From v2 [2]:
  - Fix silly printk blunder
  - Added Cc-stable and Fixes tags

* From v1 [1]:
  - Don't try to cap the default IPA size. If userspace uses 0 with an
expectation that it will get 40bits, we should abide by it and
return an error immediately (noticed by Andrew)
  - Added a new patch to fix the exclusive nature of the IPA limit
  
[1] https://lore.kernel.org/r/20210308174643.761100-1-...@kernel.org
[2] https://lore.kernel.org/r/20210310104208.3819061-1-...@kernel.org

Marc Zyngier (2):
  KVM: arm64: Reject VM creation when the default IPA size is
unsupported
  KVM: arm64: Fix exclusive limit for IPA size

 Documentation/virt/kvm/api.rst |  3 +++
 arch/arm64/kvm/mmu.c   |  3 +--
 arch/arm64/kvm/reset.c | 12 
 3 files changed, 12 insertions(+), 6 deletions(-)

-- 
2.29.2



[PATCH v3 1/2] KVM: arm64: Reject VM creation when the default IPA size is unsupported

2021-03-11 Thread Marc Zyngier
KVM/arm64 has forever used a 40bit default IPA space, partially
due to its 32bit heritage (where the only choice is 40bit).

However, there are implementations in the wild that have a *cough*
much smaller *cough* IPA space, which leads to a misprogramming of
VTCR_EL2, and a guest that is stuck on its first memory access
if userspace dares to ask for the default IPA setting (which most
VMMs do).

Instead, bluntly reject the creation of such a VM, as we can't
satisfy the requirements from userspace (with a one-off warning).
Also clarify the boot warning, and document that the VM creation
will fail when an unsupported IPA size is probided.

Although this is an ABI change, it doesn't really change much
for userspace:

- the guest couldn't run before this change, but no error was
  returned. At least userspace knows what is happening.

- a memory slot that was accepted because it did fit the default
  IPA space now doesn't even get a chance to be registered.

The other thing left to do is to convince userspace to
actually use the IPA space setting instead of relying on the
antiquated default.

Fixes: 233a7cb23531 ("kvm: arm64: Allow tuning the physical address size for 
VM")
Signed-off-by: Marc Zyngier 
Cc: sta...@vger.kernel.org
---
 Documentation/virt/kvm/api.rst |  3 +++
 arch/arm64/kvm/reset.c | 12 
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1a2b5210cdbf..38e327d4b479 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel 
configuration. The limit can
 be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
 ioctl() at run-time.
 
+Creation of the VM will fail if the requested IPA size (whether it is
+implicit or explicit) is unsupported on the host.
+
 Please note that configuring the IPA size does not affect the capability
 exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
 size of the address translated by the stage2 level (guest physical to
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 47f3f035f3ea..9d3d09a89894 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -324,10 +324,9 @@ int kvm_set_ipa_limit(void)
}
 
kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
-   WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
-"KVM IPA Size Limit (%d bits) is smaller than default size\n",
-kvm_ipa_limit);
-   kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
+   kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
+((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
+ " (Reduced IPA size, limited VM/VMM compatibility)" : ""));
 
return 0;
 }
@@ -356,6 +355,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long 
type)
return -EINVAL;
} else {
phys_shift = KVM_PHYS_SHIFT;
+   if (phys_shift > kvm_ipa_limit) {
+   pr_warn_once("%s using unsupported default IPA limit, 
upgrade your VMM\n",
+current->comm);
+   return -EINVAL;
+   }
}
 
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
-- 
2.29.2



Re: [PATCH v3 1/2] KVM: arm64: Reject VM creation when the default IPA size is unsupported

2021-03-11 Thread Andrew Jones
On Thu, Mar 11, 2021 at 10:00:15AM +, Marc Zyngier wrote:
> KVM/arm64 has forever used a 40bit default IPA space, partially
> due to its 32bit heritage (where the only choice is 40bit).
> 
> However, there are implementations in the wild that have a *cough*
> much smaller *cough* IPA space, which leads to a misprogramming of
> VTCR_EL2, and a guest that is stuck on its first memory access
> if userspace dares to ask for the default IPA setting (which most
> VMMs do).
> 
> Instead, blundly reject the creation of such VM, as we can't
> satisfy the requirements from userspace (with a one-off warning).
> Also clarify the boot warning, and document that the VM creation
> will fail when an unsupported IPA size is probided.

provided

> 
> Although this is an ABI change, it doesn't really change much
> for userspace:
> 
> - the guest couldn't run before this change, but no error was
>   returned. At least userspace knows what is happening.
> 
> - a memory slot that was accepted because it did fit the default
>   IPA space now doesn't even get a chance to be registered.
> 
> The other thing that is left doing is to convince userspace to
> actually use the IPA space setting instead of relying on the
> antiquated default.
> 
> Fixes: 233a7cb23531 ("kvm: arm64: Allow tuning the physical address size for 
> VM")
> Signed-off-by: Marc Zyngier 
> Cc: sta...@vger.kernel.org
> ---
>  Documentation/virt/kvm/api.rst |  3 +++
>  arch/arm64/kvm/reset.c | 12 
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 1a2b5210cdbf..38e327d4b479 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel 
> configuration. The limit can
>  be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
>  ioctl() at run-time.
>  
> +Creation of the VM will fail if the requested IPA size (whether it is
> +implicit or explicit) is unsupported on the host.
> +
>  Please note that configuring the IPA size does not affect the capability
>  exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
>  size of the address translated by the stage2 level (guest physical to
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 47f3f035f3ea..9d3d09a89894 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -324,10 +324,9 @@ int kvm_set_ipa_limit(void)
>   }
>  
>   kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
> - WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
> -  "KVM IPA Size Limit (%d bits) is smaller than default size\n",
> -  kvm_ipa_limit);
> - kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
> + kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
> +  ((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
> +   " (Reduced IPA size, limited VM/VMM compatibility)" : ""));

nit: there's a couple pair of unnecessary ()

>  
>   return 0;
>  }
> @@ -356,6 +355,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long 
> type)
>   return -EINVAL;
>   } else {
>   phys_shift = KVM_PHYS_SHIFT;
> + if (phys_shift > kvm_ipa_limit) {
> + pr_warn_once("%s using unsupported default IPA limit, 
> upgrade your VMM\n",
> +  current->comm);
> + return -EINVAL;
> + }
>   }
>  
>   mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> -- 
> 2.29.2
> 

Reviewed-by: Andrew Jones 

Thanks,
drew



Re: [PATCH v3 2/2] KVM: arm64: Fix exclusive limit for IPA size

2021-03-11 Thread Andrew Jones
On Thu, Mar 11, 2021 at 10:00:16AM +, Marc Zyngier wrote:
> When registering a memslot, we check the size and location of that
> memslot against the IPA size to ensure that we can provide guest
> access to the whole of the memory.
> 
> Unfortunately, this check rejects memslot that end-up at the exact
> limit of the addressing capability for a given IPA size. For example,
> it refuses the creation of a 2GB memslot at 0x800 with a 32bit
> IPA space.
> 
> Fix it by relaxing the check to accept a memslot reaching the
> limit of the IPA space.
> 
> Fixes: e55cac5bf2a9 ("kvm: arm/arm64: Prepare for VM specific stage2 
> translations")

Isn't this actually fixing commit c3058d5da222 ("arm/arm64: KVM: Ensure
memslots are within KVM_PHYS_SIZE") ?

> Reviewed-by: Eric Auger 
> Signed-off-by: Marc Zyngier 
> Cc: sta...@vger.kernel.org
> ---
>  arch/arm64/kvm/mmu.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 77cb2d28f2a4..8711894db8c2 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>* Prevent userspace from creating a memory region outside of the IPA
>* space addressable by the KVM guest IPA space.
>*/
> - if (memslot->base_gfn + memslot->npages >=
> - (kvm_phys_size(kvm) >> PAGE_SHIFT))
> + if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> 
> PAGE_SHIFT))
>   return -EFAULT;
>  
>   mmap_read_lock(current->mm);
> -- 
> 2.29.2
> 

Otherwise

Reviewed-by: Andrew Jones 

Thanks,
drew



Re: [PATCH 3/4] KVM: arm64: Rename SCTLR_ELx_FLAGS to SCTLR_EL2_FLAGS

2021-03-11 Thread Mark Rutland
On Wed, Mar 10, 2021 at 06:20:22PM +, Will Deacon wrote:
> On Wed, Mar 10, 2021 at 05:49:17PM +, Marc Zyngier wrote:
> > On Wed, 10 Mar 2021 16:15:47 +,
> > Will Deacon  wrote:
> > > On Wed, Mar 10, 2021 at 04:05:17PM +, Marc Zyngier wrote:
> > > > On Wed, 10 Mar 2021 15:46:26 +,
> > > > Will Deacon  wrote:
> > > > > On Wed, Mar 10, 2021 at 03:26:55PM +, Marc Zyngier wrote:
> > > > > > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S 
> > > > > > b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
> > > > > > index 4eb584ae13d9..7423f4d961a4 100644
> > > > > > --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
> > > > > > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
> > > > > > @@ -122,7 +122,7 @@ alternative_else_nop_endif
> > > > > >  * as well as the EE bit on BE. Drop the A flag since the 
> > > > > > compiler
> > > > > >  * is allowed to generate unaligned accesses.
> > > > > >  */
> > > > > > -   mov_q   x0, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
> > > > > > +   mov_q   x0, (SCTLR_EL2_RES1 | (SCTLR_EL2_FLAGS & ~SCTLR_ELx_A))
> > > > > 
> > > > > Can we just drop SCTLR_ELx_A from SCTLR_EL2_FLAGS instead of clearing 
> > > > > it
> > > > > here?
> > > > 
> > > > Absolutely. That'd actually be an improvement.
> > > 
> > > In fact, maybe just define INIT_SCTLR_EL2_MMU_ON to mirror what we do for
> > > EL1 (i.e. including the RES1 bits) and then use that here?
> > 
> > Like this?
> > 
> > diff --git a/arch/arm64/include/asm/sysreg.h 
> > b/arch/arm64/include/asm/sysreg.h
> > index dfd4edbfe360..593b9bf91bbd 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -579,9 +579,6 @@
> >  #define SCTLR_ELx_A(BIT(1))
> >  #define SCTLR_ELx_M(BIT(0))
> >  
> > -#define SCTLR_ELx_FLAGS(SCTLR_ELx_M  | SCTLR_ELx_A | SCTLR_ELx_C | \
> > -SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
> > -
> >  /* SCTLR_EL2 specific flags. */
> >  #define SCTLR_EL2_RES1 ((BIT(4))  | (BIT(5))  | (BIT(11)) | (BIT(16)) 
> > | \
> >  (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
> > @@ -593,6 +590,10 @@
> >  #define ENDIAN_SET_EL2 0
> >  #endif
> >  
> > +#define INIT_SCTLR_EL2_MMU_ON  
> > \
> > +   (SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |  \
> > +SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
> > +
> >  #define INIT_SCTLR_EL2_MMU_OFF \
> > (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
> >  
> > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S 
> > b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
> > index 4eb584ae13d9..2e16b2098bbd 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
> > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
> > @@ -117,13 +117,7 @@ alternative_else_nop_endif
> > tlbialle2
> > dsb sy
> >  
> > -   /*
> > -* Preserve all the RES1 bits while setting the default flags,
> > -* as well as the EE bit on BE. Drop the A flag since the compiler
> > -* is allowed to generate unaligned accesses.
> > -*/
> > -   mov_q   x0, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
> > -CPU_BE(orr x0, x0, #SCTLR_ELx_EE)
> > +   mov_q   x0, INIT_SCTLR_EL2_MMU_ON
> >  alternative_if ARM64_HAS_ADDRESS_AUTH
> > mov_q   x1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
> >  SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
> 
> Beautiful!
> 
> With that, you can have my ack on the whole series:
> 
> Acked-by: Will Deacon 

FWIW, likewise:

Acked-by: Mark Rutland 

This is really nice!

Thanks,
Mark.


Re: [PATCH 3/4] KVM: arm64: Rename SCTLR_ELx_FLAGS to SCTLR_EL2_FLAGS

2021-03-11 Thread Mark Rutland
On Thu, Mar 11, 2021 at 11:35:29AM +, Mark Rutland wrote:
> Acked-by: Mark Rutland 

Upon reflection, maybe I should spell my own name correctly:

Acked-by: Mark Rutland 

... lest you decide to add a Mocked-by tag instead ;)

Mark.


Re: [PATCH v3 1/4] KVM: arm64: GICv4.1: Add function to get VLPI state

2021-03-11 Thread Shenming Lu
On 2021/3/11 16:57, Marc Zyngier wrote:
> On Wed, 27 Jan 2021 12:13:34 +,
> Shenming Lu  wrote:
>>
>> With GICv4.1 and the vPE unmapped, which indicates the invalidation
>> of any VPT caches associated with the vPE, we can get the VLPI state
>> by peeking at the VPT. So we add a function for this.
>>
>> Signed-off-by: Shenming Lu 
>> ---
>>  arch/arm64/kvm/vgic/vgic-v4.c | 19 +++
>>  arch/arm64/kvm/vgic/vgic.h|  1 +
>>  2 files changed, 20 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
>> index 66508b03094f..ac029ba3d337 100644
>> --- a/arch/arm64/kvm/vgic/vgic-v4.c
>> +++ b/arch/arm64/kvm/vgic/vgic-v4.c
>> @@ -203,6 +203,25 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
>>  kvm_arm_resume_guest(kvm);
>>  }
>>  
>> +/*
>> + * Must be called with GICv4.1 and the vPE unmapped, which
>> + * indicates the invalidation of any VPT caches associated
>> + * with the vPE, thus we can get the VLPI state by peeking
>> + * at the VPT.
>> + */
>> +void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
>> +{
>> +struct its_vpe *vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
>> +int mask = BIT(irq->intid % BITS_PER_BYTE);
>> +void *va;
>> +u8 *ptr;
>> +
>> +va = page_address(vpe->vpt_page);
>> +ptr = va + irq->intid / BITS_PER_BYTE;
>> +
>> +*val = !!(*ptr & mask);
> 
> What guarantees that you can actually read anything valid? Yes, the
> VPT caches are clean. But that doesn't make them coherent with CPU
> caches.
> 
> You need some level of cache maintenance here.

Yeah, and you have come up with some code for this in v2:

diff --git a/drivers/irqchip/irq-gic-v3-its.c
b/drivers/irqchip/irq-gic-v3-its.c
index 7db602434ac5..2dbef127ca15 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4552,6 +4552,10 @@ static void its_vpe_irq_domain_deactivate(struct
irq_domain *domain,

its_send_vmapp(its, vpe, false);
}
+
+   if (find_4_1_its() && !atomic_read(vpe->vmapp_count))
+   gic_flush_dcache_to_poc(page_address(vpe->vpt_page),
+   LPI_PENDBASE_SZ);
  }

  static const struct irq_domain_ops its_vpe_domain_ops = {

Could I add it to this series? :-)

Thanks,
Shenming

> 
> Thanks,
> 
>   M.
> 


Re: [PATCH v3 2/4] KVM: arm64: GICv4.1: Try to save hw pending state in save_pending_tables

2021-03-11 Thread Shenming Lu
On 2021/3/11 17:09, Marc Zyngier wrote:
> On Wed, 27 Jan 2021 12:13:35 +,
> Shenming Lu  wrote:
>>
>> After pausing all vCPUs and devices capable of interrupting, in order
>> to save the information of all interrupts, besides flushing the pending
>> states in kvm’s vgic, we also try to flush the states of VLPIs in the
>> virtual pending tables into guest RAM, but we need to have GICv4.1 and
>> safely unmap the vPEs first.
>>
>> As for the saving of VSGIs, which needs the vPEs to be mapped and might
>> conflict with the saving of VLPIs, but since we will map the vPEs back
>> at the end of save_pending_tables and both savings require the kvm->lock
>> to be held (only happen serially), it will work fine.
>>
>> Signed-off-by: Shenming Lu 
>> ---
>>  arch/arm64/kvm/vgic/vgic-v3.c | 61 +++
>>  1 file changed, 55 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
>> index 52915b342351..06b1162b7a0a 100644
>> --- a/arch/arm64/kvm/vgic/vgic-v3.c
>> +++ b/arch/arm64/kvm/vgic/vgic-v3.c
>> @@ -1,6 +1,8 @@
>>  // SPDX-License-Identifier: GPL-2.0-only
>>  
>>  #include 
>> +#include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -356,6 +358,32 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, 
>> struct vgic_irq *irq)
>>  return 0;
>>  }
>>  
>> +/*
>> + * The deactivation of the doorbell interrupt will trigger the
>> + * unmapping of the associated vPE.
>> + */
>> +static void unmap_all_vpes(struct vgic_dist *dist)
>> +{
>> +struct irq_desc *desc;
>> +int i;
>> +
>> +for (i = 0; i < dist->its_vm.nr_vpes; i++) {
>> +desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
>> +irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
>> +}
>> +}
>> +
>> +static void map_all_vpes(struct vgic_dist *dist)
>> +{
>> +struct irq_desc *desc;
>> +int i;
>> +
>> +for (i = 0; i < dist->its_vm.nr_vpes; i++) {
>> +desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
>> +irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
>> +}
>> +}
>> +
>>  /**
>>   * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
>>   * kvm lock and all vcpu lock must be held
>> @@ -365,14 +393,26 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>>  struct vgic_dist *dist = &kvm->arch.vgic;
>>  struct vgic_irq *irq;
>>  gpa_t last_ptr = ~(gpa_t)0;
>> -int ret;
>> +bool vlpi_avail = false;
>> +int ret = 0;
>>  u8 val;
>>  
>> +/*
>> + * As a preparation for getting any VLPI states.
>> + * The vgic initialized check ensures that the allocation and
>> + * enabling of the doorbells have already been done.
>> + */
>> +if (kvm_vgic_global_state.has_gicv4_1 && 
>> !WARN_ON(!vgic_initialized(kvm))) {
> 
> Should we bail out if we ever spot !vgic_initialized()? In general, I
> find the double negation horrible to read).

Ok, I will change it.

> 
>> +unmap_all_vpes(dist);
>> +vlpi_avail = true;
>> +}
>> +
>>  list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
>>  int byte_offset, bit_nr;
>>  struct kvm_vcpu *vcpu;
>>  gpa_t pendbase, ptr;
>>  bool stored;
>> +bool is_pending = irq->pending_latch;
>>  
>>  vcpu = irq->target_vcpu;
>>  if (!vcpu)
>> @@ -387,24 +427,33 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>>  if (ptr != last_ptr) {
>>  ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
>>  if (ret)
>> -return ret;
>> +goto out;
>>  last_ptr = ptr;
>>  }
>>  
>>  stored = val & (1U << bit_nr);
>> -if (stored == irq->pending_latch)
>> +
>> +if (irq->hw && vlpi_avail)
>> +vgic_v4_get_vlpi_state(irq, &is_pending);
> 
> Keep the 'is_pending = irq->pending_latch;' statement close to the VPT
> read, since they represent the same state.

Ok, makes sense.

> 
>> +
>> +if (stored == is_pending)
>>  continue;
>>  
>> -if (irq->pending_latch)
>> +if (is_pending)
>>  val |= 1 << bit_nr;
>>  else
>>  val &= ~(1 << bit_nr);
>>  
>>  ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
>>  if (ret)
>> -return ret;
>> +goto out;
>>  }
>> -return 0;
>> +
>> +out:
>> +if (vlpi_avail)
>> +map_all_vpes(dist);
> 
> I have asked that question in the past: is it actually safe to remap
> the vPEs and expect them to be runnable

In my opinion, logically it can work, but there might be problems like the
one below that I didn't notice...

> 
> Also, the current code assumes that VMAPP.PTZ can be advertised if a
> VPT is mapped fo

Re: [PATCH v3 3/4] KVM: arm64: GICv4.1: Restore VLPI's pending state to physical side

2021-03-11 Thread Shenming Lu
On 2021/3/11 17:14, Marc Zyngier wrote:
> On Wed, 27 Jan 2021 12:13:36 +,
> Shenming Lu  wrote:
>>
>> From: Zenghui Yu 
>>
>> When setting the forwarding path of a VLPI (switch to the HW mode),
>> we could also transfer the pending state from irq->pending_latch to
>> VPT (especially in migration, the pending states of VLPIs are restored
>> into kvm’s vgic first). And we currently send "INT+VSYNC" to trigger
>> a VLPI to pending.
>>
>> Signed-off-by: Zenghui Yu 
>> Signed-off-by: Shenming Lu 
>> ---
>>  arch/arm64/kvm/vgic/vgic-v4.c | 14 ++
>>  1 file changed, 14 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
>> index ac029ba3d337..a3542af6f04a 100644
>> --- a/arch/arm64/kvm/vgic/vgic-v4.c
>> +++ b/arch/arm64/kvm/vgic/vgic-v4.c
>> @@ -449,6 +449,20 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int 
>> virq,
>>  irq->host_irq   = virq;
>>  atomic_inc(&map.vpe->vlpi_count);
>>  
>> +/* Transfer pending state */
>> +if (irq->pending_latch) {
>> +ret = irq_set_irqchip_state(irq->host_irq,
>> +IRQCHIP_STATE_PENDING,
>> +irq->pending_latch);
>> +WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);
>> +
>> +/*
>> + * Let it be pruned from ap_list later and don't bother
>> + * the List Register.
>> + */
>> +irq->pending_latch = false;
> 
> NAK. If the interrupt is on the AP list, it must be pruned from it
> *immediately*. The only case where it can be !pending and still on the
> AP list is in interval between sync and prune. If we start messing
> with this, we can't reason about the state of this list anymore.
> 
> Consider calling vgic_queue_irq_unlock() here.

Thanks for the hint, but it seems that vgic_queue_irq_unlock() only
queues an IRQ after checking it; did you mean vgic_prune_ap_list() instead?

Thanks a lot for the comments! :-)
Shenming

> 
> Thanks,
> 
>   M.
> 


Re: [PATCH v9 6/6] KVM: arm64: Document MTE capability and ioctl

2021-03-11 Thread Steven Price

On 09/03/2021 11:01, Peter Maydell wrote:

On Mon, 1 Mar 2021 at 14:23, Steven Price  wrote:


A new capability (KVM_CAP_ARM_MTE) identifies that the kernel supports
granting a guest access to the tags, and provides a mechanism for the
VMM to enable it.

A new ioctl (KVM_ARM_MTE_COPY_TAGS) provides a simple way for a VMM to
access the tags of a guest without having to maintain a PROT_MTE mapping
in userspace. The above capability gates access to the ioctl.

Signed-off-by: Steven Price 
---
  Documentation/virt/kvm/api.rst | 37 ++
  1 file changed, 37 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index aed52b0fc16e..1406ea138127 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4939,6 +4939,23 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
  Allows Xen vCPU attributes to be read. For the structure and types,
  see KVM_XEN_VCPU_SET_ATTR above.

+4.131 KVM_ARM_MTE_COPY_TAGS
+---
+
+:Capability: KVM_CAP_ARM_MTE
+:Architectures: arm64
+:Type: vm ioctl
+:Parameters: struct kvm_arm_copy_mte_tags
+:Returns: 0 on success, < 0 on error
+
+Copies Memory Tagging Extension (MTE) tags to/from guest tag memory.


Mostly virt/kvm/api.rst seems to include documentation of the
associated structs, something like:

::

   struct kvm_arm_copy_mte_tags {
  __u64 guest_ipa;
  __u64 length;
  union {
  void __user *addr;
  __u64 padding;
  };
  __u64 flags;
   };


which saves the reader having to cross-reference against the header file.


Good point - I'll add that.


It also means you can more naturally use the actual field names in the doc,
eg:


+The
+starting address and length of guest memory must be ``PAGE_SIZE`` aligned.


you could say "The guest_ipa and length fields" here.

Also "The addr field must point to a buffer which the tags will
be copied to or from." I assume.


Indeed - I'll add the clarification.


+The size of the buffer to store the tags is ``(length / MTE_GRANULE_SIZE)``
+bytes (i.e. 1/16th of the corresponding size).



+ Each byte contains a single tag
+value. This matches the format of ``PTRACE_PEEKMTETAGS`` and
+``PTRACE_POKEMTETAGS``.


What are the valid values for 'flags' ? It looks like they specify which
direction the copy is, which we definitely need to document here.


Yes either KVM_ARM_TAGS_TO_GUEST or KVM_ARM_TAGS_FROM_GUEST - again I'll 
clarify that.



What happens if the caller requests a tag copy for an area of guest
address space which doesn't have tags (eg it has nothing mapped),
or for an area of guest addres space which has tags in some parts
but not in others ?


Guest memory either exists (and has tags) or doesn't exist (assuming MTE 
is enabled for the guest). So the cases this can fail are:


 * The region isn't completely covered with memslots
 * The region isn't completely writable (and KVM_ARM_TAGS_TO_GUEST is 
specified).
 * User space doesn't have access to the memory (i.e. the memory would 
SIGSEGV or similar if the VMM accessed it).


Currently all of the above produce the error -ENOENT which, now that I
come to enumerate the cases, doesn't seem like a great error code (it's
really only appropriate for the first)! Perhaps -EFAULT would be better.
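Putting the pieces of this thread together, a VMM call might end up
looking roughly like this (illustrative only; gpa, len, tag_buf and
vm_fd are made-up names, and the struct layout is the one quoted above):

	struct kvm_arm_copy_mte_tags copy = {
		.guest_ipa = gpa,	/* must be PAGE_SIZE aligned */
		.length    = len,	/* must be PAGE_SIZE aligned */
		.addr      = tag_buf,	/* buffer of len / MTE_GRANULE_SIZE bytes */
		.flags     = KVM_ARM_TAGS_FROM_GUEST,
	};

	if (ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy) < 0)
		perror("KVM_ARM_MTE_COPY_TAGS");	/* currently -ENOENT for bad ranges */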



+
  5. The kvm_run structure
  

@@ -6227,6 +6244,25 @@ KVM_RUN_BUS_LOCK flag is used to distinguish between 
them.
  This capability can be used to check / enable 2nd DAWR feature provided
  by POWER10 processor.

+7.23 KVM_CAP_ARM_MTE
+
+
+:Architectures: arm64
+:Parameters: none
+
+This capability indicates that KVM (and the hardware) supports exposing the
+Memory Tagging Extensions (MTE) to the guest. It must also be enabled by the
+VMM before the guest will be granted access.
+
+When enabled the guest is able to access tags associated with any memory given
+to the guest. KVM will ensure that the pages are flagged ``PG_mte_tagged`` so
+that the tags are maintained during swap or hibernation of the host, however


s/,/;/


Yep


+the VMM needs to manually save/restore the tags as appropriate if the VM is
+migrated.
+
+When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
+perform a bulk copy of tags to/from the guest


"guest."


Good spot.


+
  8. Other capabilities.
  ==

@@ -6716,3 +6752,4 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, 
KVM_XEN_VCPU_SET_ATTR and
  KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
  for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
  vcpu_info is set.
+
--
2.20.1



Stray whitespace change ?


Not sure how that got there - but will remove.

Thanks,

Steve


Re: [PATCH v3 1/2] KVM: arm64: Reject VM creation when the default IPA size is unsupported

2021-03-11 Thread Auger Eric
Hi Marc,

On 3/11/21 11:00 AM, Marc Zyngier wrote:
> KVM/arm64 has forever used a 40bit default IPA space, partially
> due to its 32bit heritage (where the only choice is 40bit).
> 
> However, there are implementations in the wild that have a *cough*
> much smaller *cough* IPA space, which leads to a misprogramming of
> VTCR_EL2, and a guest that is stuck on its first memory access
> if userspace dares to ask for the default IPA setting (which most
> VMMs do).
> 
> Instead, blundly reject the creation of such VM, as we can't
> satisfy the requirements from userspace (with a one-off warning).
> Also clarify the boot warning, and document that the VM creation
> will fail when an unsupported IPA size is probided.
> 
> Although this is an ABI change, it doesn't really change much
> for userspace:
> 
> - the guest couldn't run before this change, but no error was
>   returned. At least userspace knows what is happening.
> 
> - a memory slot that was accepted because it did fit the default
>   IPA space now doesn't even get a chance to be registered.
> 
> The other thing that is left doing is to convince userspace to
> actually use the IPA space setting instead of relying on the
> antiquated default.
> 
> Fixes: 233a7cb23531 ("kvm: arm64: Allow tuning the physical address size for 
> VM")
> Signed-off-by: Marc Zyngier 
> Cc: sta...@vger.kernel.org
Reviewed-by: Eric Auger 

Thanks

Eric
> ---
>  Documentation/virt/kvm/api.rst |  3 +++
>  arch/arm64/kvm/reset.c | 12 
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 1a2b5210cdbf..38e327d4b479 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel 
> configuration. The limit can
>  be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
>  ioctl() at run-time.
>  
> +Creation of the VM will fail if the requested IPA size (whether it is
> +implicit or explicit) is unsupported on the host.
> +
>  Please note that configuring the IPA size does not affect the capability
>  exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
>  size of the address translated by the stage2 level (guest physical to
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 47f3f035f3ea..9d3d09a89894 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -324,10 +324,9 @@ int kvm_set_ipa_limit(void)
>   }
>  
>   kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
> - WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
> -  "KVM IPA Size Limit (%d bits) is smaller than default size\n",
> -  kvm_ipa_limit);
> - kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
> + kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
> +  ((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
> +   " (Reduced IPA size, limited VM/VMM compatibility)" : ""));
>  
>   return 0;
>  }
> @@ -356,6 +355,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long 
> type)
>   return -EINVAL;
>   } else {
>   phys_shift = KVM_PHYS_SHIFT;
> + if (phys_shift > kvm_ipa_limit) {
> + pr_warn_once("%s using unsupported default IPA limit, 
> upgrade your VMM\n",
> +  current->comm);
> + return -EINVAL;
> + }
>   }
>  
>   mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> 



Re: [RFC PATCH] kvm: arm64: Try stage2 block mapping for host device MMIO

2021-03-11 Thread Keqian Zhu
Hi Marc,

On 2021/3/11 16:43, Marc Zyngier wrote:
> Digging this patch back from my Inbox...
Yeah, thanks ;-)

> 
> On Fri, 22 Jan 2021 08:36:50 +,
> Keqian Zhu  wrote:
>>
>> The MMIO region of a device maybe huge (GB level), try to use block
>> mapping in stage2 to speedup both map and unmap.
>>
>> Especially for unmap, it performs TLBI right after each invalidation
>> of PTE. If all mapping is of PAGE_SIZE, it takes much time to handle
>> GB level range.
>>
>> Signed-off-by: Keqian Zhu 
>> ---
>>  arch/arm64/include/asm/kvm_pgtable.h | 11 +++
>>  arch/arm64/kvm/hyp/pgtable.c | 15 +++
>>  arch/arm64/kvm/mmu.c | 12 
>>  3 files changed, 34 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
>> b/arch/arm64/include/asm/kvm_pgtable.h
>> index 52ab38db04c7..2266ac45f10c 100644
>> --- a/arch/arm64/include/asm/kvm_pgtable.h
>> +++ b/arch/arm64/include/asm/kvm_pgtable.h
>> @@ -82,6 +82,17 @@ struct kvm_pgtable_walker {
>>  const enum kvm_pgtable_walk_flags   flags;
>>  };
>>  
>> +/**
>> + * kvm_supported_pgsize() - Get the max supported page size of a mapping.
>> + * @pgt:Initialised page-table structure.
>> + * @addr:   Virtual address at which to place the mapping.
>> + * @end:End virtual address of the mapping.
>> + * @phys:   Physical address of the memory to map.
>> + *
>> + * The smallest return value is PAGE_SIZE.
>> + */
>> +u64 kvm_supported_pgsize(struct kvm_pgtable *pgt, u64 addr, u64 end, u64 
>> phys);
>> +
>>  /**
>>   * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
>>   * @pgt:Uninitialised page-table structure to initialise.
>> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
>> index bdf8e55ed308..ab11609b9b13 100644
>> --- a/arch/arm64/kvm/hyp/pgtable.c
>> +++ b/arch/arm64/kvm/hyp/pgtable.c
>> @@ -81,6 +81,21 @@ static bool kvm_block_mapping_supported(u64 addr, u64 
>> end, u64 phys, u32 level)
>>  return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
>>  }
>>  
>> +u64 kvm_supported_pgsize(struct kvm_pgtable *pgt, u64 addr, u64 end, u64 
>> phys)
>> +{
>> +u32 lvl;
>> +u64 pgsize = PAGE_SIZE;
>> +
>> +for (lvl = pgt->start_level; lvl < KVM_PGTABLE_MAX_LEVELS; lvl++) {
>> +if (kvm_block_mapping_supported(addr, end, phys, lvl)) {
>> +pgsize = kvm_granule_size(lvl);
>> +break;
>> +}
>> +}
>> +
>> +return pgsize;
>> +}
>> +
>>  static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
>>  {
>>  u64 shift = kvm_granule_shift(level);
>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>> index 7d2257cc5438..80b403fc8e64 100644
>> --- a/arch/arm64/kvm/mmu.c
>> +++ b/arch/arm64/kvm/mmu.c
>> @@ -499,7 +499,8 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>>  int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
>>phys_addr_t pa, unsigned long size, bool writable)
>>  {
>> -phys_addr_t addr;
>> +phys_addr_t addr, end;
>> +unsigned long pgsize;
>>  int ret = 0;
>>  struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
>>  struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
>> @@ -509,21 +510,24 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t 
>> guest_ipa,
>>  
>>  size += offset_in_page(guest_ipa);
>>  guest_ipa &= PAGE_MASK;
>> +end = guest_ipa + size;
>>  
>> -for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
>> +for (addr = guest_ipa; addr < end; addr += pgsize) {
>>  ret = kvm_mmu_topup_memory_cache(&cache,
>>   kvm_mmu_cache_min_pages(kvm));
>>  if (ret)
>>  break;
>>  
>> +pgsize = kvm_supported_pgsize(pgt, addr, end, pa);
>> +
>>  spin_lock(&kvm->mmu_lock);
>> -ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
>> +ret = kvm_pgtable_stage2_map(pgt, addr, pgsize, pa, prot,
>>   &cache);
>>  spin_unlock(&kvm->mmu_lock);
>>  if (ret)
>>  break;
>>  
>> -pa += PAGE_SIZE;
>> +pa += pgsize;
>>  }
>>  
>>  kvm_mmu_free_memory_cache(&cache);
> 
> There is one issue with this patch, which is that it only does half
> the job. A VM_PFNMAP VMA can definitely be faulted in dynamically, and
> in that case we force this to be a page mapping. This conflicts with
> what you are doing here.
Oh yes, these two paths should keep the same mapping logic.

I searched for "force_pte" and found some discussion [1] between you
and Christoffer, but I failed to find a reason for forcing pte mappings
for device MMIO regions (except that we want to keep the same logic as
the eager mapping path). So if you don't object to it, I
will try to implement block mapping for 

Re: [PATCH 04/10] KVM: arm64: Support smp_processor_id() in nVHE hyp

2021-03-11 Thread Quentin Perret
On Thursday 04 Mar 2021 at 11:54:47 (+), 'Andrew Scull' via kernel-team 
wrote:
> smp_procesor_id() works off of the cpu_number per-cpu variable. Create
> an nVHE hyp version of cpu_number and initialize it to the same value as
> the host when creating the hyp per-cpu regions.
> 
> Signed-off-by: Andrew Scull 
> ---
>  arch/arm64/kvm/arm.c  | 2 ++
>  arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 2 ++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 26ccc369cf11..e3edea8379f3 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -54,6 +54,7 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
>  static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
>  unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
>  DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
> +DECLARE_KVM_NVHE_PER_CPU(int, cpu_number);
>  
>  /* The VMID used in the VTTBR */
>  static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
> @@ -1740,6 +1741,7 @@ static int init_hyp_mode(void)
>   page_addr = page_address(page);
>   memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), 
> nvhe_percpu_size());
>   kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
> + *per_cpu_ptr_nvhe_sym(cpu_number, cpu) = cpu;
>   }
>  
>   /*
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c 
> b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
> index 879559057dee..86f952b1de18 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
> @@ -8,6 +8,8 @@
>  #include 
>  #include 
>  
> +DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);

Is smp_processor_id() going to work at EL2 with CONFIG_DEBUG_PREEMPT=y?
If not, then maybe we should have our own hyp_smp_processor_id() macro?
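Something minimal along these lines, perhaps (name and exact definition
are only a guess):

	/*
	 * Read the EL2 copy of cpu_number directly, avoiding the out-of-line
	 * debug_smp_processor_id() path that CONFIG_DEBUG_PREEMPT brings in.
	 */
	#define hyp_smp_processor_id()	(__this_cpu_read(cpu_number))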

Thanks,
Quentin


Re: [PATCH 05/10] KVM: arm64: Track where vcpu FP state was last loaded

2021-03-11 Thread Quentin Perret
On Thursday 04 Mar 2021 at 11:54:48 (+), 'Andrew Scull' via kernel-team 
wrote:
> Keep track of the cpu that a vcpu's FP state was last loaded onto. This
> information is needed in order to tell whether a vcpu's latest FP state
> is already loaded on a cpu to avoid unnecessary reloading.
> 
> The method follows the pattern used by thread_struct whereby an
> fpsimd_cpu field is added and updated when the state is loaded.
> 
> Signed-off-by: Andrew Scull 
> Cc: Dave Martin 
> ---
>  arch/arm64/include/asm/kvm_host.h   | 1 +
>  arch/arm64/kvm/arm.c| 2 ++
>  arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++
>  3 files changed, 5 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 8a559fa2f237..a01194371ae5 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -306,6 +306,7 @@ struct kvm_vcpu_arch {
>   struct kvm_guest_debug_arch vcpu_debug_state;
>   struct kvm_guest_debug_arch external_debug_state;
>  
> + int fpsimd_cpu;
>   struct user_fpsimd_state *host_fpsimd_state;/* hyp VA */
>  
>   struct {
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index e3edea8379f3..87141c8d63e6 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -313,6 +313,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>  
>   vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
>  
> + vcpu->arch.fpsimd_cpu = NR_CPUS;

Is this supposed to be an invalid CPU number? If so, then NR_CPUS + 1 ?

Thanks,
Quentin


Re: [PATCH 05/10] KVM: arm64: Track where vcpu FP state was last loaded

2021-03-11 Thread Quentin Perret
On Thursday 11 Mar 2021 at 10:37:28 (+), Quentin Perret wrote:
> On Thursday 04 Mar 2021 at 11:54:48 (+), 'Andrew Scull' via kernel-team 
> wrote:
> > Keep track of the cpu that a vcpu's FP state was last loaded onto. This
> > information is needed in order to tell whether a vcpu's latest FP state
> > is already loaded on a cpu to avoid unnecessary reloading.
> > 
> > The method follows the pattern used by thread_struct whereby an
> > fpsimd_cpu field is added and updated when the state is loaded.
> > 
> > Signed-off-by: Andrew Scull 
> > Cc: Dave Martin 
> > ---
> >  arch/arm64/include/asm/kvm_host.h   | 1 +
> >  arch/arm64/kvm/arm.c| 2 ++
> >  arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++
> >  3 files changed, 5 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h 
> > b/arch/arm64/include/asm/kvm_host.h
> > index 8a559fa2f237..a01194371ae5 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -306,6 +306,7 @@ struct kvm_vcpu_arch {
> > struct kvm_guest_debug_arch vcpu_debug_state;
> > struct kvm_guest_debug_arch external_debug_state;
> >  
> > +   int fpsimd_cpu;
> > struct user_fpsimd_state *host_fpsimd_state;/* hyp VA */
> >  
> > struct {
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index e3edea8379f3..87141c8d63e6 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -313,6 +313,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> >  
> > vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
> >  
> > +   vcpu->arch.fpsimd_cpu = NR_CPUS;
> 
> Is this supposed to be an invalid CPU number? If so, then NR_CPUS + 1 ?

Obviously not, forget me.


Re: [PATCH v4 06/34] KVM: arm64: Factor memory allocation out of pgtable.c

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:23PM +, Quentin Perret wrote:
> In preparation for enabling the creation of page-tables at EL2, factor
> all memory allocation out of the page-table code, hence making it
> re-usable with any compatible memory allocator.
> 
> No functional changes intended.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 41 +++-
>  arch/arm64/kvm/hyp/pgtable.c | 98 +---
>  arch/arm64/kvm/mmu.c | 66 ++-
>  3 files changed, 163 insertions(+), 42 deletions(-)

Thanks, looks good to me now:

Acked-by: Will Deacon 

Will
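
As a hedged sketch of what "factor all memory allocation out" amounts to
(member names below are assumptions modelled on the commit message, not
copied from the patch): the page-table code is handed a table of callbacks
and never calls an allocator directly, so the same walker can later be
reused at EL2 with a different allocator behind it.

	/* Hedged sketch of a memory-ops table supplied by the caller. */
	struct kvm_pgtable_mm_ops {
		void		*(*zalloc_page)(void *arg);
		void		*(*zalloc_pages_exact)(size_t size);
		void		(*free_pages_exact)(void *addr, size_t size);
		void		(*get_page)(void *addr);
		void		(*put_page)(void *addr);
		phys_addr_t	(*virt_to_phys)(void *addr);
	};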


Re: [PATCH v4 11/34] KVM: arm64: Stub CONFIG_DEBUG_LIST at Hyp

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:28PM +, Quentin Perret wrote:
> In order to use the kernel list library at EL2, introduce stubs for the
> CONFIG_DEBUG_LIST out-of-line calls.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/kvm/hyp/nvhe/Makefile |  2 +-
>  arch/arm64/kvm/hyp/nvhe/stub.c   | 22 ++
>  2 files changed, 23 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/stub.c
> 
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile 
> b/arch/arm64/kvm/hyp/nvhe/Makefile
> index 24ff99e2eac5..144da72ad510 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -13,7 +13,7 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o
>  lib-objs := $(addprefix ../../../lib/, $(lib-objs))
>  
>  obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
> -  hyp-main.o hyp-smp.o psci-relay.o early_alloc.o
> +  hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o
>  obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
>../fpsimd.o ../hyp-entry.o ../exception.o
>  obj-y += $(lib-objs)
> diff --git a/arch/arm64/kvm/hyp/nvhe/stub.c b/arch/arm64/kvm/hyp/nvhe/stub.c
> new file mode 100644
> index ..c0aa6bbfd79d
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/nvhe/stub.c
> @@ -0,0 +1,22 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Stubs for out-of-line function calls caused by re-using kernel
> + * infrastructure at EL2.
> + *
> + * Copyright (C) 2020 - Google LLC
> + */
> +
> +#include 
> +
> +#ifdef CONFIG_DEBUG_LIST
> +bool __list_add_valid(struct list_head *new, struct list_head *prev,
> +   struct list_head *next)
> +{
> + return true;
> +}
> +
> +bool __list_del_entry_valid(struct list_head *entry)
> +{
> + return true;
> +}
> +#endif

This isn't any worse than disabling DEBUG_LIST for the EL2 object, so as
an initial implementation:

Acked-by: Will Deacon 

but we really should have the debug list checks on (probably
unconditionally) for the EL2 code in my opinion.

Will


Re: [PATCH v4 12/34] KVM: arm64: Introduce a Hyp buddy page allocator

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:29PM +, Quentin Perret wrote:
> When memory protection is enabled, the hyp code will require a basic
> form of memory management in order to allocate and free memory pages at
> EL2. This is needed for various use-cases, including the creation of hyp
> mappings or the allocation of stage 2 page tables.
> 
> To address these use-cases, introduce a simple memory allocator in the
> hyp code. The allocator is designed as a conventional 'buddy allocator',
> working at page granularity. It allows allocating and freeing
> physically contiguous pages from memory 'pools', with a guaranteed order
> alignment in the PA space. Each page in a memory pool is associated
> with a struct hyp_page which holds the page's metadata, including its
> refcount, as well as its current order, hence mimicking the kernel's
> buddy system in the GFP infrastructure. The hyp_page metadata are made
> accessible through a hyp_vmemmap, following the concept of
> SPARSE_VMEMMAP in the kernel.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/kvm/hyp/include/nvhe/gfp.h|  68 
>  arch/arm64/kvm/hyp/include/nvhe/memory.h |  28 
>  arch/arm64/kvm/hyp/nvhe/Makefile |   2 +-
>  arch/arm64/kvm/hyp/nvhe/page_alloc.c | 195 +++
>  4 files changed, 292 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kvm/hyp/include/nvhe/gfp.h
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/page_alloc.c

Eventually, we can replace the refcount with refcount_t, but for now this
looks pretty good:

Acked-by: Will Deacon 

Will
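
To illustrate the vmemmap idea in the commit message (hyp_page, hyp_vmemmap
and hyp_phys_to_page follow the wording above but are not copied from the
patch), each physical page of a pool maps linearly onto a metadata entry:

	/* Hedged sketch: per-page metadata looked up through a linear array. */
	struct hyp_page {
		unsigned int refcount;	/* users of the page */
		unsigned int order;	/* current buddy order */
	};

	extern struct hyp_page *hyp_vmemmap;	/* assumed base of the metadata array */

	static struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
	{
		return &hyp_vmemmap[phys >> PAGE_SHIFT];
	}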


Re: [PATCH v4 16/34] KVM: arm64: Prepare the creation of s1 mappings at EL2

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:33PM +, Quentin Perret wrote:
> When memory protection is enabled, the EL2 code needs the ability to
> create and manage its own page-table. To do so, introduce a new set of
> hypercalls to bootstrap a memory management system at EL2.
> 
> This leads to the following boot flow in nVHE Protected mode:
> 
>  1. the host allocates memory for the hypervisor very early on, using
> the memblock API;
> 
>  2. the host creates a set of stage 1 page-tables for EL2, installs the
> EL2 vectors, and issues the __pkvm_init hypercall;
> 
>  3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table
> and stores it in the memory pool provided by the host;
> 
>  4. the hypervisor then extends its stage 1 mappings to include a
> vmemmap in the EL2 VA space, hence allowing the use of the buddy
> allocator introduced in a previous patch;
> 
>  5. the hypervisor jumps back in the idmap page, switches from the
> host-provided page-table to the new one, and wraps up its
> initialization by enabling the new allocator, before returning to
> the host.
> 
>  6. the host can free the now unused page-table created for EL2, and
> will now need to issue hypercalls to make changes to the EL2 stage 1
> mappings instead of modifying them directly.
> 
> Note that for the sake of simplifying the review, this patch focuses on
> the hypervisor side of things. In other words, this only implements the
> new hypercalls, but does not make use of them from the host yet. The
> host-side changes will follow in a subsequent patch.
> 
> Credits to Will for __pkvm_init_switch_pgd.
> 
> Co-authored-by: Will Deacon 
> Signed-off-by: Will Deacon 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_asm.h |   4 +
>  arch/arm64/include/asm/kvm_host.h|   7 +
>  arch/arm64/include/asm/kvm_hyp.h |   8 ++
>  arch/arm64/include/asm/kvm_pgtable.h |   2 +
>  arch/arm64/kernel/image-vars.h   |  16 +++
>  arch/arm64/kvm/hyp/Makefile  |   2 +-
>  arch/arm64/kvm/hyp/include/nvhe/mm.h |  71 ++
>  arch/arm64/kvm/hyp/nvhe/Makefile |   4 +-
>  arch/arm64/kvm/hyp/nvhe/hyp-init.S   |  27 
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c   |  49 +++
>  arch/arm64/kvm/hyp/nvhe/mm.c | 173 +++
>  arch/arm64/kvm/hyp/nvhe/setup.c  | 197 +++
>  arch/arm64/kvm/hyp/pgtable.c |   2 -
>  arch/arm64/kvm/hyp/reserved_mem.c|  92 +
>  arch/arm64/mm/init.c |   3 +
>  15 files changed, 652 insertions(+), 5 deletions(-)
>  create mode 100644 arch/arm64/kvm/hyp/include/nvhe/mm.h
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/mm.c
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/setup.c
>  create mode 100644 arch/arm64/kvm/hyp/reserved_mem.c

Acked-by: Will Deacon 

Will
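
Read together, the host half of the six steps boils down to something like
the following heavily simplified sketch; hyp_mem_base, hyp_mem_size and the
__pkvm_init argument list are assumptions for illustration, only the
hypercall name itself comes from this patch.

	/* Hedged sketch of steps 1, 2 and 6 as seen from the host. */
	static int host_bootstrap_el2_mm(void)
	{
		int ret;

		/*
		 * Step 2: hand the memblock region reserved in step 1 over to
		 * EL2, which rebuilds its own stage 1 inside it (steps 3-5).
		 * The real hypercall takes more arguments than shown here.
		 */
		ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size);
		if (ret)
			return ret;

		/* Step 6: the temporary, host-built EL2 page-table is now unused. */
		free_hyp_pgds();

		return 0;
	}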


Re: [PATCH v4 15/34] arm64: asm: Provide set_sctlr_el2 macro

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:32PM +, Quentin Perret wrote:
> We will soon need to turn the EL2 stage 1 MMU on and off in nVHE
> protected mode, so refactor the set_sctlr_el1 macro to make it usable
> for that purpose.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/assembler.h | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)

Acked-by: Will Deacon 

Will


Re: [PATCH v4 17/34] KVM: arm64: Elevate hypervisor mappings creation at EL2

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:34PM +, Quentin Perret wrote:
> Previous commits have introduced infrastructure to enable the EL2 code
> to manage its own stage 1 mappings. However, this was preliminary work,
> and none of it is currently in use.
> 
> Put all of this together by elevating the mapping creation at EL2 when
> memory protection is enabled. In this case, the host kernel running
> at EL1 still creates _temporary_ EL2 mappings, only used while
> initializing the hypervisor, but frees them right after.
> 
> As such, all calls to create_hyp_mappings() after kvm init has finished
> turn into hypercalls, as the host now has no 'legal' way to modify the
> hypervisor page tables directly.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_mmu.h |  2 +-
>  arch/arm64/kvm/arm.c | 87 +---
>  arch/arm64/kvm/mmu.c | 43 ++--
>  3 files changed, 120 insertions(+), 12 deletions(-)

Acked-by: Will Deacon 

Will
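
To restate the "calls turn into hypercalls" point as a hedged sketch (the
__pkvm_create_mappings signature below is an assumption; only the idea that
late callers must go through a hypercall comes from the commit message):

	/*
	 * Hedged sketch: after kvm init, the host can no longer write the EL2
	 * stage 1 itself, so a mapping request becomes a hypercall.
	 */
	static int create_hyp_mappings_late(void *from, void *to,
					    enum kvm_pgtable_prot prot)
	{
		unsigned long start = kern_hyp_va((unsigned long)from);
		unsigned long size = PAGE_ALIGN((char *)to - (char *)from);

		return kvm_call_hyp_nvhe(__pkvm_create_mappings, start, size,
					 __pa(from), prot);
	}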


Re: [PATCH v4 27/34] KVM: arm64: Always zero invalid PTEs

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:44PM +, Quentin Perret wrote:
> kvm_set_invalid_pte() currently only clears bit 0 from a PTE because
> stage2_map_walk_table_post() needs to be able to follow the anchor. In
> preparation for re-using bits 63-02 from invalid PTEs, make sure to zero

Why do you exclude bit 1 from this range?

> it entirely by ensuring to cache the anchor's child upfront.
> 
> Suggested-by: Will Deacon 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/kvm/hyp/pgtable.c | 26 --
>  1 file changed, 16 insertions(+), 10 deletions(-)

For the patch:

Acked-by: Will Deacon 

Will
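
The change is small enough to restate as a sketch (the helper name matches
the commit message; the body below is the gist, not the exact hunk):

	static void kvm_set_invalid_pte(kvm_pte_t *ptep)
	{
		/*
		 * Previously only bit 0 (the valid bit) was cleared so that
		 * stage2_map_walk_table_post() could still follow the anchor's
		 * table pointer; with the child cached upfront, the whole PTE
		 * can be zeroed, freeing the upper bits for other uses.
		 */
		WRITE_ONCE(*ptep, 0);
	}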


Re: [PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:45PM +, Quentin Perret wrote:
> As the host stage 2 will be identity mapped, all the .hyp memory regions
> and/or memory pages donated to protected guests will have to be marked
> invalid in the host stage 2 page-table. At the same time, the hypervisor
> will need a way to track the ownership of each physical page to ensure
> memory sharing or donation between entities (host, guests, hypervisor) is
> legal.
> 
> In order to enable this tracking at EL2, let's use the host stage 2
> page-table itself. The idea is to use the top bits of invalid mappings
> to store the unique identifier of the page owner. The page-table owner
> (the host) gets identifier 0 such that, at boot time, it owns the entire
> IPA space as the pgd starts zeroed.
> 
> Provide kvm_pgtable_stage2_set_owner() which allows modifying the
> ownership of pages in the host stage 2. It re-uses most of the map()
> logic, but ends up creating invalid mappings instead. This impacts
> how we do refcounting as we now need to count invalid mappings when they
> are used for ownership tracking.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 21 +++
>  arch/arm64/kvm/hyp/pgtable.c | 92 
>  2 files changed, 101 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> b/arch/arm64/include/asm/kvm_pgtable.h
> index 4ae19247837b..b09af4612656 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
> addr, u64 size,
>  u64 phys, enum kvm_pgtable_prot prot,
>  void *mc);
>  
> +/**
> + * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with metadata
> + *   encoding the ownership of a page in the
> + *   IPA space.
> + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
> + * @addr:Intermediate physical address at which to place the annotation.

This confused me a bit, as the annotation is stored in the page-table, not
at the memory identified by @addr. How about:

  "Base intermediate physical address to annotate"

> + * @size:Size of the IPA range to annotate.

  "Size of the annotated range"

> + * @mc:  Cache of pre-allocated and zeroed memory from which to 
> allocate
> + *   page-table pages.
> + * @owner_id:Unique identifier for the owner of the page.
> + *
> + * The page-table owner has identifier 0.

Perhaps, "By default, all page-tables are owned by identifier 0"

> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
> +  void *mc, u32 owner_id);

Is there a need for the owner_id to be 32-bit rather than e.g. 16-bit? Just
strikes me that it might be difficult to recover these bits in future if we
give them out freely now.

> +
>  /**
>   * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 
> page-table.
>   * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index f37b4179b880..e4670b639726 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -48,6 +48,8 @@
>KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
>KVM_PTE_LEAF_ATTR_HI_S2_XN)
>  
> +#define KVM_INVALID_PTE_OWNER_MASK   GENMASK(63, 32)

Ah, so that '02' earlier was a typo for '32'.

> +
>  struct kvm_pgtable_walk_data {
>   struct kvm_pgtable  *pgt;
>   struct kvm_pgtable_walker   *walker;
> @@ -186,6 +188,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, 
> kvm_pte_t attr, u32 level)
>   return pte;
>  }
>  
> +static kvm_pte_t kvm_init_invalid_leaf_owner(u32 owner_id)
> +{
> + return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
> +}
> +
>  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 
> addr,
> u32 level, kvm_pte_t *ptep,
> enum kvm_pgtable_walk_flags flag)
> @@ -440,6 +447,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
>  struct stage2_map_data {
>   u64 phys;
>   kvm_pte_t   attr;
> + u32 owner_id;
>  
>   kvm_pte_t   *anchor;
>   kvm_pte_t   *childp;
> @@ -506,6 +514,24 @@ static int stage2_map_set_prot_attr(enum 
> kvm_pgtable_prot prot,
>   return 0;
>  }
>  
> +static bool stage2_is_permission_change(kvm_pte_t old, kvm_pte_t new)
> +{
> + if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
> + return false;
> +
> + return !((ol
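
FWIW, the natural inverse of kvm_init_invalid_leaf_owner() above, i.e. how a
table reader would recover the owner, would look roughly like this (helper
name illustrative, not from the patch):

	static u32 kvm_invalid_pte_owner(kvm_pte_t pte)
	{
		/* Same GENMASK(63, 32) field that the setter populates. */
		return FIELD_GET(KVM_INVALID_PTE_OWNER_MASK, pte);
	}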

Re: [PATCH v4 29/34] KVM: arm64: Refactor stage2_map_set_prot_attr()

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:46PM +, Quentin Perret wrote:
> In order to ease its re-use in other code paths, refactor
> stage2_map_set_prot_attr() to not depend on a stage2_map_data struct.
> No functional change intended.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/kvm/hyp/pgtable.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index e4670b639726..c16e0306dd9a 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -489,8 +489,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
>   return vtcr;
>  }
>  
> -static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
> - struct stage2_map_data *data)
> +static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
>  {
>   bool device = prot & KVM_PGTABLE_PROT_DEVICE;
>   kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
> @@ -510,7 +509,8 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
> prot,
>  
>   attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
>   attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
> - data->attr = attr;
> + *ptep = attr;
> +
>   return 0;
>  }
>  
> @@ -728,7 +728,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
> addr, u64 size,
>   .arg= &map_data,
>   };
>  
> - ret = stage2_map_set_prot_attr(prot, &map_data);
> + ret = stage2_set_prot_attr(prot, &map_data.attr);
>   if (ret)
>   return ret;

(nit: this is now different to hyp_map_set_prot_attr() -- can we do the same
thing there, please?)

With that:

Acked-by: Will Deacon 

Will


Re: [PATCH v4 30/34] KVM: arm64: Add kvm_pgtable_stage2_find_range()

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:47PM +, Quentin Perret wrote:
> Since the host stage 2 will be identity mapped, and since it will own
> most of the memory, it would be preferable for performance to try and use
> large block mappings whenever possible. To ease this, introduce a new
> helper in the KVM page-table code which allows searching for large
> ranges of available IPA space. This will be used in the host memory
> abort path to greedily idmap large portions of the PA space.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 30 ++
>  arch/arm64/kvm/hyp/pgtable.c | 90 +++-
>  2 files changed, 117 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> b/arch/arm64/include/asm/kvm_pgtable.h
> index b09af4612656..477bf10c48a9 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -94,6 +94,16 @@ enum kvm_pgtable_prot {
>  #define PAGE_HYP_RO  (KVM_PGTABLE_PROT_R)
>  #define PAGE_HYP_DEVICE  (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
>  
> +/**
> + * struct kvm_mem_range - Range of Intermediate Physical Addresses
> + * @start:   Start of the range.
> + * @end: End of the range.
> + */
> +struct kvm_mem_range {
> + u64 start;
> + u64 end;
> +};
> +
>  /**
>   * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table 
> walk.
>   * @KVM_PGTABLE_WALK_LEAF:   Visit leaf entries, including invalid
> @@ -398,4 +408,24 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, 
> u64 addr, u64 size);
>  int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
>struct kvm_pgtable_walker *walker);
>  
> +/**
> + * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
> + *Addresses with compatible permission
> + *attributes.
> + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
> + * @addr:Address that must be covered by the range.
> + * @prot:Protection attributes that the range must be compatible with.
> + * @range:   Range structure used to limit the search space at call time and
> + *   that will hold the result.
> + *
> + * The offset of @addr within a page is ignored. An existing mapping is 
> defined
> + * as compatible with @prot if it is invalid and not owned by another 
> entity, or
> + * if its permission attributes are strictly similar to @prot and it has no
> + * software bits set.

nit: I think the 'or' is ambiguous here, as it makes it sound like an
invalid entry that _is_ owned by another entity is compatible if the
permissions attribute match. How about:

  | An IPA is compatible with prot iff its corresponding stage-2 page-table
  | entry has default ownership and, if valid, is mapped with protection
  | attributes identical to @prot.

> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
> +   enum kvm_pgtable_prot prot,
> +   struct kvm_mem_range *range);
>  #endif   /* __ARM64_KVM_PGTABLE_H__ */
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index c16e0306dd9a..f20287bb3e41 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -48,6 +48,8 @@
>KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
>KVM_PTE_LEAF_ATTR_HI_S2_XN)
>  
> +#define KVM_PTE_LEAF_ATTR_S2_RES GENMASK(58, 55)

Please make this IGN instead of RES, to match the architecture (and because
RES is usually used in the context of RES0 or RES1).

>  #define KVM_INVALID_PTE_OWNER_MASK   GENMASK(63, 32)
>  
>  struct kvm_pgtable_walk_data {
> @@ -69,10 +71,8 @@ static u64 kvm_granule_size(u32 level)
>   return BIT(kvm_granule_shift(level));
>  }
>  
> -static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 
> level)
> +static bool kvm_level_support_block_mappings(u32 level)
>  {
> - u64 granule = kvm_granule_size(level);
> -
>   /*
>* Reject invalid block mappings and don't bother with 4TB mappings for
>* 52-bit PAs.
> @@ -80,6 +80,16 @@ static bool kvm_block_mapping_supported(u64 addr, u64 end, 
> u64 phys, u32 level)
>   if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
>   return false;
>  
> + return true;

return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));

> +static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 
> level)
> +{
> + u64 granule = kvm_granule_size(level);
> +
> + if (!kvm_level_support_block_mappings(level))
> + return false;
> +
>   if (granule > (end - addr))
>   return false;
>  
> @@ -1042,3 +1052,77 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable 
> *pgt
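
For illustration, a rough sketch of how the new helper is meant to be used
on the host memory abort path described in the commit message; the caller
name and the ia_bits-based search limit are assumptions, only the two
kvm_pgtable_stage2_*() prototypes come from the quoted header:

	/* Hedged sketch: widen the mapping around the faulting IPA, then
	 * idmap the whole compatible range in one go.
	 */
	static int host_stage2_idmap_one(struct kvm_pgtable *pgt, u64 fault_ipa,
					 enum kvm_pgtable_prot prot, void *mc)
	{
		struct kvm_mem_range range = {
			.start	= 0,
			.end	= BIT(pgt->ia_bits),	/* assumed IPA-size field */
		};
		int ret;

		ret = kvm_pgtable_stage2_find_range(pgt, fault_ipa, prot, &range);
		if (ret)
			return ret;

		return kvm_pgtable_stage2_map(pgt, range.start,
					      range.end - range.start,
					      range.start, prot, mc);
	}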

Re: [PATCH v4 31/34] KVM: arm64: Wrap the host with a stage 2

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:48PM +, Quentin Perret wrote:
> When KVM runs in protected nVHE mode, make use of a stage 2 page-table
> to give the hypervisor some control over the host memory accesses. The
> host stage 2 is created lazily using large block mappings if possible,
> and will default to page mappings in the absence of a better solution.
> 
> From this point on, memory accesses from the host to protected memory
> regions (e.g. marked PROT_NONE) are fatal and lead to hyp_panic().
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_asm.h  |   1 +
>  arch/arm64/include/asm/kvm_cpufeature.h   |   2 +
>  arch/arm64/kernel/image-vars.h|   3 +
>  arch/arm64/kvm/arm.c  |  10 +
>  arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  34 +++
>  arch/arm64/kvm/hyp/nvhe/Makefile  |   2 +-
>  arch/arm64/kvm/hyp/nvhe/hyp-init.S|   1 +
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c|  11 +
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 246 ++
>  arch/arm64/kvm/hyp/nvhe/setup.c   |   5 +
>  arch/arm64/kvm/hyp/nvhe/switch.c  |   7 +-
>  arch/arm64/kvm/hyp/nvhe/tlb.c |   4 +-
>  12 files changed, 319 insertions(+), 7 deletions(-)
>  create mode 100644 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/mem_protect.c

I like this a lot more now, thanks:

Acked-by: Will Deacon 

Will


Re: [PATCH v4 34/34] KVM: arm64: Protect the .hyp sections from the host

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:51PM +, Quentin Perret wrote:
> When KVM runs in nVHE protected mode, use the host stage 2 to unmap the
> hypervisor sections by marking them as owned by the hypervisor itself.
> The long-term goal is to ensure the EL2 code can remain robust
> regardless of the host's state, so this starts by making sure the host
> cannot e.g. write to the .hyp sections directly.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_asm.h  |  1 +
>  arch/arm64/kvm/arm.c  | 46 +++
>  arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c|  9 
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 33 +
>  5 files changed, 91 insertions(+)

Acked-by: Will Deacon 

Will
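
As a hedged sketch of the mechanism (reusing the kvm_pgtable_stage2_set_owner()
prototype from the 28/34 thread; the wrapper name, the memcache argument and
the owner id value are assumptions for illustration only):

	/*
	 * Hedged sketch: marking the .hyp sections with a non-default owner
	 * (here 1, standing for "the hypervisor") leaves them invalid -- and
	 * therefore inaccessible -- in the host's stage 2.
	 */
	static int host_stage2_protect_hyp_sections(struct kvm_pgtable *host_pgt,
						    phys_addr_t start,
						    phys_addr_t end, void *mc)
	{
		return kvm_pgtable_stage2_set_owner(host_pgt, start, end - start,
						    mc, 1 /* assumed hyp owner id */);
	}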


Re: [PATCH v4 13/34] KVM: arm64: Enable access to sanitized CPU features at EL2

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:30PM +, Quentin Perret wrote:
> Introduce the infrastructure in KVM to copy CPU feature registers into
> EL2-owned data-structures, allowing sanitised values to be read directly
> at EL2 in nVHE.
> 
> Given that only a subset of these features are being read by the
> hypervisor, the ones that need to be copied are to be listed under
> <asm/kvm_cpufeature.h> together with the name of the nVHE variable that
> will hold the copy.
> 
> While at it, introduce the first user of this infrastructure by
> implementing __flush_dcache_area at EL2, which needs
> arm64_ftr_reg_ctrel0.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/cpufeature.h |  1 +
>  arch/arm64/include/asm/kvm_cpufeature.h | 17 +
>  arch/arm64/include/asm/kvm_host.h   |  4 
>  arch/arm64/kernel/cpufeature.c  | 13 +
>  arch/arm64/kvm/hyp/nvhe/Makefile|  3 ++-
>  arch/arm64/kvm/hyp/nvhe/cache.S | 13 +
>  arch/arm64/kvm/hyp/nvhe/cpufeature.c|  8 
>  arch/arm64/kvm/sys_regs.c   | 21 +
>  8 files changed, 79 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/include/asm/kvm_cpufeature.h
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/cache.S
>  create mode 100644 arch/arm64/kvm/hyp/nvhe/cpufeature.c
> 
> diff --git a/arch/arm64/include/asm/cpufeature.h 
> b/arch/arm64/include/asm/cpufeature.h
> index 61177bac49fa..a85cea2cac57 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -607,6 +607,7 @@ void check_local_cpu_capabilities(void);
>  
>  u64 read_sanitised_ftr_reg(u32 id);
>  u64 __read_sysreg_by_encoding(u32 sys_id);
> +int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst);
>  
>  static inline bool cpu_supports_mixed_endian_el0(void)
>  {
> diff --git a/arch/arm64/include/asm/kvm_cpufeature.h 
> b/arch/arm64/include/asm/kvm_cpufeature.h
> new file mode 100644
> index ..d34f85cba358
> --- /dev/null
> +++ b/arch/arm64/include/asm/kvm_cpufeature.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2020 - Google LLC
> + * Author: Quentin Perret 
> + */
> +
> +#include 
> +
> +#ifndef KVM_HYP_CPU_FTR_REG
> +#if defined(__KVM_NVHE_HYPERVISOR__)
> +#define KVM_HYP_CPU_FTR_REG(id, name) extern struct arm64_ftr_reg name;
> +#else
> +#define KVM_HYP_CPU_FTR_REG(id, name) DECLARE_KVM_NVHE_SYM(name);
> +#endif
> +#endif
> +
> +KVM_HYP_CPU_FTR_REG(SYS_CTR_EL0, arm64_ftr_reg_ctrel0)
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 06ca4828005f..459ee557f87c 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -751,9 +751,13 @@ void kvm_clr_pmu_events(u32 clr);
>  
>  void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
> +
> +void setup_kvm_el2_caps(void);
>  #else
>  static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) 
> {}
>  static inline void kvm_clr_pmu_events(u32 clr) {}
> +
> +static inline void setup_kvm_el2_caps(void) {}
>  #endif
>  
>  void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 066030717a4c..f2d8b479ff74 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1154,6 +1154,18 @@ u64 read_sanitised_ftr_reg(u32 id)
>  }
>  EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
>  
> +int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst)
> +{
> + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
> +
> + if (!regp)
> + return -EINVAL;
> +
> + memcpy(dst, regp, sizeof(*regp));

Can you not just use struct assignment here?
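
(For reference, the function with that suggestion applied would read roughly
as below -- a sketch, not a replacement hunk:)

	int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst)
	{
		struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);

		if (!regp)
			return -EINVAL;

		*dst = *regp;	/* struct assignment instead of memcpy() */

		return 0;
	}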

> diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
> new file mode 100644
> index ..36cef6915428
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/nvhe/cache.S
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Code copied from arch/arm64/mm/cache.S.
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +SYM_FUNC_START_PI(__flush_dcache_area)
> + dcache_by_line_op civac, sy, x0, x1, x2, x3
> + ret
> +SYM_FUNC_END_PI(__flush_dcache_area)

Separate patch for this? (or fold it into that one really near the start
that introduces some other PI helpers).

> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 4f2f1e3145de..84be93df52fa 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -2775,3 +2776,23 @@ void kvm_sys_reg_table_init(void)
>   /* Clear all higher bits. */
>   cache_levels &= (1 << (i*3))-1;
>  }
> +
> +#undef KVM_HYP_CPU_FTR_REG
> +#define KVM_HYP_CPU_FTR_REG(id, name) \
> + { .sys_id = id, .dst = (struct arm6