Re: [PATCH v3 03/12] KVM: arm/arm64: Replace the single rdist region by a list

2018-04-24 Thread Christoffer Dall
On Fri, Apr 13, 2018 at 10:20:49AM +0200, Eric Auger wrote:
> At the moment KVM supports a single rdist region. We want to
> support several separate rdist regions so let's introduce a list
> of them. This patch currently only cares about a single
> entry in this list as the functionality to register several redist
> regions is not yet there. So this only translates the existing code
> into something functionally similar using that new data struct.
> 
> The redistributor region handle is stored in the vgic_cpu structure
> to allow later computation of the TYPER last bit.
> 
> Signed-off-by: Eric Auger <eric.au...@redhat.com>

Reviewed-by: Christoffer Dall <christoffer.d...@arm.com>

> ---
>  include/kvm/arm_vgic.h  | 14 +
>  virt/kvm/arm/vgic/vgic-init.c   | 16 --
>  virt/kvm/arm/vgic/vgic-kvm-device.c | 13 ++--
>  virt/kvm/arm/vgic/vgic-mmio-v3.c    | 42 -
>  virt/kvm/arm/vgic/vgic-v3.c | 20 +++---
>  5 files changed, 79 insertions(+), 26 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 24f0394..e5c16d1 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -200,6 +200,14 @@ struct vgic_its {
>  
>  struct vgic_state_iter;
>  
> +struct vgic_redist_region {
> + uint32_t index;
> + gpa_t base;
> + uint32_t count; /* number of redistributors or 0 if single region */
> + uint32_t free_index; /* index of the next free redistributor */
> + struct list_head list;
> +};
> +
>  struct vgic_dist {
>   bool        in_kernel;
>   bool        ready;
> @@ -219,10 +227,7 @@ struct vgic_dist {
>   /* either a GICv2 CPU interface */
>   gpa_t   vgic_cpu_base;
>   /* or a number of GICv3 redistributor regions */
> - struct {
> - gpa_t   vgic_redist_base;
> - gpa_t   vgic_redist_free_offset;
> - };
> + struct list_head rd_regions;
>   };
>  
>   /* distributor enabled */
> @@ -310,6 +315,7 @@ struct vgic_cpu {
>*/
>   struct vgic_io_device   rd_iodev;
>   struct vgic_io_device   sgi_iodev;
> + struct vgic_redist_region *rdreg;
>  
>   /* Contains the attributes and gpa of the LPI pending tables. */
>   u64 pendbaser;
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index c52f03d..6456371 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -167,8 +167,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
>   kvm->arch.vgic.vgic_model = type;
>  
>   kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
> - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
> - kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
> +
> + if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
> + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
> + else
> + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
>  
>  out_unlock:
>   for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
> @@ -303,6 +306,7 @@ int vgic_init(struct kvm *kvm)
>  static void kvm_vgic_dist_destroy(struct kvm *kvm)
>  {
>   struct vgic_dist *dist = &kvm->arch.vgic;
> + struct vgic_redist_region *rdreg, *next;
>  
>   dist->ready = false;
>   dist->initialized = false;
> @@ -311,6 +315,14 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>   dist->spis = NULL;
>   dist->nr_spis = 0;
>  
> + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
> + list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
> + list_del(&rdreg->list);
> + kfree(rdreg);
> + }
> + INIT_LIST_HEAD(&dist->rd_regions);
> + }
> +
>   if (vgic_supports_direct_msis(kvm))
>   vgic_v4_teardown(kvm);
>  }
> diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
> index 10ae6f3..e7b5a86 100644
> --- a/virt/kvm/arm/vgic/vgic-kvm-device.c
> +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
> @@ -66,6 +66,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
>   int r = 0;
>   struct vgic_dist *vgic = &kvm->arch.vgic;
>   phys_addr_t *addr_ptr, alignment;
> + uint64_t undef_value = VGIC_ADDR_UNDEF;
>  
>   mutex_lock(&kvm->lock);
>   switch (type) {
> @@ -84,7 +85,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
>   

Re: [PATCH v3 05/12] KVM: arm/arm64: Revisit Redistributor TYPER last bit computation

2018-04-24 Thread Christoffer Dall
On Fri, Apr 13, 2018 at 10:20:51AM +0200, Eric Auger wrote:
> The TYPER of an redistributor reflects whether the rdist is
> the last one of the redistributor region. Let's compare the TYPER
> GPA against the address of the last occupied slot within the
> redistributor region.
> 
> Signed-off-by: Eric Auger <eric.au...@redhat.com>
> ---
>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index 49ca176..ce5c927 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -184,12 +184,17 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
> gpa_t addr, unsigned int len)
>  {
>   unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
> + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> + struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
>   int target_vcpu_id = vcpu->vcpu_id;
> + gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
> + (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
>   u64 value;
>  
>   value = (u64)(mpidr & GENMASK(23, 0)) << 32;
>   value |= ((target_vcpu_id & 0xffff) << 8);
> - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
> +
> + if (addr == last_rdist_typer)
>   value |= GICR_TYPER_LAST;
>   if (vgic_has_its(vcpu->kvm))
>   value |= GICR_TYPER_PLPIS;
> -- 
> 2.5.5
> 

Reviewed-by: Christoffer Dall <christoffer.d...@arm.com>
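
To make the computation above concrete, here is a worked example with
illustrative values (KVM_VGIC_V3_REDIST_SIZE is 2 * SZ_64K = 0x20000, and
GICR_TYPER sits at offset 0x8 in each redistributor frame):

	/*
	 * With rdreg->base = 0x080a0000 and free_index = 4 (four
	 * redistributors registered in this region):
	 *
	 *   last_rdist_typer = 0x080a0000 + 0x8 + (4 - 1) * 0x20000
	 *                    = 0x08100008
	 *
	 * so only a GICR_TYPER read in the fourth (last) frame sees
	 * GICR_TYPER_LAST set.
	 */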


Re: [PATCH v3 01/12] KVM: arm/arm64: Set dist->spis to NULL after kfree

2018-04-24 Thread Christoffer Dall
On Fri, Apr 13, 2018 at 10:20:47AM +0200, Eric Auger wrote:
> In case kvm_vgic_map_resources() fails, typically if the vgic
> distributor is not defined, __kvm_vgic_destroy will be called
> several times. Indeed kvm_vgic_map_resources() is called on
> first vcpu run. As a result dist->spis is freed more than once
> and on the second time it causes a "kernel BUG at mm/slub.c:3912!"
> 
> Set dist->spis to NULL to avoid the crash.
> 
> Fixes: ad275b8bb1e6 ("KVM: arm/arm64: vgic-new: vgic_init: implement vgic_init")
> 
> Signed-off-by: Eric Auger <eric.au...@redhat.com>
> Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>

Reviewed-by: Christoffer Dall <christoffer.d...@arm.com>

> 
> ---
> 
> v2 -> v3:
> - added Marc's R-b and Fixed commit
> ---
>  virt/kvm/arm/vgic/vgic-init.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 68378fe..c52f03d 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -308,6 +308,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>   dist->initialized = false;
>  
>   kfree(dist->spis);
> + dist->spis = NULL;
>   dist->nr_spis = 0;
>  
>   if (vgic_supports_direct_msis(kvm))
> -- 
> 2.5.5
> 


Re: [PATCH v3 06/12] KVM: arm/arm64: Helper to register a new redistributor region

2018-04-24 Thread Christoffer Dall
On Fri, Apr 13, 2018 at 10:20:52AM +0200, Eric Auger wrote:
> We introduce a new helper that creates and inserts a new redistributor
> region into the rdist region list. This helper both handles the case
> where the redistributor region size is known at registration time
> and the legacy case where it is not (eventually depending on the number
> of online vcpus). Depending on pfns, we perform all the possible checks
> that we can do:
> 
> - end of memory crossing
> - incorrect alignment of the base address
> - collision with distributor region if already defined
> - collision with already registered rdist regions
> - check of the new index
> 
> Rdist regions must be inserted by increasing order of indices. Indices
> must be contiguous.
> 
> We also introduce vgic_v3_rdist_region_from_index() which will be used
> from the vgic kvm-device, later on.
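
(For reference, a rough sketch of what the lookup helper named above could
look like -- a plain walk of the index-sorted list; the actual patch may
differ:)

	struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
								   u32 index)
	{
		struct list_head *rd_regions = &kvm->arch.vgic.rd_regions;
		struct vgic_redist_region *rdreg;

		/* regions are inserted in increasing index order */
		list_for_each_entry(rdreg, rd_regions, list) {
			if (rdreg->index == index)
				return rdreg;
		}

		return NULL;
	}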
> 
> Signed-off-by: Eric Auger 
> ---
>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 95 +---
>  virt/kvm/arm/vgic/vgic-v3.c  | 29 
>  virt/kvm/arm/vgic/vgic.h | 14 ++
>  3 files changed, 122 insertions(+), 16 deletions(-)
> 
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index ce5c927..5273fb8 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -680,14 +680,66 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
>   return ret;
>  }
>  
> -int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr)
> +/**
> + * vgic_v3_insert_redist_region - Insert a new redistributor region
> + *
> + * Performs various checks before inserting the rdist region in the list.
> + * Those tests depend on whether the size of the rdist region is known
> + * (ie. count != 0). The list is sorted by rdist region index.
> + *
> + * @kvm: kvm handle
> + * @index: redist region index
> + * @base: base of the new rdist region
> + * @count: number of redistributors the region is made of (or 0 in the old
> + * style single region, whose size is induced from the number of vcpus)
> + *
> + * Return 0 on success, < 0 otherwise
> + */
> +static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
> + gpa_t base, uint32_t count)
>  {
> - struct vgic_dist *vgic = &kvm->arch.vgic;
> + struct vgic_dist *d = &kvm->arch.vgic;
>   struct vgic_redist_region *rdreg;
> + struct list_head *rd_regions = &d->rd_regions;
> + struct list_head *last = rd_regions->prev;
> +

nit: extra blank line?

> + gpa_t new_start, new_end;
> + size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
>   int ret;
>  
> - /* vgic_check_ioaddr makes sure we don't do this twice */
> - if (!list_empty(&vgic->rd_regions))
> + /* single rdist region already set ? */
> + if (!count && !list_empty(rd_regions))
> + return -EINVAL;
> +
> + /* cross the end of memory ? */
> + if (base + size < base)
> + return -EINVAL;

what is the size of memory?  This seems to check for a gpa_t overflow,
but not against the IPA space of the VM...
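
Something along these lines, perhaps (untested sketch; using KVM_PHYS_SIZE,
the fixed 40-bit IPA limit at this point in time, is my assumption about
which bound applies):

	/* cross the end of the VM's IPA space (also catches gpa_t wrap)? */
	if (base + size < base || base + size > KVM_PHYS_SIZE)
		return -E2BIG;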

> +
> + if (list_empty(rd_regions)) {
> + if (index != 0)
> + return -EINVAL;

note, I think this can be simplified if we can rid of the index.

> + } else {
> + rdreg = list_entry(last, struct vgic_redist_region, list);

you can use list_last_entry here and get rid of the 'last' temporary
variable above.
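
i.e. something like:

	rdreg = list_last_entry(rd_regions,
				struct vgic_redist_region, list);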

> + if (index != rdreg->index + 1)
> + return -EINVAL;
> +
> + /* Cannot add an explicitly sized region after legacy region */
> + if (!rdreg->count)
> + return -EINVAL;
> + }
> +
> + /*
> +  * collision with already set dist region ?
> +  * this assumes we know the size of the new rdist region (pfns != 0)
> +  * otherwise we can only test this when all vcpus are registered
> +  */

I don't really understand this commentary... :(

> + if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
> + (!(d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= base)) &&
> + (!(base + size <= d->vgic_dist_base)))
> + return -EINVAL;

Can't you call vgic_v3_check_base() here instead?

> +
> + /* collision with any other rdist region? */
> + if (vgic_v3_rdist_overlap(kvm, base, size))
>   return -EINVAL;
>  
>   rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL);
> @@ -696,17 +748,32 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr)
>  
>   rdreg->base = VGIC_ADDR_UNDEF;
>  
> - ret = vgic_check_ioaddr(kvm, &rdreg->base, addr, SZ_64K);
> + ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K);
>   if (ret)
> - goto out;
> + goto free;
>  
> - rdreg->base = addr;
> - if (!vgic_v3_check_base(kvm)) {
> - ret = -EINVAL;
> - goto out;
> - }
> + rdreg->base = base;
> + 

Re: [PATCH] KVM: arm/arm64: VGIC: Kick new VCPU on interrupt migration

2018-04-17 Thread Christoffer Dall
On Tue, Apr 17, 2018 at 11:23:49AM +0100, Andre Przywara wrote:
> When vgic_prune_ap_list() finds an interrupt that needs to be migrated
> to a new VCPU, we should notify this VCPU of the pending interrupt,
> since it requires immediate action.
> Kick this VCPU once we have added the new IRQ to the list, but only
> after dropping the locks.
> 
> Reported-by: Stefano Stabellini <sstabell...@kernel.org>
> Signed-off-by: Andre Przywara <andre.przyw...@arm.com>

Reviewed-by: Christoffer Dall <christoffer.d...@arm.com>

> ---
>  virt/kvm/arm/vgic/vgic.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index e74baec76361..4b6d72939c42 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -594,6 +594,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
>  
>   list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
>   struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
> + bool target_vcpu_needs_kick = false;
>  
>   spin_lock(&irq->irq_lock);
>  
> @@ -664,11 +665,18 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
>   list_del(&irq->ap_list);
>   irq->vcpu = target_vcpu;
>   list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
> + target_vcpu_needs_kick = true;
>   }
>  
>   spin_unlock(&irq->irq_lock);
>   spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
>   spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
> +
> + if (target_vcpu_needs_kick) {
> + kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
> + kvm_vcpu_kick(target_vcpu);
> + }
> +
>   goto retry;
>   }
>  
> -- 
> 2.14.1
> 


Re: [PATCH] arm64: KVM: Demote SVE and LORegion warnings to debug only

2018-04-17 Thread Christoffer Dall
On Tue, Apr 17, 2018 at 09:23:56AM +0100, Marc Zyngier wrote:
> While generating a message about guests probing for SVE/LORegions
> is a useful debugging tool, considering it an error is slightly
> over the top, as this is the only way the guest can find out
> about the presence of the feature.
> 
> Let's turn these messages into kvm_debug so that they can only
> be seen if CONFIG_DYNAMIC_DEBUG is enabled, and kept quiet otherwise.
> 
> Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>

Acked-by: Christoffer Dall <christoffer.d...@arm.com>

> ---
>  arch/arm64/kvm/sys_regs.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 806b0b126a64..6e3b969391fd 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -996,14 +996,12 @@ static u64 read_id_reg(struct sys_reg_desc const *r, 
> bool raz)
>  
>   if (id == SYS_ID_AA64PFR0_EL1) {
>   if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
> - pr_err_once("kvm [%i]: SVE unsupported for guests, 
> suppressing\n",
> - task_pid_nr(current));
> + kvm_debug("SVE unsupported for guests, suppressing\n");
>  
>   val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
>   } else if (id == SYS_ID_AA64MMFR1_EL1) {
>   if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
> - pr_err_once("kvm [%i]: LORegions unsupported for 
> guests, suppressing\n",
> - task_pid_nr(current));
> + kvm_debug("LORegions unsupported for guests, 
> suppressing\n");
>  
>   val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
>   }
> -- 
> 2.14.2
> 


[PATCH] MAINTAINERS: Update e-mail address for Christoffer Dall

2018-04-16 Thread Christoffer Dall
Update my e-mail address to a working address.

Signed-off-by: Christoffer Dall <christoffer.d...@arm.com>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0a1410d5a621..3e9c99d2620b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7738,7 +7738,7 @@ F:arch/x86/include/asm/svm.h
 F: arch/x86/kvm/svm.c
 
 KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm)
-M: Christoffer Dall <christoffer.d...@linaro.org>
+M: Christoffer Dall <christoffer.d...@arm.com>
 M: Marc Zyngier <marc.zyng...@arm.com>
 L: linux-arm-ker...@lists.infradead.org (moderated for non-subscribers)
 L: kvmarm@lists.cs.columbia.edu
@@ -7752,7 +7752,7 @@ F:virt/kvm/arm/
 F: include/kvm/arm_*
 
 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
-M: Christoffer Dall <christoffer.d...@linaro.org>
+M: Christoffer Dall <christoffer.d...@arm.com>
 M: Marc Zyngier <marc.zyng...@arm.com>
 L: linux-arm-ker...@lists.infradead.org (moderated for non-subscribers)
 L: kvmarm@lists.cs.columbia.edu
-- 
2.14.2



Re: [PATCH] KVM: arm/arm64: Close VMID generation race

2018-04-10 Thread Christoffer Dall
On Tue, Apr 10, 2018 at 04:37:12PM +0100, Marc Zyngier wrote:
> On 10/04/18 16:24, Mark Rutland wrote:
> > On Tue, Apr 10, 2018 at 05:05:40PM +0200, Christoffer Dall wrote:
> >> On Tue, Apr 10, 2018 at 11:51:19AM +0100, Mark Rutland wrote:
> >>> I think we also need to update kvm->arch.vttbr before updating
> >>> kvm->arch.vmid_gen, otherwise another CPU can come in, see that the
> >>> vmid_gen is up-to-date, jump to hyp, and program a stale VTTBR (with the
> >>> old VMID).
> >>>
> >>> With the smp_wmb() and update of kvm->arch.vmid_gen moved to the end of
> >>> the critical section, I think that works, modulo using READ_ONCE() and
> >>> WRITE_ONCE() to ensure single-copy-atomicity of the fields we access
> >>> locklessly.
> >>
> >> Indeed, you're right.  I would look something like this, then:
> >>
> >> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> >> index 2e43f9d42bd5..6cb08995e7ff 100644
> >> --- a/virt/kvm/arm/arm.c
> >> +++ b/virt/kvm/arm/arm.c
> >> @@ -450,7 +450,9 @@ void force_vm_exit(const cpumask_t *mask)
> >>   */
> >>  static bool need_new_vmid_gen(struct kvm *kvm)
> >>  {
> >> -  return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
> >> +  u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
> >> +  smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
> >> +  return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
> >>  }
> >>  
> >>  /**
> >> @@ -500,7 +502,6 @@ static void update_vttbr(struct kvm *kvm)
> >>kvm_call_hyp(__kvm_flush_vm_context);
> >>}
> >>  
> >> -  kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
> >>kvm->arch.vmid = kvm_next_vmid;
> >>kvm_next_vmid++;
> >>kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
> >> @@ -509,7 +510,10 @@ static void update_vttbr(struct kvm *kvm)
> >>pgd_phys = virt_to_phys(kvm->arch.pgd);
> >>BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
> >>vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & 
> >> VTTBR_VMID_MASK(kvm_vmid_bits);
> >> -  kvm->arch.vttbr = pgd_phys | vmid;
> >> +  WRITE_ONCE(kvm->arch.vttbr, pgd_phys | vmid);
> >> +
> >> +  smp_wmb(); /* Ensure vttbr update is observed before vmid_gen update */
> >> +  kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
> >>  
> >>spin_unlock(&kvm_vmid_lock);
> >>  }
> > 
> > I think that's right, yes.
> > 
> > We could replace the smp_{r,w}mb() barriers with an acquire of the
> > kvm_vmid_gen and a release of kvm->arch.vmid_gen, but if we're really
> > trying to optimize things there are larger algorithmic changes necessary
> > anyhow.
> > 
> >> It's probably easier to convince ourselves about the correctness of
> >> Marc's code using a rwlock instead, though.  Thoughts?
> > 
> > I believe that Marc's preference was the rwlock; I have no preference
> > either way.
> 
> I don't mind either way. If you can be bothered to write a proper commit
> log for this, I'll take it. 

You've already done the work, and your patch is easier to read, so let's
just go ahead with that.

I was just curious to what degree my original implementation was
broken; was I trying to achieve something impossible, or was I just
writing buggy code?  Seems the latter.  Oh well.

> What I'd really want is Shannon to indicate
> whether or not this solves the issue he was seeing.
> 

Agreed, would like to see that too.

Thanks (and sorry for being noisy),
-Christoffer


Re: [PATCH] KVM: arm/arm64: Close VMID generation race

2018-04-10 Thread Christoffer Dall
On Tue, Apr 10, 2018 at 04:24:20PM +0100, Mark Rutland wrote:
> On Tue, Apr 10, 2018 at 05:05:40PM +0200, Christoffer Dall wrote:
> > On Tue, Apr 10, 2018 at 11:51:19AM +0100, Mark Rutland wrote:
> > > I think we also need to update kvm->arch.vttbr before updating
> > > kvm->arch.vmid_gen, otherwise another CPU can come in, see that the
> > > vmid_gen is up-to-date, jump to hyp, and program a stale VTTBR (with the
> > > old VMID).
> > > 
> > > With the smp_wmb() and update of kvm->arch.vmid_gen moved to the end of
> > > the critical section, I think that works, modulo using READ_ONCE() and
> > > WRITE_ONCE() to ensure single-copy-atomicity of the fields we access
> > > locklessly.
> > 
> > Indeed, you're right.  I would look something like this, then:
> > 
> > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> > index 2e43f9d42bd5..6cb08995e7ff 100644
> > --- a/virt/kvm/arm/arm.c
> > +++ b/virt/kvm/arm/arm.c
> > @@ -450,7 +450,9 @@ void force_vm_exit(const cpumask_t *mask)
> >   */
> >  static bool need_new_vmid_gen(struct kvm *kvm)
> >  {
> > -   return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
> > +   u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
> > +   smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
> > +   return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
> >  }
> >  
> >  /**
> > @@ -500,7 +502,6 @@ static void update_vttbr(struct kvm *kvm)
> > kvm_call_hyp(__kvm_flush_vm_context);
> > }
> >  
> > -   kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
> > kvm->arch.vmid = kvm_next_vmid;
> > kvm_next_vmid++;
> > kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
> > @@ -509,7 +510,10 @@ static void update_vttbr(struct kvm *kvm)
> > pgd_phys = virt_to_phys(kvm->arch.pgd);
> > BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
> > vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & 
> > VTTBR_VMID_MASK(kvm_vmid_bits);
> > -   kvm->arch.vttbr = pgd_phys | vmid;
> > +   WRITE_ONCE(kvm->arch.vttbr, pgd_phys | vmid);
> > +
> > +   smp_wmb(); /* Ensure vttbr update is observed before vmid_gen update */
> > +   kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
> >  
> > spin_unlock(&kvm_vmid_lock);
> >  }
> 
> I think that's right, yes.
> 
> We could replace the smp_{r,w}mb() barriers with an acquire of the
> kvm_vmid_gen and a release of kvm->arch.vmid_gen, but if we're really
> trying to optimize things there are larger algorithmic changes necessary
> anyhow.
> 
> > It's probably easier to convince ourselves about the correctness of
> > Marc's code using a rwlock instead, though.  Thoughts?
> 
> I believe that Marc's preference was the rwlock; I have no preference
> either way.
> 

I'm fine with both approaches as well, but it was educational for me to
see whether this could be done locklessly.  Thanks for
having a look at that!
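
For reference, the acquire/release variant mentioned above would look
roughly like this (untested sketch; note that smp_store_release() on the
64-bit vmid_gen would need extra care on 32-bit arm, where the generic
single-copy guarantees don't cover 8-byte accesses):

	static bool need_new_vmid_gen(struct kvm *kvm)
	{
		/* Acquire pairs with the release in update_vttbr() */
		u64 current_vmid_gen = atomic64_read_acquire(&kvm_vmid_gen);

		return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen);
	}

	/* ... and at the end of update_vttbr(): */
		WRITE_ONCE(kvm->arch.vttbr, pgd_phys | vmid);
		/* Release: vttbr must be visible before the generation is */
		smp_store_release(&kvm->arch.vmid_gen,
				  atomic64_read(&kvm_vmid_gen));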

-Christoffer


Re: [RFC PATCH v3 4/4] KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing

2018-04-10 Thread Christoffer Dall
On Tue, Apr 10, 2018 at 11:32:50AM +0100, Dave Martin wrote:
> On Mon, Apr 09, 2018 at 11:22:43PM +0200, Christoffer Dall wrote:
> > Hi Dave,
> > 
> > On Mon, Apr 09, 2018 at 11:53:02AM +0100, Dave Martin wrote:
> > > This patch refactors KVM to align the host and guest FPSIMD
> > > save/restore logic with each other for arm64.  This reduces the
> > > number of redundant save/restore operations that must occur, and
> > > reduces the common-case IRQ blackout time during guest exit storms
> > > by saving the host state lazily and optimising away the need to
> > > restore the host state before returning to the run loop.
> > > 
> > 
> > [...]
> > 
> > > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > > index db08a54..74c5a46 100644
> > > --- a/arch/arm64/kernel/fpsimd.c
> > > +++ b/arch/arm64/kernel/fpsimd.c
> > 
> > [...]
> > 
> > > @@ -1054,15 +1066,20 @@ void fpsimd_update_current_state(struct 
> > > user_fpsimd_state const *state)
> > >   local_bh_enable();
> > >  }
> > >  
> > > +void fpsimd_flush_state(unsigned int *cpu)
> > 
> > This API looks strange to me, and doesn't seem to be called from
> > elsewhere.  Wouldn't it be more clear if it took a struct thread_struct
> > pointer instead, or if the logic remained embedded in
> > fpsimd_flush_task_state ?
> 
> Hmmm, thanks for spotting this -- it's a throwback to my previous
> approach.
> 
> I had intended to align KVM fully with the way host tasks' context is
> tracked, and this would involve a "most recent cpu FPSIMD loaded on"
> field in struct vcpu_arch: for ABI reasons this can't easily be tacked
> onto the end of struct user_fpsimd_state, so it would be necessary for
> it to be a separate field and passed to the relevant maintenance
> functions as a separate parameter.
> 
> This approach would allow the vcpu FPSIMD state to remain in the regs
> across a context switch without the need to reload it, but this also
> means that some flushing/invalidation of this cached view of the state
> would be needed around KVM_GET_ONE_REG etc. and at vcpu destruction
> time.  This function would be part of such a maintenance API.
> 
> For now though, this seemed like extra complexity for dubious benefit.
> 
> Unless you think it's worth pursuing this optimisation I should
> probably get rid of this function.  We can always bring this back
> later if we choose.
> 

Agreed, no need to pursue further optimizations at this time (ie.
before we have data that indicates it's worth it).


> > > +{
> > > + *cpu = NR_CPUS;
> > > +}
> > > +
> > >  /*
> > >   * Invalidate live CPU copies of task t's FPSIMD state
> > >   */
> > >  void fpsimd_flush_task_state(struct task_struct *t)
> > >  {
> > > - t->thread.fpsimd_cpu = NR_CPUS;
> > > + fpsimd_flush_state(&t->thread.fpsimd_cpu);
> > >  }
> > >  
> > > -static inline void fpsimd_flush_cpu_state(void)
> > > +void fpsimd_flush_cpu_state(void)
> > >  {
> > >   __this_cpu_write(fpsimd_last_state.st, NULL);
> > >  }
> > 
> > [...]
> > 
> > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > > index 8605e04..797b259 100644
> > > --- a/arch/arm64/kvm/hyp/switch.c
> > > +++ b/arch/arm64/kvm/hyp/switch.c
> > > @@ -27,6 +27,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  
> > >  static bool __hyp_text __fpsimd_enabled_nvhe(void)
> > >  {
> > > @@ -47,24 +48,40 @@ bool __hyp_text __fpsimd_enabled(void)
> > >   return __fpsimd_is_enabled()();
> > >  }
> > >  
> > > -static void __hyp_text __activate_traps_vhe(void)
> > > +static bool update_fp_enabled(struct kvm_vcpu *vcpu)
> > 
> > I think this needs a __hyp_text in the unlikely case that this function
> > is not inlined in the _nvhe caller by the compiler.
> 
> You're right.  I'll add it.
> 
> > > +{
> > > + if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) {
> > > + vcpu->arch.host_fpsimd_state = NULL;
> > > + vcpu->arch.fp_enabled = false;
> > > + }
> > 
> > I'm not clear why the above logic can't go into kvm_arch_vcpu_load_fp
> > and why we can't simply check TIF_FOREIGN_FPSTATE in __hyp_switch_fpsimd
> > instead?
> 
> The situation can change in between _load_fp() and here, because of
> kernel-mode NEON

Re: [PATCH] KVM: arm/arm64: Close VMID generation race

2018-04-10 Thread Christoffer Dall
On Tue, Apr 10, 2018 at 11:51:19AM +0100, Mark Rutland wrote:
> On Mon, Apr 09, 2018 at 10:51:39PM +0200, Christoffer Dall wrote:
> > On Mon, Apr 09, 2018 at 06:07:06PM +0100, Marc Zyngier wrote:
> > > Before entering the guest, we check whether our VMID is still
> > > part of the current generation. In order to avoid taking a lock,
> > > we start with checking that the generation is still current, and
> > > only if not current do we take the lock, recheck, and update the
> > > generation and VMID.
> > > 
> > > This leaves open a small race: A vcpu can bump up the global
> > > generation number as well as the VM's, but has not updated
> > > the VMID itself yet.
> > > 
> > > At that point another vcpu from the same VM comes in, checks
> > > the generation (and finds it not needing anything), and jumps
> > > into the guest. At this point, we end-up with two vcpus belonging
> > > to the same VM running with two different VMIDs. Eventually, the
> > > VMID used by the second vcpu will get reassigned, and things will
> > > really go wrong...
> > > 
> > > A simple solution would be to drop this initial check, and always take
> > > the lock. This is likely to cause performance issues. A middle ground
> > > is to convert the spinlock to a rwlock, and only take the read lock
> > > on the fast path. If the check fails at that point, drop it and
> > > acquire the write lock, rechecking the condition.
> > > 
> > > This ensures that the above scenario doesn't occur.
> > > 
> > > Reported-by: Mark Rutland <mark.rutl...@arm.com>
> > > Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>
> > > ---
> > > I haven't seen any reply from Shannon, so reposting this to
> > > a slightly wider audience for feedback.
> > > 
> > >  virt/kvm/arm/arm.c | 15 ++-
> > >  1 file changed, 10 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> > > index dba629c5f8ac..a4c1b76240df 100644
> > > --- a/virt/kvm/arm/arm.c
> > > +++ b/virt/kvm/arm/arm.c
> > > @@ -63,7 +63,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, 
> > > kvm_arm_running_vcpu);
> > >  static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
> > >  static u32 kvm_next_vmid;
> > >  static unsigned int kvm_vmid_bits __read_mostly;
> > > -static DEFINE_SPINLOCK(kvm_vmid_lock);
> > > +static DEFINE_RWLOCK(kvm_vmid_lock);
> > >  
> > >  static bool vgic_present;
> > >  
> > > @@ -473,11 +473,16 @@ static void update_vttbr(struct kvm *kvm)
> > >  {
> > >   phys_addr_t pgd_phys;
> > >   u64 vmid;
> > > + bool new_gen;
> > >  
> > > - if (!need_new_vmid_gen(kvm))
> > > + read_lock(&kvm_vmid_lock);
> > > + new_gen = need_new_vmid_gen(kvm);
> > > + read_unlock(&kvm_vmid_lock);
> > > +
> > > + if (!new_gen)
> > >   return;
> > >  
> > > - spin_lock(&kvm_vmid_lock);
> > > + write_lock(&kvm_vmid_lock);
> > >  
> > >   /*
> > >* We need to re-check the vmid_gen here to ensure that if another vcpu
> > > @@ -485,7 +490,7 @@ static void update_vttbr(struct kvm *kvm)
> > >* use the same vmid.
> > >*/
> > >   if (!need_new_vmid_gen(kvm)) {
> > > - spin_unlock(&kvm_vmid_lock);
> > > + write_unlock(&kvm_vmid_lock);
> > >   return;
> > >   }
> > >  
> > > @@ -519,7 +524,7 @@ static void update_vttbr(struct kvm *kvm)
> > >   vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & 
> > > VTTBR_VMID_MASK(kvm_vmid_bits);
> > >   kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
> > >  
> > > - spin_unlock(&kvm_vmid_lock);
> > > + write_unlock(&kvm_vmid_lock);
> > >  }
> > >  
> > >  static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
> > > -- 
> > > 2.14.2
> > > 
> > 
> > The above looks correct to me.  I am wondering if something like the
> > following would also work, which may be slightly more efficient,
> > although I doubt the difference can be measured:
> > 

[...]

> 
> I think we also need to update kvm->arch.vttbr before updating
> kvm->arch.vmid_gen, otherwise another CPU can come in, see that the
> vmid_gen is up-to-date, jump to hyp, and program a stale VTTBR (with the
> old VMID).
> 
> With the smp_wmb() and update of kvm->arch.vmid_gen moved to the end of

Re: [RFC v2 02/12] KVM: arm/arm64: Document KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION

2018-04-09 Thread Christoffer Dall
Hi Eric,

On Tue, Mar 27, 2018 at 04:04:06PM +0200, Eric Auger wrote:
> We introduce a new KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute in
> KVM_DEV_ARM_VGIC_GRP_ADDR group. It allows userspace to provide the
> base address and size of a redistributor region
> 
> Compared to KVM_VGIC_V3_ADDR_TYPE_REDIST, this new attribute allows
> to declare several separate redistributor regions.
> 
> So the whole redist space does not need to be contiguous anymore.
> 
> Signed-off-by: Eric Auger 
> ---
>  Documentation/virtual/kvm/devices/arm-vgic-v3.txt | 18 ++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> index 9293b45..0ded904 100644
> --- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> +++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> @@ -27,6 +27,24 @@ Groups:
>VCPU and all of the redistributor pages are contiguous.
>Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
>This address needs to be 64K aligned.
> +
> +KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
> +  The attr field of kvm_device_attr encodes 3 values:
> +  bits:     | 63  ....  52 | 51  ....  16 | 15 - 12  | 11 - 0
> +  values:   |    count     |     base     |  flags   | index
> +  - index encodes the unique redistributor region index
> +  - flags: reserved for future use, currently 0
> +  - base field encodes bits [51:16] of the guest physical base address
> +of the first redistributor in the region. There are two 64K pages
> +for each VCPU and all of the redistributor pages are contiguous

should this be two 64K pages for the number of redistributors in this
region as specified by count ?

> +within the redistributor region.
> +  - count encodes the number of redistributors in the region.

I assume it's implied that the user must register a total number of
redistributors across all the regions that matches the number of vcpus,
and that otherwise something bad happens?
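
For illustration, packing the attr from userspace per the table above
would look roughly like this (hypothetical sketch; it assumes the new
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute from this series and a
'vgic_fd' obtained via KVM_CREATE_DEVICE):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int set_redist_region(int vgic_fd, uint64_t base,
				     uint32_t count, uint32_t index)
	{
		/* count[63:52] | base bits [51:16] in place | flags = 0 | index[11:0] */
		uint64_t addr = ((uint64_t)count << 52) |
				(base & 0x000fffffffff0000ULL) |
				(index & 0xfff);
		struct kvm_device_attr attr = {
			.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
			.attr  = KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
			.addr  = (uint64_t)&addr,
		};

		return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
	}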

> +  Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
> +
> +  It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and
> +  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes. When attempted an
> +  -EINVAL error is returned.
> +
>Errors:
>  -E2BIG:  Address outside of addressable IPA range
>  -EINVAL: Incorrectly aligned address
> -- 
> 2.5.5
> 

Thanks,
-Christoffer


Re: [RFC PATCH v3 4/4] KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing

2018-04-09 Thread Christoffer Dall
Hi Dave,

On Mon, Apr 09, 2018 at 11:53:02AM +0100, Dave Martin wrote:
> This patch refactors KVM to align the host and guest FPSIMD
> save/restore logic with each other for arm64.  This reduces the
> number of redundant save/restore operations that must occur, and
> reduces the common-case IRQ blackout time during guest exit storms
> by saving the host state lazily and optimising away the need to
> restore the host state before returning to the run loop.
> 

[...]

> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index db08a54..74c5a46 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c

[...]

> @@ -1054,15 +1066,20 @@ void fpsimd_update_current_state(struct 
> user_fpsimd_state const *state)
>   local_bh_enable();
>  }
>  
> +void fpsimd_flush_state(unsigned int *cpu)

This API looks strange to me, and doesn't seem to be called from
elsewhere.  Wouldn't it be more clear if it took a struct thread_struct
pointer instead, or if the logic remained embedded in
fpsimd_flush_task_state ?

> +{
> + *cpu = NR_CPUS;
> +}
> +
>  /*
>   * Invalidate live CPU copies of task t's FPSIMD state
>   */
>  void fpsimd_flush_task_state(struct task_struct *t)
>  {
> - t->thread.fpsimd_cpu = NR_CPUS;
> + fpsimd_flush_state(&t->thread.fpsimd_cpu);
>  }
>  
> -static inline void fpsimd_flush_cpu_state(void)
> +void fpsimd_flush_cpu_state(void)
>  {
>   __this_cpu_write(fpsimd_last_state.st, NULL);
>  }

[...]

> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 8605e04..797b259 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  static bool __hyp_text __fpsimd_enabled_nvhe(void)
>  {
> @@ -47,24 +48,40 @@ bool __hyp_text __fpsimd_enabled(void)
>   return __fpsimd_is_enabled()();
>  }
>  
> -static void __hyp_text __activate_traps_vhe(void)
> +static bool update_fp_enabled(struct kvm_vcpu *vcpu)

I think this needs a __hyp_text in the unlikely case that this function
is not inlined in the _nvhe caller by the compiler.

> +{
> + if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) {
> + vcpu->arch.host_fpsimd_state = NULL;
> + vcpu->arch.fp_enabled = false;
> + }

I'm not clear why the above logic can't go into kvm_arch_vcpu_load_fp
and why we can't simply check TIF_FOREIGN_FPSTATE in __hyp_switch_fpsimd
instead?

> +
> + return vcpu->arch.fp_enabled;
> +}
> +
> +static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu)
>  {
>   u64 val;
>  
>   val = read_sysreg(cpacr_el1);
>   val |= CPACR_EL1_TTA;
> - val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
> + val &= ~CPACR_EL1_ZEN;
> + if (!update_fp_enabled(vcpu))
> + val &= ~CPACR_EL1_FPEN;
> +
>   write_sysreg(val, cpacr_el1);
>  
>   write_sysreg(kvm_get_hyp_vector(), vbar_el1);
>  }
>  
> -static void __hyp_text __activate_traps_nvhe(void)
> +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
>  {
>   u64 val;
>  
>   val = CPTR_EL2_DEFAULT;
> - val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
> + val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
> + if (!update_fp_enabled(vcpu))
> + val |= CPTR_EL2_TFP;
> +
>   write_sysreg(val, cptr_el2);
>  }
>  

[...]

Otherwise this approach looks quite good to me overall.  Are you
planning to add SVE support before removing the RFC from this series?

Thanks,
-Christoffer


Re: [PATCH] KVM: arm/arm64: Close VMID generation race

2018-04-09 Thread Christoffer Dall
On Mon, Apr 09, 2018 at 06:07:06PM +0100, Marc Zyngier wrote:
> Before entering the guest, we check whether our VMID is still
> part of the current generation. In order to avoid taking a lock,
> we start with checking that the generation is still current, and
> only if not current do we take the lock, recheck, and update the
> generation and VMID.
> 
> This leaves open a small race: A vcpu can bump up the global
> generation number as well as the VM's, but has not updated
> the VMID itself yet.
> 
> At that point another vcpu from the same VM comes in, checks
> the generation (and finds it not needing anything), and jumps
> into the guest. At this point, we end-up with two vcpus belonging
> to the same VM running with two different VMIDs. Eventually, the
> VMID used by the second vcpu will get reassigned, and things will
> really go wrong...
> 
> A simple solution would be to drop this initial check, and always take
> the lock. This is likely to cause performance issues. A middle ground
> is to convert the spinlock to a rwlock, and only take the read lock
> on the fast path. If the check fails at that point, drop it and
> acquire the write lock, rechecking the condition.
> 
> This ensures that the above scenario doesn't occur.
> 
> Reported-by: Mark Rutland 
> Signed-off-by: Marc Zyngier 
> ---
> I haven't seen any reply from Shannon, so reposting this to
> a slightly wider audience for feedback.
> 
>  virt/kvm/arm/arm.c | 15 ++-
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index dba629c5f8ac..a4c1b76240df 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -63,7 +63,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, 
> kvm_arm_running_vcpu);
>  static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
>  static u32 kvm_next_vmid;
>  static unsigned int kvm_vmid_bits __read_mostly;
> -static DEFINE_SPINLOCK(kvm_vmid_lock);
> +static DEFINE_RWLOCK(kvm_vmid_lock);
>  
>  static bool vgic_present;
>  
> @@ -473,11 +473,16 @@ static void update_vttbr(struct kvm *kvm)
>  {
>   phys_addr_t pgd_phys;
>   u64 vmid;
> + bool new_gen;
>  
> - if (!need_new_vmid_gen(kvm))
> + read_lock(&kvm_vmid_lock);
> + new_gen = need_new_vmid_gen(kvm);
> + read_unlock(&kvm_vmid_lock);
> +
> + if (!new_gen)
>   return;
>  
> - spin_lock(&kvm_vmid_lock);
> + write_lock(&kvm_vmid_lock);
>  
>   /*
>* We need to re-check the vmid_gen here to ensure that if another vcpu
> @@ -485,7 +490,7 @@ static void update_vttbr(struct kvm *kvm)
>* use the same vmid.
>*/
>   if (!need_new_vmid_gen(kvm)) {
> - spin_unlock(&kvm_vmid_lock);
> + write_unlock(&kvm_vmid_lock);
>   return;
>   }
>  
> @@ -519,7 +524,7 @@ static void update_vttbr(struct kvm *kvm)
>   vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & 
> VTTBR_VMID_MASK(kvm_vmid_bits);
>   kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
>  
> - spin_unlock(&kvm_vmid_lock);
> + write_unlock(&kvm_vmid_lock);
>  }
>  
>  static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
> -- 
> 2.14.2
> 

The above looks correct to me.  I am wondering if something like the
following would also work, which may be slightly more efficient,
although I doubt the difference can be measured:

diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index dba629c5f8ac..7ac869bcad21 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -458,7 +458,9 @@ void force_vm_exit(const cpumask_t *mask)
  */
 static bool need_new_vmid_gen(struct kvm *kvm)
 {
-   return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+   u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
+   smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
+   return unlikely(kvm->arch.vmid_gen != current_vmid_gen);
 }
 
 /**
@@ -508,10 +510,11 @@ static void update_vttbr(struct kvm *kvm)
kvm_call_hyp(__kvm_flush_vm_context);
}
 
-   kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
kvm->arch.vmid = kvm_next_vmid;
kvm_next_vmid++;
kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
+   smp_wmb();
+   kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
 
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);


Thanks,
-Christoffer


Re: [PATCHv2 09/12] arm64/kvm: preserve host HCR_EL2 value

2018-04-09 Thread Christoffer Dall
On Mon, Apr 09, 2018 at 03:57:09PM +0100, Mark Rutland wrote:
> On Tue, Feb 06, 2018 at 01:39:15PM +0100, Christoffer Dall wrote:
> > On Mon, Nov 27, 2017 at 04:38:03PM +, Mark Rutland wrote:
> > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > > index 525c01f48867..2205f0be3ced 100644
> > > --- a/arch/arm64/kvm/hyp/switch.c
> > > +++ b/arch/arm64/kvm/hyp/switch.c
> > > @@ -71,6 +71,8 @@ static void __hyp_text __activate_traps(struct kvm_vcpu 
> > > *vcpu)
> > >  {
> > >   u64 val;
> > >  
> > > + vcpu->arch.host_hcr_el2 = read_sysreg(hcr_el2);
> > > +
> > 
> > Looking back at this, it seems excessive to switch this at every
> > round-trip.  I think it should be possible to have this as a single
> > global (or per-CPU) variable that gets restored directly when returning
> > from the VM.
> 
> I suspect this needs to be per-cpu, to account for heterogeneous
> systems.
> 
> I guess if we move hcr_el2 into kvm_cpu_context, that gives us a
> per-vcpu copy for guests, and a per-cpu copy for the host (in the global
> kvm_host_cpu_state).
> 
> I'll have a look at how gnarly that turns out. I'm not sure how we can
> initialise that sanely for the !VHE case to match whatever el2_setup
> did.

There's no harm in jumping down to EL2 to read a register during the
initialization phase.  All it requires is an annotation of the callee
function, and a kvm_call_hyp(), and it's actually quite fast unless you
start saving/restoring a bunch of additional system registers.  See how
we call __kvm_set_tpidr_el2() for example.
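
i.e. roughly (hypothetical helper name; the __hyp_text annotation and
kvm_call_hyp() plumbing are the existing mechanisms):

	static u64 __hyp_text __read_hcr_el2(void)
	{
		return read_sysreg(hcr_el2);
	}

	/* ... then, during init, from EL1: */
	u64 host_hcr_el2 = kvm_call_hyp(__read_hcr_el2);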

Thanks,
-Christoffer


Re: [REPOST PATCH] arm/arm64: KVM: Add PSCI version selection API

2018-04-09 Thread Christoffer Dall
On Mon, Apr 09, 2018 at 01:47:50PM +0100, Marc Zyngier wrote:
> +Drew, who's look at the whole save/restore thing extensively
> 
> On 09/04/18 13:30, Christoffer Dall wrote:
> > On Thu, Mar 15, 2018 at 07:26:48PM +, Marc Zyngier wrote:
> >> On 15/03/18 19:13, Peter Maydell wrote:
> >>> On 15 March 2018 at 19:00, Marc Zyngier <marc.zyng...@arm.com> wrote:
> >>>> On 06/03/18 09:21, Andrew Jones wrote:
> >>>>> On Mon, Mar 05, 2018 at 04:47:55PM +, Peter Maydell wrote:
> >>>>>> On 2 March 2018 at 11:11, Marc Zyngier <marc.zyng...@arm.com> wrote:
> >>>>>>> On Fri, 02 Mar 2018 10:44:48 +,
> >>>>>>> Auger Eric wrote:
> >>>>>>>> I understand the get/set is called as part of the migration process.
> >>>>>>>> So my understanding is the benefit of this series is migration fails 
> >>>>>>>> in
> >>>>>>>> those cases:
> >>>>>>>>
> >>>>>>>> >=0.2 source -> 0.1 destination
> >>>>>>>> 0.1 source -> >=0.2 destination
> >>>>>>>
> >>>>>>> It also fails in the case where you migrate a 1.0 guest to something
> >>>>>>> that cannot support it.
> >>>>>>
> >>>>>> I think it would be useful if we could write out the various
> >>>>>> combinations of source, destination and what we expect/want to
> >>>>>> have happen. My gut feeling here is that we're sacrificing
> >>>>>> exact migration compatibility in favour of having the guest
> >>>>>> automatically get the variant-2 mitigations, but it's not clear
> >>>>>> to me exactly which migration combinations that's intended to
> >>>>>> happen for. Marc?
> >>>>>>
> >>>>>> If this wasn't a mitigation issue the desired behaviour would be
> >>>>>> straightforward:
> >>>>>>  * kernel should default to 0.2 on the basis that
> >>>>>>that's what it did before
> >>>>>>  * new QEMU version should enable 1.0 by default for virt-2.12
> >>>>>>and 0.2 for virt-2.11 and earlier
> >>>>>>  * PSCI version info shouldn't appear in migration stream unless
> >>>>>>it's something other than 0.2
> >>>>>> But that would leave some setups (which?) unnecessarily without the
> >>>>>> mitigation, so we're not doing that. The question is, exactly
> >>>>>> what *are* we aiming for?
> >>>>>
> >>>>> The reason Marc dropped this patch from the series it was first 
> >>>>> introduced
> >>>>> in was because we didn't have the aim 100% understood. We want the
> >>>>> mitigation by default, but also to have the least chance of migration
> >>>>> failure, and when we must fail (because we're not doing the
> >>>>> straightforward approach listed above, which would prevent failures), 
> >>>>> then
> >>>>> we want to fail with the least amount of damage to the user.
> >>>>>
> >>>>> I experimented with a couple different approaches and provided tables[1]
> >>>>> with my results. I even recommended an approach, but I may have changed
> >>>>> my mind after reading Marc's follow-up[2]. The thread continues from
> >>>>> there as well with follow-ups from Christoffer, Marc, and myself. 
> >>>>> Anyway,
> >>>>> Marc did this repost for us to debate it and work out the best approach
> >>>>> here.
> >>>> It doesn't look like we've made much progress on this, which makes me
> >>>> think that we probably don't need anything of the like.
> >>>
> >>> I was waiting for a better explanation from you of what we're trying to
> >>> achieve. If you want to take the "do nothing" approach then a list
> >>> also of what migrations succeed/fail/break in that case would also
> >>> be useful.
> >>>
> >>> (I am somewhat lazily trying to avoid having to spend time reverse
> >>> engineering the "what are we trying to do and what effects are
> >>> we accepting" parts from the patch and the code that's already gone
> >>> into the kernel.

Re: [PATCHv2 10/12] arm64/kvm: context-switch ptrauth registers

2018-04-09 Thread Christoffer Dall
Hi Mark,

[Sorry for late reply]

On Fri, Mar 09, 2018 at 02:28:38PM +, Mark Rutland wrote:
> On Tue, Feb 06, 2018 at 01:38:47PM +0100, Christoffer Dall wrote:
> > On Mon, Nov 27, 2017 at 04:38:04PM +, Mark Rutland wrote:
> > > When pointer authentication is supported, a guest may wish to use it.
> > > This patch adds the necessary KVM infrastructure for this to work, with
> > > a semi-lazy context switch of the pointer auth state.
> > > 
> > > When we schedule a vcpu, 
> > 
> > That's not quite what the code does, the code only does this when we
> > schedule back a preempted or blocked vcpu thread.
> 
> Does that only leave the case of the vCPU being scheduled for the first
> time? Or am I missing something else?
> 
> [...]

In the current patch, you're only calling kvm_arm_vcpu_ptrauth_disable()
from kvm_arch_sched_in(), which is only called on the preempt notifier
path; that leaves out every entry to the guest from userspace and
therefore also the initial run of the vCPU (assuming there's no
preemption in the kernel prior to running the first time).

vcpu_load() takes care of all the cases.
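
i.e. roughly (sketch of the suggested placement;
kvm_arm_vcpu_ptrauth_disable() is the helper from this patch):

	void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	{
		/* ... existing load logic ... */

		/*
		 * Re-arm the ptrauth traps so that the very first run and
		 * every re-entry after a return to userspace start with
		 * the traps enabled.
		 */
		kvm_arm_vcpu_ptrauth_disable(vcpu);
	}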

> 

[...]

> > 
> > I still find this decision to begin trapping again quite arbitrary, and
> > would at least prefer this to be in vcpu_load (which would make the
> > behavior match the commit text as well).
> 
> Sure, done.
> 
> > My expectation would be that if a guest is running software with pointer
> > authentication enabled, then it's likely to either keep using the
> > feature, or not use it at all, so I would make this a one-time flag.
> 
> I think it's likely that some applications will use ptrauth while others
> do not. Even if the guest OS supports ptrauth, KVM may repeatedly preempt
> an application that doesn't use it, and we'd win in that case.
> 
> There are also some rarer cases, like kexec in a guest from a
> ptrauth-aware kernel to a ptrauth-oblivious one.
> 
> I don't have strong feelings either way, and I have no data.
> 

I think your intuition sounds sane, and let's reset the flag on every
vcpu_load, and we can always revisit when we have hardware and data if
someone reports a performance issue.

Thanks,
-Christoffer


Re: [RFC PATCH v2 2/3] KVM: arm64: Convert lazy FPSIMD context switch trap to C

2018-04-09 Thread Christoffer Dall
On Mon, Apr 09, 2018 at 11:00:40AM +0100, Marc Zyngier wrote:
> On 09/04/18 10:44, Christoffer Dall wrote:
> > On Fri, Apr 06, 2018 at 04:51:53PM +0100, Dave Martin wrote:
> >> On Fri, Apr 06, 2018 at 04:25:57PM +0100, Marc Zyngier wrote:
> >>> Hi Dave,
> >>>
> >>> On 06/04/18 16:01, Dave Martin wrote:
> >>>> To make the lazy FPSIMD context switch trap code easier to hack on,
> >>>> this patch converts it to C.
> >>>>
> >>>> This is not amazingly efficient, but the trap should typically only
> >>>> be taken once per host context switch.
> >>>>
> >>>> Signed-off-by: Dave Martin <dave.mar...@arm.com>
> >>>>
> >>>> ---
> >>>>
> >>>> Since RFCv1:
> >>>>
> >>>>  * Fix indentation to be consistent with the rest of the file.
> >>>>  * Add missing ! to write back to sp with attempting to push regs.
> >>>> ---
> >>>>  arch/arm64/kvm/hyp/entry.S  | 57 +
> >>>>  arch/arm64/kvm/hyp/switch.c | 24 +++
> >>>>  2 files changed, 46 insertions(+), 35 deletions(-)
> >>>>
> >>>> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> >>>> index fdd1068..47c6a78 100644
> >>>> --- a/arch/arm64/kvm/hyp/entry.S
> >>>> +++ b/arch/arm64/kvm/hyp/entry.S
> >>>> @@ -176,41 +176,28 @@ ENTRY(__fpsimd_guest_restore)
> >>>>  // x1: vcpu
> >>>>  // x2-x29,lr: vcpu regs
> >>>>  // vcpu x0-x1 on the stack
> >>>> -stp x2, x3, [sp, #-16]!
> >>>> -stp x4, lr, [sp, #-16]!
> >>>> -
> >>>> -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> >>>> -mrs x2, cptr_el2
> >>>> -bic x2, x2, #CPTR_EL2_TFP
> >>>> -msr cptr_el2, x2
> >>>> -alternative_else
> >>>> -mrs x2, cpacr_el1
> >>>> -orr x2, x2, #CPACR_EL1_FPEN
> >>>> -msr cpacr_el1, x2
> >>>> -alternative_endif
> >>>> -isb
> >>>> -
> >>>> -mov x3, x1
> >>>> -
> >>>> -ldr x0, [x3, #VCPU_HOST_CONTEXT]
> >>>> -kern_hyp_va x0
> >>>> -add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> >>>> -bl  __fpsimd_save_state
> >>>> -
> >>>> -add x2, x3, #VCPU_CONTEXT
> >>>> -add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> >>>> -bl  __fpsimd_restore_state
> >>>> -
> >>>> -// Skip restoring fpexc32 for AArch64 guests
> >>>> -mrs x1, hcr_el2
> >>>> -tbnz x1, #HCR_RW_SHIFT, 1f
> >>>> -ldr x4, [x3, #VCPU_FPEXC32_EL2]
> >>>> -msr fpexc32_el2, x4
> >>>> -1:
> >>>> -ldp x4, lr, [sp], #16
> >>>> -ldp x2, x3, [sp], #16
> >>>> -ldp x0, x1, [sp], #16
> >>>> -
> >>>> +stp x2, x3, [sp, #-144]!
> >>>> +stp x4, x5, [sp, #16]
> >>>> +stp x6, x7, [sp, #32]
> >>>> +stp x8, x9, [sp, #48]
> >>>> +stp x10, x11, [sp, #64]
> >>>> +stp x12, x13, [sp, #80]
> >>>> +stp x14, x15, [sp, #96]
> >>>> +stp x16, x17, [sp, #112]
> >>>> +stp x18, lr, [sp, #128]
> >>>> +
> >>>> +bl  __hyp_switch_fpsimd
> >>>> +
> >>>> +ldp x4, x5, [sp, #16]
> >>>> +ldp x6, x7, [sp, #32]
> >>>> +ldp x8, x9, [sp, #48]
> >>>> +ldp x10, x11, [sp, #64]
> >>>> +ldp x12, x13, [sp, #80]
> >>>> +ldp x14, x15, [sp, #96]
> >>>> +ldp x16, x17, [sp, #112]
> >>>> +ldp x18, lr, [sp, #128]
> >>>> +ldp x0, x1, [sp, #144]
> >>>> +ldp x2, x3, [sp], #160
> >>>
> >>> I can't say I'm overly thrilled with adding another save/restore 
> >>

Re: [RFC PATCH v2 3/3] KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing

2018-04-09 Thread Christoffer Dall
On Fri, Apr 06, 2018 at 04:01:04PM +0100, Dave Martin wrote:
> This patch refactors KVM to align the host and guest FPSIMD
> save/restore logic with each other for arm64.  This reduces the
> number of redundant save/restore operations that must occur, and
> reduces the common-case IRQ blackout time during guest exit storms
> by saving the host state lazily and optimising away the need to
> restore the host state before returning to the run loop.
> 
> Four hooks are defined in order to enable this:
> 
>  * kvm_arch_vcpu_run_map_fp():
>Called on PID change to map necessary bits of current to Hyp.
> 
>  * kvm_arch_vcpu_load_fp():
>Set up FP/SIMD for entering the KVM run loop (parse as
>"vcpu_load fp").
> 
>  * kvm_arch_vcpu_park_fp():
>Get FP/SIMD into a safe state for re-enabling interrupts after a
>guest exit back to the run loop.
> 
>  * kvm_arch_vcpu_put_fp():
>Save guest FP/SIMD state back to memory and dissociate from the
>CPU ("vcpu_put fp").
> 
> Also, the arm64 FPSIMD context switch code is updated to enable it
> to save back FPSIMD state for a vcpu, not just current.  A few
> helpers drive this:
> 
>  * fpsimd_bind_state_to_cpu(struct user_fpsimd_state *fp):
>mark this CPU as having context fp (which may belong to a vcpu)
>currently loaded in its registers.  This is the non-task
>equivalent of the static function fpsimd_bind_to_cpu() in
>fpsimd.c.
> 
>  * task_fpsimd_save():
>exported to allow KVM to save the guest's FPSIMD state back to
>memory on exit from the run loop.
> 
>  * fpsimd_flush_state():
>invalidate any context's FPSIMD state that is currently loaded.
>Used to disassociate the vcpu from the CPU regs on run loop exit.
> 
> These changes allow the run loop to enable interrupts (and thus
> softirqs that may use kernel-mode NEON) without having to save the
> guest's FPSIMD state eagerly.
> 
> Some new vcpu_arch fields are added to make all this work.  Because
> host FPSIMD state can now be saved back directly into current's
> thread_struct as appropriate, host_cpu_context is no longer used
> for preserving the FPSIMD state.  However, it is still needed for
> preserving other things such as the host's system registers.  To
> avoid ABI churn, the redundant storage space in host_cpu_context is
> not removed for now.
> 
> arch/arm is not addressed by this patch and continues to use its
> current save/restore logic.  It could provide implementations of
> the helpers later if desired.
> 
> Signed-off-by: Dave Martin 
> ---
>  arch/arm/include/asm/kvm_host.h   |  8 +++
>  arch/arm64/include/asm/fpsimd.h   |  5 +
>  arch/arm64/include/asm/kvm_host.h | 18 +++
>  arch/arm64/kernel/fpsimd.c| 31 --
>  arch/arm64/kvm/Makefile   |  2 +-
>  arch/arm64/kvm/hyp/switch.c   | 46 ---
>  virt/kvm/arm/arm.c| 14 
>  7 files changed, 89 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 248b930..11cd64a3 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -303,6 +303,14 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
>  int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
>  struct kvm_device_attr *attr);
>  
> +/*
> + * VFP/NEON switching is all done by the hyp switch code, so no need to
> + * coordinate with host context handling for this state:
> + */
> +static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_arch_vcpu_park_fp(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
> +
>  /* All host FP/SIMD state is restored on guest exit, so nothing to save: */
>  static inline void kvm_fpsimd_flush_cpu_state(void) {}
>  
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 1bfc920..dbe7340 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -40,6 +40,8 @@ struct task_struct;
>  extern void fpsimd_save_state(struct user_fpsimd_state *state);
>  extern void fpsimd_load_state(struct user_fpsimd_state *state);
>  
> +extern void task_fpsimd_save(void);
> +
>  extern void fpsimd_thread_switch(struct task_struct *next);
>  extern void fpsimd_flush_thread(void);
>  
> @@ -48,7 +50,10 @@ extern void fpsimd_preserve_current_state(void);
>  extern void fpsimd_restore_current_state(void);
>  extern void fpsimd_update_current_state(struct user_fpsimd_state const 
> *state);
>  
> +extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
> +
>  extern void fpsimd_flush_task_state(struct task_struct *target);
> +extern void fpsimd_flush_cpu_state(void);
>  extern void sve_flush_cpu_state(void);
>  
>  /* Maximum VL that SVE VL-agnostic software can transparently support */
> diff --git 

Re: [RFC PATCH v2 2/3] KVM: arm64: Convert lazy FPSIMD context switch trap to C

2018-04-09 Thread Christoffer Dall
On Fri, Apr 06, 2018 at 04:51:53PM +0100, Dave Martin wrote:
> On Fri, Apr 06, 2018 at 04:25:57PM +0100, Marc Zyngier wrote:
> > Hi Dave,
> > 
> > On 06/04/18 16:01, Dave Martin wrote:
> > > To make the lazy FPSIMD context switch trap code easier to hack on,
> > > this patch converts it to C.
> > > 
> > > This is not amazingly efficient, but the trap should typically only
> > > be taken once per host context switch.
> > > 
> > > Signed-off-by: Dave Martin 
> > > 
> > > ---
> > > 
> > > Since RFCv1:
> > > 
> > >  * Fix indentation to be consistent with the rest of the file.
> > >  * Add missing ! to write back to sp when attempting to push regs.
> > > ---
> > >  arch/arm64/kvm/hyp/entry.S  | 57 
> > > +
> > >  arch/arm64/kvm/hyp/switch.c | 24 +++
> > >  2 files changed, 46 insertions(+), 35 deletions(-)
> > > 
> > > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> > > index fdd1068..47c6a78 100644
> > > --- a/arch/arm64/kvm/hyp/entry.S
> > > +++ b/arch/arm64/kvm/hyp/entry.S
> > > @@ -176,41 +176,28 @@ ENTRY(__fpsimd_guest_restore)
> > >   // x1: vcpu
> > >   // x2-x29,lr: vcpu regs
> > >   // vcpu x0-x1 on the stack
> > > - stp x2, x3, [sp, #-16]!
> > > - stp x4, lr, [sp, #-16]!
> > > -
> > > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> > > - mrs x2, cptr_el2
> > > - bic x2, x2, #CPTR_EL2_TFP
> > > - msr cptr_el2, x2
> > > -alternative_else
> > > - mrs x2, cpacr_el1
> > > - orr x2, x2, #CPACR_EL1_FPEN
> > > - msr cpacr_el1, x2
> > > -alternative_endif
> > > - isb
> > > -
> > > - mov x3, x1
> > > -
> > > - ldr x0, [x3, #VCPU_HOST_CONTEXT]
> > > - kern_hyp_va x0
> > > - add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> > > - bl  __fpsimd_save_state
> > > -
> > > - add x2, x3, #VCPU_CONTEXT
> > > - add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> > > - bl  __fpsimd_restore_state
> > > -
> > > - // Skip restoring fpexc32 for AArch64 guests
> > > - mrs x1, hcr_el2
> > > - tbnzx1, #HCR_RW_SHIFT, 1f
> > > - ldr x4, [x3, #VCPU_FPEXC32_EL2]
> > > - msr fpexc32_el2, x4
> > > -1:
> > > - ldp x4, lr, [sp], #16
> > > - ldp x2, x3, [sp], #16
> > > - ldp x0, x1, [sp], #16
> > > -
> > > + stp x2, x3, [sp, #-144]!
> > > + stp x4, x5, [sp, #16]
> > > + stp x6, x7, [sp, #32]
> > > + stp x8, x9, [sp, #48]
> > > + stp x10, x11, [sp, #64]
> > > + stp x12, x13, [sp, #80]
> > > + stp x14, x15, [sp, #96]
> > > + stp x16, x17, [sp, #112]
> > > + stp x18, lr, [sp, #128]
> > > +
> > > + bl  __hyp_switch_fpsimd
> > > +
> > > + ldp x4, x5, [sp, #16]
> > > + ldp x6, x7, [sp, #32]
> > > + ldp x8, x9, [sp, #48]
> > > + ldp x10, x11, [sp, #64]
> > > + ldp x12, x13, [sp, #80]
> > > + ldp x14, x15, [sp, #96]
> > > + ldp x16, x17, [sp, #112]
> > > + ldp x18, lr, [sp, #128]
> > > + ldp x0, x1, [sp, #144]
> > > + ldp x2, x3, [sp], #160
> > 
> > I can't say I'm overly thrilled with adding another save/restore 
> > sequence. How about treating it like a real guest exit instead? Granted, 
> > there is a bit more overhead to it, but as you pointed out above, this 
> > should be pretty rare...
> 
> I have no objection to handling this after exiting back to
> __kvm_vcpu_run(), provided the performance is deemed acceptable.
> 

My guess is that it's going to be visible on non-VHE systems, and given
that we're doing all of this for performance in the first place, I'm not
excited about that approach either.

I thought it was acceptable to do another save/restore, because it was
only the GPRs (and equivalent to what the compiler would generate for a
function call?) and thus not susceptible to the complexities of sysreg
save/restores.
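
For reference, a C rendering of the deleted assembly sequence quoted
above might look roughly like the sketch below.  The actual
__hyp_switch_fpsimd body is not quoted in this thread, so the names and
structure here are an approximation, not the patch itself:

static void __hyp_text __hyp_switch_fpsimd_sketch(struct kvm_vcpu *vcpu)
{
        struct kvm_cpu_context *host_ctxt;
        u64 val;

        /* Re-enable FP/SIMD access, as the old asm alternatives did */
        if (has_vhe()) {
                val = read_sysreg(cpacr_el1);
                val |= CPACR_EL1_FPEN;
                write_sysreg(val, cpacr_el1);
        } else {
                val = read_sysreg(cptr_el2);
                val &= ~(u64)CPTR_EL2_TFP;
                write_sysreg(val, cptr_el2);
        }
        isb();

        /* Save host FPSIMD state, then load the guest's */
        host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
        __fpsimd_save_state(&host_ctxt->gp_regs.fp_regs);
        __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);

        /* 32-bit guests also want fpexc32_el2 back */
        if (!(read_sysreg(hcr_el2) & HCR_RW))
                write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
                             fpexc32_el2);
}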

Another alternative would be to go back to Dave's original approach of
implementing the fpsimd state update to the host's structure in assembly
directly, but I was having a hard time understanding that.  Perhaps I
just need to try harder.

Thoughts?

Thanks,
-Christoffer


Re: [PATCH] KVM: arm/arm64: VGIC MMIO: add missing irq_lock

2018-03-12 Thread Christoffer Dall
On Tue, Mar 06, 2018 at 09:21:06AM +, Andre Przywara wrote:
> Our irq_is_pending() helper function accesses multiple members of the
> vgic_irq struct, so we need to hold the lock when calling it.

For the record I don't think this is necessarily a completely valid
conclusion.  The fact that you access multiple members of a struct is a
good indication that it might be a good idea to hold a lock, but it's
not as simple as that.

I think the only thing that could happen here is that a caller
mistakenly evaluates line_level when it shouldn't.  That would only
happen when changing the configuration of an irq from level to edge,
while the line_level is high, expecting the line_level to go down, and
the pending state to be subsequently reported as false.  However, we
only support changing the configuration of an interrupt when it's
disabled, and as a result this can only affect reads of the PENDR
registers.

> Add that requirement as a comment to the definition and take the lock
> around the call in vgic_mmio_read_pending(), where we were missing it
> before.
> 
> Signed-off-by: Andre Przywara 

Note, I'm fine with this change, but I don't agree with the rationale.
The rationale is to take the lock on every use for consistency and to
make the code easier to reason about, but it's possible that some
future analysis would relax this requirement if essential for
performance.

Thanks,
-Christoffer

> ---
>  virt/kvm/arm/vgic/vgic-mmio.c | 3 +++
>  virt/kvm/arm/vgic/vgic.h  | 1 +
>  2 files changed, 4 insertions(+)
> 
> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
> index 83d82bd7dc4e..dbe99d635c80 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
> @@ -113,9 +113,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu 
> *vcpu,
>   /* Loop over all IRQs affected by this read */
>   for (i = 0; i < len * 8; i++) {
>   struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
> + unsigned long flags;
>  
> + spin_lock_irqsave(>irq_lock, flags);
>   if (irq_is_pending(irq))
>   value |= (1U << i);
> + spin_unlock_irqrestore(>irq_lock, flags);
>  
>   vgic_put_irq(vcpu->kvm, irq);
>   }
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 12c37b89f7a3..5b11859a1a1e 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -96,6 +96,7 @@
>  /* we only support 64 kB translation table page size */
>  #define KVM_ITS_L1E_ADDR_MASKGENMASK_ULL(51, 16)
>  
> +/* Requires the irq_lock to be held by the caller. */
>  static inline bool irq_is_pending(struct vgic_irq *irq)
>  {
>   if (irq->config == VGIC_CONFIG_EDGE)
> -- 
> 2.14.1
> 


Re: [PATCH v2 2/2] kvm: arm/arm64: vgic-v3: Tighten synchronization for guests using v2 on v3

2018-03-12 Thread Christoffer Dall
On Sun, Mar 11, 2018 at 12:49:56PM +, Marc Zyngier wrote:
> On guest exit, and when using GICv2 on GICv3, we use a dsb(st) to
> force synchronization between the memory-mapped guest view and
> the system-register view that the hypervisor uses.
> 
> This is incorrect, as the spec calls out the need for "a DSB whose
> required access type is both loads and stores with any Shareability
> attribute", while we're only synchronizing stores.
> 
> We also lack an isb after the dsb to ensure that the latter has
> actually been executed before we start reading stuff from the sysregs.
> 
> The fix is pretty easy: turn dsb(st) into dsb(sy), and slap an isb()
> just after.
> 
> Cc: sta...@vger.kernel.org
> Fixes: f68d2b1b73cc ("arm64: KVM: Implement vgic-v3 save/restore")
> Reviewed-by: Andre Przywara <andre.przyw...@arm.com>
> Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>

Acked-by: Christoffer Dall <cd...@kernel.org>

> ---
>  virt/kvm/arm/hyp/vgic-v3-sr.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
> index f5c3d6d7019e..b89ce5432214 100644
> --- a/virt/kvm/arm/hyp/vgic-v3-sr.c
> +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
> @@ -215,7 +215,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu 
> *vcpu)
>* are now visible to the system register interface.
>*/
>   if (!cpu_if->vgic_sre) {
> - dsb(st);
> + dsb(sy);
> + isb();
>   cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
>   }
>  
> -- 
> 2.14.2
> 


Re: [PATCH v2 1/2] KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid

2018-03-12 Thread Christoffer Dall
On Sun, Mar 11, 2018 at 12:49:55PM +, Marc Zyngier wrote:
> The vgic code is trying to be clever when injecting GICv2 SGIs,
> and will happily populate LRs with the same interrupt number if
> they come from multiple vcpus (after all, they are distinct
> interrupt sources).
> 
> Unfortunately, this is against the letter of the architecture,
> and the GICv2 architecture spec says "Each valid interrupt stored
> in the List registers must have a unique VirtualID for that
> virtual CPU interface.". GICv3 has similar (although slightly
> ambiguous) restrictions.
> 
> This results in guests locking up when using GICv2-on-GICv3, for
> example. The obvious fix is to stop trying so hard, and inject
> a single vcpu per SGI per guest entry. After all, pending SGIs
> with multiple source vcpus are pretty rare, and are mostly seen
> in scenarios where the physical CPUs are severely overcommitted.
> 
> But as we now only inject a single instance of a multi-source SGI per
> vcpu entry, we may delay those interrupts for longer than strictly
> necessary, and run the risk of injecting lower priority interrupts
> in the meantime.
> 
> In order to address this, we adopt a three stage strategy:
> - If we encounter a multi-source SGI in the AP list while computing
>   its depth, we force the list to be sorted
> - When populating the LRs, we prevent the injection of any interrupt
>   of lower priority than that of the first multi-source SGI we've
>   injected.
> - Finally, the injection of a multi-source SGI triggers the request
>   of a maintenance interrupt when there will be no pending interrupt
>   in the LRs (HCR_NPIE).
> 
> At the point where the last pending interrupt in the LRs switches
> from Pending to Active, the maintenance interrupt will be delivered,
> allowing us to add the remaining SGIs using the same process.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework")
> Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>

The fact that we have to do this is really annoying, but I see no other
way around it.  It will get slightly better if we move to insertion sort
based on priorities when injecting interrupts, as discussed with Andre,
though.

Acked-by: Christoffer Dall <cd...@kernel.org>

> ---
>  include/linux/irqchip/arm-gic-v3.h |  1 +
>  include/linux/irqchip/arm-gic.h|  1 +
>  virt/kvm/arm/vgic/vgic-v2.c|  9 +-
>  virt/kvm/arm/vgic/vgic-v3.c|  9 +-
>  virt/kvm/arm/vgic/vgic.c   | 61 
> +-
>  virt/kvm/arm/vgic/vgic.h   |  2 ++
>  6 files changed, 67 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/irqchip/arm-gic-v3.h 
> b/include/linux/irqchip/arm-gic-v3.h
> index c00c4c33e432..b26eccc78fb1 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -503,6 +503,7 @@
>  
>  #define ICH_HCR_EN   (1 << 0)
>  #define ICH_HCR_UIE  (1 << 1)
> +#define ICH_HCR_NPIE (1 << 3)
>  #define ICH_HCR_TC   (1 << 10)
>  #define ICH_HCR_TALL0(1 << 11)
>  #define ICH_HCR_TALL1(1 << 12)
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index d3453ee072fc..68d8b1f73682 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -84,6 +84,7 @@
>  
>  #define GICH_HCR_EN  (1 << 0)
>  #define GICH_HCR_UIE (1 << 1)
> +#define GICH_HCR_NPIE(1 << 3)
>  
>  #define GICH_LR_VIRTUALID(0x3ff << 0)
>  #define GICH_LR_PHYSID_CPUID_SHIFT   (10)
> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
> index c32d7b93ffd1..44264d11be02 100644
> --- a/virt/kvm/arm/vgic/vgic-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-v2.c
> @@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void)
>   vgic_v2_write_lr(i, 0);
>  }
>  
> +void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
> +{
> + struct vgic_v2_cpu_if *cpuif = >arch.vgic_cpu.vgic_v2;
> +
> + cpuif->vgic_hcr |= GICH_HCR_NPIE;
> +}
> +
>  void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
>  {
>   struct vgic_v2_cpu_if *cpuif = >arch.vgic_cpu.vgic_v2;
> @@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>   int lr;
>   unsigned long flags;
>  
> - cpuif->vgic_hcr &= ~GICH_HCR_UIE;
> + cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
>  
>   for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
>

Re: [RFC PATCH] KVM: arm/arm64: vgic: change condition for level interrupt resampling

2018-03-10 Thread Christoffer Dall
On Sat, Mar 10, 2018 at 12:20 PM, Marc Zyngier <marc.zyng...@arm.com> wrote:
> On Fri, 09 Mar 2018 21:36:12 +,
> Christoffer Dall wrote:
>>
>> On Thu, Mar 08, 2018 at 05:28:44PM +, Marc Zyngier wrote:
>> > I'd be more confident if we did forbid P+A for such interrupts
>> > altogether, as they really feel like another kind of HW interrupt.
>>
>> How about a slightly bigger hammer:  Can we avoid doing P+A for level
>> interrupts completely?  I don't think that really makes much sense, and
>> I think we simplify everything if we just come back out and resample the
>> line.  For an edge, something like a network card, there's a potential
>> performance win to appending a new pending state, but I doubt that this
>> is the case for level interrupts.
>
> I started implementing the same thing yesterday. Somehow, it feels
> slightly better to have the same flow for all level interrupts,
> including the timer, and we only use the MI on EOI as a way to trigger
> the next state of injection. Still testing, but looking good so far.
>
> I'm still puzzled that we have this level-but-not-quite behaviour for
> VFIO interrupts. At some point, it is going to bite us badly.
>

Where is the departure from level-triggered behavior with VFIO?  As
far as I can tell, the GIC flow of the interrupts will be just a level
interrupt, but we just need to make sure the resamplefd mechanism is
supported for both types of interrupts.  Whether or not that's a
decent mechanism seems orthogonal to me, but that's a discussion for
another day I think.

Thanks,
-Christoffer


Re: [PATCH 1/2] KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid

2018-03-10 Thread Christoffer Dall
On Sat, Mar 10, 2018 at 1:57 PM, Marc Zyngier <marc.zyng...@arm.com> wrote:
> Hi Christoffer,
>
> On Fri, 09 Mar 2018 21:39:31 +,
> Christoffer Dall wrote:
>>
>> On Thu, Mar 08, 2018 at 06:39:20PM +, Marc Zyngier wrote:
>> > Thinking of it a bit more: MI on EOI doesn't offer much more guarantee
>> > in the way of priority ordering. Taking your example above: Even if
>> > you generate a MI when EOIing the SGI, there is no guarantee that
>> > you'll take the MI before you've acked the SPI.
>>
>> There's no guarantee, but at least you're attempting at processing the
>> SGIs in first.  It's the best we can do, but not completely correct,
>> kinda thing...
>>
>> >
>> > If you really want to offer that absolute guarantee that all the
>> > multi-source SGIs of higher priority are delivered before anything
>> > else, then you must make sure that only the SGIs are present in the
>> > LRs, excluding any other interrupt on lower priority until you've
>> > queued them all.
>>
>> Yes, that sucks!  Might not be too hard to implement, it's basically an
>> early out of the loop traversing the AP list, but just an annoying
>> complication.
>
> Yeah, it is a bit gross. The way I implemented it is by forcing the AP
> list to be sorted if there is any multi-SGI in the pipeline, and early
> out as soon as we see an interrupt of a lower priority than the first
> multi-SGI. That way, we only have an overhead in the case that
> combines multi-SGI and lower priority interrupts.
>

yes, that's what I had in mind as well.
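
Concretely, something along these lines in vgic_flush_lr_state() is
what I picture (a sketch only; multi_sgi and prio are invented local
names, not lifted from the actual patch):

/*
 * Sketch: stop filling LRs with anything of lower priority once a
 * multi-source SGI has been queued; the remaining sources get
 * injected on a later entry.
 */
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
bool multi_sgi = false;
u8 prio = 0xff;
int count = 0;

list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
        spin_lock(&irq->irq_lock);

        if (multi_sgi && irq->priority > prio) {
                spin_unlock(&irq->irq_lock);
                break;
        }

        if (likely(vgic_target_oracle(irq) == vcpu)) {
                vgic_populate_lr(vcpu, irq, count++);
                if (irq->source) {
                        /* a multi-source SGI: remember its priority */
                        multi_sgi = true;
                        prio = irq->priority;
                }
        }

        spin_unlock(&irq->irq_lock);

        if (count == kvm_vgic_global_state.nr_lr)
                break;
}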

>> > At that stage, I wonder if there is a point in doing anything at
>> > all. The GICv2 architecture is too rubbish for words.
>> >
>>
>> The case we do need to worry about is the guest processing all its
>> interrupts and not exiting while there is actually still an SGI pending.
>> At that point, we can either do it with the "no interrupts pending
>> maintenance interrupt" or with the "EOI maintenance interrupt", and I
>> think the latter at least gets us slightly closer to the architecture
>> for a non-virtualized system.
>
> I think that this is where we disagree.

I don't think we disagree, I must have expressed myself poorly...

> I don't see anything in the
> architecture that mandates that we should present the SGIs before
> anything else.

Neither do I.

> All we need to do is to ensure that interrupts of
> higher priority are presented before anything else.

Agreed.

> It is perfectly
> acceptable for an implementation to deliver SGI0, then SPI3, and SGI0
> (from another CPU) after that, as long as SPI3 isn't of lesser
> priority than SGI0.

Yes, but what we cannot do is let the guest deliver SGI0, then SPI3,
and then loop forever without delivering SGI0 from another CPU.
That's why I said "the guest processing all its interrupts and not
exiting while there is actually still an SGI pending" and said that we
could use either the EOI or the NPIE trick.

>
> Another thing I dislike about using EOI for that is that it forces us to
> propagate the knowledge of the multi-SGI horror further down the
> stack, down to both implementations of vgic_populate_lr. NPIE allows
> us to keep that knowledge local. But that's an orthogonal issue, and
> we can further argue/bikeshed about the respective merits of both
> solutions once we have something that fits the sorry state of the
> GICv2 architecture ;-).
>

Yeah, I don't care deeply.  If NPIE is prettier in the
implementations, let's do that.
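
For reference, with vgic_v2_set_npie() from Marc's v2 patch earlier in
this digest (and assuming a v3 counterpart alongside it), the trigger
point could be as small as this sketch:

/*
 * Sketch: if the SGI we just queued still has further sources
 * latched, request a maintenance interrupt for when no pending
 * interrupts remain in the LRs, so we re-enter and inject the rest.
 */
if (irq->source) {
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_set_npie(vcpu);
        else
                vgic_v3_set_npie(vcpu);        /* assumed v3 twin */
}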

Thanks,
-Christoffer


Re: [PATCH 1/2] KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid

2018-03-09 Thread Christoffer Dall
On Thu, Mar 08, 2018 at 06:39:20PM +, Marc Zyngier wrote:
> On Thu, 08 Mar 2018 17:04:38 +,
> Marc Zyngier wrote:
> > 
> > On Thu, 08 Mar 2018 16:02:42 +0000,
> > Christoffer Dall wrote:
> > > 
> > > On Thu, Mar 08, 2018 at 10:19:49AM +, Marc Zyngier wrote:
> > > > On 07/03/18 23:34, Christoffer Dall wrote:
> > > > > On Wed, Mar 7, 2018 at 12:40 PM, Marc Zyngier <marc.zyng...@arm.com> 
> > > > > wrote:
> > > > >> The vgic code is trying to be clever when injecting GICv2 SGIs,
> > > > >> and will happily populate LRs with the same interrupt number if
> > > > >> they come from multiple vcpus (after all, they are distinct
> > > > >> interrupt sources).
> > > > >>
> > > > >> Unfortunately, this is against the letter of the architecture,
> > > > >> and the GICv2 architecture spec says "Each valid interrupt stored
> > > > >> in the List registers must have a unique VirtualID for that
> > > > >> virtual CPU interface.". GICv3 has similar (although slightly
> > > > >> ambiguous) restrictions.
> > > > >>
> > > > >> This results in guests locking up when using GICv2-on-GICv3, for
> > > > >> example. The obvious fix is to stop trying so hard, and inject
> > > > >> a single vcpu per SGI per guest entry. After all, pending SGIs
> > > > >> with multiple source vcpus are pretty rare, and are mostly seen
> > > > >> in scenarios where the physical CPUs are severely overcommitted.
> > > > >>
> > > > >> Cc: sta...@vger.kernel.org
> > > > >> Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush 
> > > > >> framework")
> > > > >> Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>
> > > > >> ---
> > > > >>  virt/kvm/arm/vgic/vgic.c | 11 +--
> > > > >>  1 file changed, 1 insertion(+), 10 deletions(-)
> > > > >>
> > > > >> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> > > > >> index c7c5ef190afa..1f7ff175f47b 100644
> > > > >> --- a/virt/kvm/arm/vgic/vgic.c
> > > > >> +++ b/virt/kvm/arm/vgic/vgic.c
> > > > >> @@ -720,18 +720,9 @@ static void vgic_flush_lr_state(struct kvm_vcpu 
> > > > >> *vcpu)
> > > > >> list_for_each_entry(irq, _cpu->ap_list_head, ap_list) {
> > > > >> spin_lock(>irq_lock);
> > > > >>
> > > > >> -   if (unlikely(vgic_target_oracle(irq) != vcpu))
> > > > >> -   goto next;
> > > > >> -
> > > > >> -   /*
> > > > >> -* If we get an SGI with multiple sources, try to get
> > > > >> -* them in all at once.
> > > > >> -*/
> > > > >> -   do {
> > > > >> +   if (likely(vgic_target_oracle(irq) == vcpu))
> > > > >> vgic_populate_lr(vcpu, irq, count++);
> > > > > 
> > > > > I think we need to change vgic_populate_lr to set the EOI maintenance
> > > > > interrupt flag so that when the interrupt is deactivated, if there are
> > > > > additional pending sources, we exit the guest and pick up the
> > > > > interrupt.
> > > > 
> > > > Potentially. We need to be careful about about the semantics of EOI MI
> > > > with non-level interrupts (see the other thread about EOI signalling).
> > > 
> > > I'll have a look.
> > > 
> > > > 
> > > > > An alternative would be to set the underflow interrupt, but I don't
> > > > > think that would be correct for multiple priorities, because the SGI
> > > > > could have a higher priority than other pending interrupts we put in
> > > > > the LR.
> > > > 
> > > > I don't think priorities are the issue (after all, we already sort the
> > > > irqs in order of priority). 
> > > 
> > > Yes, but assume you have three pending interrupts, one SGI from two
> > > sources, and one SPI, and assume that the SGI has priority 1 and SPI
> > > priority 2 (lower means higher priority), then I think with underflow or

Re: [RFC PATCH] KVM: arm/arm64: vgic: change condition for level interrupt resampling

2018-03-09 Thread Christoffer Dall
On Thu, Mar 08, 2018 at 05:28:44PM +, Marc Zyngier wrote:
> On Thu, 08 Mar 2018 16:19:00 +,
> Christoffer Dall wrote:
> > 
> > On Thu, Mar 08, 2018 at 11:54:27AM +, Marc Zyngier wrote:
> > > On 08/03/18 09:49, Marc Zyngier wrote:

[...]

> > > The state is now pending, we've really EOI'd the interrupt, and
> > > yet lr_signals_eoi_mi() returns false, since the state is not 0.
> > > The result is that we won't signal anything on the corresponding
> > > irqfd, which people complain about. Meh.
> > 
> > So the core of the problem is that when we've entered the guest with
> > PENDING+ACTIVE and when we exit (for some reason) we don't signal the
> > resamplefd, right?  The solution seems to me that we don't ever do
> > PENDING+ACTIVE if you need to resample after each deactivate.  What
> > would be the point of appending a pending state that you only know to be
> > valid after a resample anyway?
> 
> The question is then to identify that a given source needs to be
> signalled back to VFIO. Calling into the eventfd code on the hot path
> is pretty horrid (I'm not sure if we can really call into this with
> interrupts disabled, for example).
> 

This feels like a bad layering violation to me as well.

> > 
> > > 
> > > Example 2:
> > > P+A -> guest EOI -> P -> delayed MI -> guest IAR -> A -> MI fires
> > 
> > We could be more clever and do the following calculation on every exit:
> > 
> > If you enter with P, and exit with either A or 0, then signal.
> > 
> > If you enter with P+A, and you exit with either P, A, or 0, then signal.
> > 
> > Wouldn't that also solve it?  (Although I have a feeling you'd miss some
> > exits in this case).
> 
> I'd be more confident if we did forbid P+A for such interrupts
> altogether, as they really feel like another kind of HW interrupt.

How about a slightly bigger hammer:  Can we avoid doing P+A for level
interrupts completely?  I don't think that really makes much sense, and
I think we simplify everything if we just come back out and resample the
line.  For an edge, something like a network card, there's a potential
performance win to appending a new pending state, but I doubt that this
is the case for level interrupts.

The timer would be unaffected, because it's a HW interrupt.
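
In LR-population terms, what I am suggesting is roughly the following
(a sketch against the vgic_v3_populate_lr() structure, not an actual
patch; val is the ICH_LR value being assembled):

/*
 * Sketch: only edge interrupts may be queued as pending+active; a
 * level interrupt that is still active stays pending in software and
 * is resampled once the guest deactivates it.
 */
if (irq->active) {
        val |= ICH_LR_ACTIVE_BIT;
        if (irq->config == VGIC_CONFIG_EDGE && irq_is_pending(irq))
                val |= ICH_LR_PENDING_BIT;
        /* level: deliberately never pending+active here */
} else if (irq_is_pending(irq)) {
        val |= ICH_LR_PENDING_BIT;
}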

Thanks,
-Christoffer


Re: [RFC PATCH] KVM: arm/arm64: vgic: change condition for level interrupt resampling

2018-03-08 Thread Christoffer Dall
On Thu, Mar 08, 2018 at 11:54:27AM +, Marc Zyngier wrote:
> On 08/03/18 09:49, Marc Zyngier wrote:
> > [updated Christoffer's email address]
> > 
> > Hi Shunyong,
> > 
> > On 08/03/18 07:01, Shunyong Yang wrote:
> >> When resampling irqfds is enabled, level interrupt should be
> >> de-asserted when resampling happens. On page 4-47 of GIC v3
> >> specification IHI0069D, it said,
> >> "When the PE acknowledges an SGI, a PPI, or an SPI at the CPU
> >> interface, the IRI changes the status of the interrupt to active
> >> and pending if:
> >> • It is an edge-triggered interrupt, and another edge has been
> >> detected since the interrupt was acknowledged.
> >> • It is a level-sensitive interrupt, and the level has not been
> >> deasserted since the interrupt was acknowledged."
> >>
> >> GIC v2 specification IHI0048B.b has similar description on page
> >> 3-42 for state machine transition.
> >>
> >> When some VFIO device, like mtty(8250 VFIO mdev emulation driver
> >> in samples/vfio-mdev) triggers a level interrupt, the status
> >> transition in LR is pending-->active-->active and pending.
> >> Then it will wait resampling to de-assert the interrupt.
> >>
> >> Current design of lr_signals_eoi_mi() will return false if state
> >> in LR is not invalid(Inactive). This causes resampling not to happen
> >> in the mtty case.
> > 
> > Let me rephrase this, and tell me if I understood it correctly:
> > 
> > - A level interrupt is injected, activated by the guest (LR state=active)
> > - guest exits, re-enters, (LR state=pending+active)
> > - guest EOIs the interrupt (LR state=pending)
> > - maintenance interrupt
> > - we don't signal the resampling because we're not in an invalid state
> > 
> > Is that correct?
> > 
> > That's an interesting case, because it seems to invalidate some of the 
> > optimizations that went in over a year ago.
> > 
> > 096f31c4360f KVM: arm/arm64: vgic: Get rid of MISR and EISR fields
> > b6095b084d87 KVM: arm/arm64: vgic: Get rid of unnecessary 
> > save_maint_int_state
> > af0614991ab6 KVM: arm/arm64: vgic: Get rid of unnecessary 
> > process_maintenance operation
> > 
> > We could compare the value of the LR before the guest entry with
> > the value at exit time, but we still could miss it if we have a
> > transition such as P+A -> P -> A and assume a long enough propagation
> > delay for the maintenance interrupt (which is very likely).
> > 
> > In essence, we have lost the benefit of EISR, which was to give us a
> > way to deal with asynchronous signalling.
> > 
> >>
> >> This causes the interrupt to fire continuously into the guest even
> >> when the 8250 IIR reports no interrupt. When the 8250's interrupt is
> >> configured in shared mode, the kernel passes the interrupt to other
> >> drivers to handle. However, there is no other driver involved. Then,
> >> a "nobody cared" kernel complaint occurs.
> >>
> >> / # cat /dev/ttyS0
> >> [4.826836] random: crng init done
> >> [6.373620] irq 41: nobody cared (try booting with the "irqpoll"
> >> option)
> >> [6.376414] CPU: 0 PID: 1307 Comm: cat Not tainted 4.16.0-rc4 #4
> >> [6.378927] Hardware name: linux,dummy-virt (DT)
> >> [6.380876] Call trace:
> >> [6.381937]  dump_backtrace+0x0/0x180
> >> [6.383495]  show_stack+0x14/0x1c
> >> [6.384902]  dump_stack+0x90/0xb4
> >> [6.386312]  __report_bad_irq+0x38/0xe0
> >> [6.387944]  note_interrupt+0x1f4/0x2b8
> >> [6.389568]  handle_irq_event_percpu+0x54/0x7c
> >> [6.391433]  handle_irq_event+0x44/0x74
> >> [6.393056]  handle_fasteoi_irq+0x9c/0x154
> >> [6.394784]  generic_handle_irq+0x24/0x38
> >> [6.396483]  __handle_domain_irq+0x60/0xb4
> >> [6.398207]  gic_handle_irq+0x98/0x1b0
> >> [6.399796]  el1_irq+0xb0/0x128
> >> [6.401138]  _raw_spin_unlock_irqrestore+0x18/0x40
> >> [6.403149]  __setup_irq+0x41c/0x678
> >> [6.404669]  request_threaded_irq+0xe0/0x190
> >> [6.406474]  univ8250_setup_irq+0x208/0x234
> >> [6.408250]  serial8250_do_startup+0x1b4/0x754
> >> [6.410123]  serial8250_startup+0x20/0x28
> >> [6.411826]  uart_startup.part.21+0x78/0x144
> >> [6.413633]  uart_port_activate+0x50/0x68
> >> [6.415328]  tty_port_open+0x84/0xd4
> >> [6.416851]  uart_open+0x34/0x44
> >> [6.418229]  tty_open+0xec/0x3c8
> >> [6.419610]  chrdev_open+0xb0/0x198
> >> [6.421093]  do_dentry_open+0x200/0x310
> >> [6.422714]  vfs_open+0x54/0x84
> >> [6.424054]  path_openat+0x2dc/0xf04
> >> [6.425569]  do_filp_open+0x68/0xd8
> >> [6.427044]  do_sys_open+0x16c/0x224
> >> [6.428563]  SyS_openat+0x10/0x18
> >> [6.429972]  el0_svc_naked+0x30/0x34
> >> [6.431494] handlers:
> >> [6.432479] [<0e9fb4bb>] serial8250_interrupt
> >> [6.434597] Disabling IRQ #41
> >>
> >> This patch changes the lr state condition in lr_signals_eoi_mi() from
> >> invalid(Inactive) to active and pending to avoid this.
> >>
> >> I am not sure about the original design of the condition of
> >> invalid(active). So, this RFC is 

Re: [RFC PATCH] KVM: arm/arm64: vgic: change condition for level interrupt resampling

2018-03-08 Thread Christoffer Dall
On Thu, Mar 08, 2018 at 09:49:43AM +, Marc Zyngier wrote:
> [updated Christoffer's email address]
> 
> Hi Shunyong,
> 
> On 08/03/18 07:01, Shunyong Yang wrote:
> > When resampling irqfds is enabled, level interrupt should be
> > de-asserted when resampling happens. On page 4-47 of GIC v3
> > specification IHI0069D, it said,
> > "When the PE acknowledges an SGI, a PPI, or an SPI at the CPU
> > interface, the IRI changes the status of the interrupt to active
> > and pending if:
> > • It is an edge-triggered interrupt, and another edge has been
> > detected since the interrupt was acknowledged.
> > • It is a level-sensitive interrupt, and the level has not been
> > deasserted since the interrupt was acknowledged."
> > 
> > GIC v2 specification IHI0048B.b has similar description on page
> > 3-42 for state machine transition.
> > 
> > When some VFIO device, like mtty(8250 VFIO mdev emulation driver
> > in samples/vfio-mdev) triggers a level interrupt, the status
> > transition in LR is pending-->active-->active and pending.
> > Then it will wait resampling to de-assert the interrupt.
> > 
> > Current design of lr_signals_eoi_mi() will return false if state
> > in LR is not invalid(Inactive). This causes resampling not to happen
> > in the mtty case.
> 
> Let me rephrase this, and tell me if I understood it correctly:
> 
> - A level interrupt is injected, activated by the guest (LR state=active)
> - guest exits, re-enters, (LR state=pending+active)
> - guest EOIs the interrupt (LR state=pending)
> - maintenance interrupt
> - we don't signal the resampling because we're not in an invalid state
> 
> Is that correct?
> 
> That's an interesting case, because it seems to invalidate some of the 
> optimizations that went in over a year ago.
> 
> 096f31c4360f KVM: arm/arm64: vgic: Get rid of MISR and EISR fields
> b6095b084d87 KVM: arm/arm64: vgic: Get rid of unnecessary save_maint_int_state
> af0614991ab6 KVM: arm/arm64: vgic: Get rid of unnecessary process_maintenance 
> operation
> 
> We could compare the value of the LR before the guest entry with
> the value at exit time, but we still could miss it if we have a
> transition such as P+A -> P -> A and assume a long enough propagation
> delay for the maintenance interrupt (which is very likely).
> 
> In essence, we have lost the benefit of EISR, which was to give us a
> way to deal with asynchronous signalling.
> 

I don't understand why EISR gives us anything beyond looking at the LR
and evaluating if the state is 00.  My reading of the spec is that the
EISR is merely a shortcut to knowing the state of the LRs but contains
no record or information beyond what you can read from the LRs.

What am I missing?
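
For reference, my mental model is that an EISR-equivalent can be
recomputed purely from the saved LRs, along the lines of this sketch
(using the __gic_v3_get_lr() helper in vgic-v3-sr.c):

/*
 * Sketch: a bit is "EOIed" when an LR that requested EOI signalling
 * has gone back to the invalid (state == 00) state.
 */
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
u32 eisr = 0;
int i;

for (i = 0; i < used_lrs; i++) {
        u64 lr = __gic_v3_get_lr(i);

        if ((lr & ICH_LR_EOI) && !(lr & ICH_LR_STATE))
                eisr |= BIT(i);
}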

Thanks,
-Christoffer


Re: [PATCH 1/2] KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid

2018-03-08 Thread Christoffer Dall
On Thu, Mar 08, 2018 at 10:19:49AM +, Marc Zyngier wrote:
> On 07/03/18 23:34, Christoffer Dall wrote:
> > On Wed, Mar 7, 2018 at 12:40 PM, Marc Zyngier <marc.zyng...@arm.com> wrote:
> >> The vgic code is trying to be clever when injecting GICv2 SGIs,
> >> and will happily populate LRs with the same interrupt number if
> >> they come from multiple vcpus (after all, they are distinct
> >> interrupt sources).
> >>
> >> Unfortunately, this is against the letter of the architecture,
> >> and the GICv2 architecture spec says "Each valid interrupt stored
> >> in the List registers must have a unique VirtualID for that
> >> virtual CPU interface.". GICv3 has similar (although slightly
> >> ambiguous) restrictions.
> >>
> >> This results in guests locking up when using GICv2-on-GICv3, for
> >> example. The obvious fix is to stop trying so hard, and inject
> >> a single vcpu per SGI per guest entry. After all, pending SGIs
> >> with multiple source vcpus are pretty rare, and are mostly seen
> >> in scenarios where the physical CPUs are severely overcommitted.
> >>
> >> Cc: sta...@vger.kernel.org
> >> Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush 
> >> framework")
> >> Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>
> >> ---
> >>  virt/kvm/arm/vgic/vgic.c | 11 +--
> >>  1 file changed, 1 insertion(+), 10 deletions(-)
> >>
> >> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> >> index c7c5ef190afa..1f7ff175f47b 100644
> >> --- a/virt/kvm/arm/vgic/vgic.c
> >> +++ b/virt/kvm/arm/vgic/vgic.c
> >> @@ -720,18 +720,9 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
> >> list_for_each_entry(irq, _cpu->ap_list_head, ap_list) {
> >> spin_lock(>irq_lock);
> >>
> >> -   if (unlikely(vgic_target_oracle(irq) != vcpu))
> >> -   goto next;
> >> -
> >> -   /*
> >> -* If we get an SGI with multiple sources, try to get
> >> -* them in all at once.
> >> -*/
> >> -   do {
> >> +   if (likely(vgic_target_oracle(irq) == vcpu))
> >> vgic_populate_lr(vcpu, irq, count++);
> > 
> > I think we need to change vgic_populate_lr to set the EOI maintenance
> > interrupt flag so that when the interrupt is deactivated, if there are
> > additional pending sources, we exit the guest and pick up the
> > interrupt.
> 
> Potentially. We need to be careful about about the semantics of EOI MI
> with non-level interrupts (see the other thread about EOI signalling).

I'll have a look.

> 
> > An alternative would be to set the underflow interrupt, but I don't
> > think that would be correct for multiple priorities, because the SGI
> > could have a higher priority than other pending interrupts we put in
> > the LR.
> 
> I don't think priorities are the issue (after all, we already sort the
> irqs in order of priority). 

Yes, but assume you have three pending interrupts, one SGI from two
sources, and one SPI, and assume that the SGI has priority 1 and SPI
priority 2 (lower means higher priority), then I think with underflow or
the no-pending interrupt flag, we'll deliver the SGI from the first
source, and then the SPI, and then exit to pick up the last SGI from
the other source.  That's not how I understand the GIC architecture is
supposed to work.  Am I missing something?

> My worry is that underflow is allowed to
> fire if there is one interrupt pending, which implies that you could
> end-up in a livelock scenario if you only have one SGI pending with
> multiple sources.

Yes, doesn't work, so I think it should be a maintenance interrupt on
EOI.

> 
> Another possibility would be to use ICH_HCR_EL2.NPIE (GICH_HCR.NPIE on
> GICv2), which delivers a a MI if no pending interrupts are present. Once
> the SGI has been activated, we're guaranteed to be able to inject a new
> pending one.
> 
> I like the latter, because it doesn't overload the rest of the code with
> new semantics. Thoughts?
> 

I'm fine with that if I can be proven wrong about the multiple sources
and priorities.

Thanks,
-Christoffer


Re: [PATCH 1/2] KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid

2018-03-07 Thread Christoffer Dall
On Wed, Mar 7, 2018 at 12:40 PM, Marc Zyngier  wrote:
> The vgic code is trying to be clever when injecting GICv2 SGIs,
> and will happily populate LRs with the same interrupt number if
> they come from multiple vcpus (after all, they are distinct
> interrupt sources).
>
> Unfortunately, this is against the letter of the architecture,
> and the GICv2 architecture spec says "Each valid interrupt stored
> in the List registers must have a unique VirtualID for that
> virtual CPU interface.". GICv3 has similar (although slightly
> ambiguous) restrictions.
>
> This results in guests locking up when using GICv2-on-GICv3, for
> example. The obvious fix is to stop trying so hard, and inject
> a single vcpu per SGI per guest entry. After all, pending SGIs
> with multiple source vcpus are pretty rare, and are mostly seen
> in scenarios where the physical CPUs are severely overcommitted.
>
> Cc: sta...@vger.kernel.org
> Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework")
> Signed-off-by: Marc Zyngier 
> ---
>  virt/kvm/arm/vgic/vgic.c | 11 +--
>  1 file changed, 1 insertion(+), 10 deletions(-)
>
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index c7c5ef190afa..1f7ff175f47b 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -720,18 +720,9 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
> list_for_each_entry(irq, _cpu->ap_list_head, ap_list) {
> spin_lock(>irq_lock);
>
> -   if (unlikely(vgic_target_oracle(irq) != vcpu))
> -   goto next;
> -
> -   /*
> -* If we get an SGI with multiple sources, try to get
> -* them in all at once.
> -*/
> -   do {
> +   if (likely(vgic_target_oracle(irq) == vcpu))
> vgic_populate_lr(vcpu, irq, count++);

I think we need to change vgic_populate_lr to set the EOI maintenance
interrupt flag so that when the interrupt is deactivated, if there are
additional pending sources, we exit the guest and pick up the
interrupt.

An alternative would be to set the underflow interrupt, but I don't
think that would be correct for multiple priorities, because the SGI
could have a higher priority than other pending interrupts we put in
the LR.

Thanks,
Christoffer


[PATCH] KVM: arm/arm64: Reset mapped IRQs on VM reset

2018-03-05 Thread Christoffer Dall
We currently don't allow resetting mapped IRQs from userspace, because
their state is controlled by the hardware.  But we do need to reset the
state when the VM is reset, so we provide a function for the 'owner' of
the mapped interrupt to reset the interrupt state.

Currently only the timer uses mapped interrupts, so we call this
function from the timer reset logic.

Signed-off-by: Christoffer Dall <cd...@kernel.org>
---
This depends on "KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls
than KVM_RUN" from the VHE optimization series so that the reset doesn't
get called while vtimer->loaded is true.

 include/kvm/arm_vgic.h|  1 +
 virt/kvm/arm/arch_timer.c |  4 
 virt/kvm/arm/vgic/vgic.c  | 26 ++
 3 files changed, 31 insertions(+)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cdbd142ca7f2..02924ae2527e 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -360,6 +360,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
 
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70268c0bec79..09890a5268d1 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -571,6 +571,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 {
+   struct arch_timer_cpu *timer = >arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
@@ -584,6 +585,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
ptimer->cnt_ctl = 0;
kvm_timer_update_state(vcpu);
 
+   if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
+   kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+
return 0;
 }
 
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index c7c5ef190afa..0001858a2c23 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -495,6 +495,32 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned 
int host_irq,
return ret;
 }
 
+/**
+ * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
+ * @vcpu: The VCPU pointer
+ * @vintid: The INTID of the interrupt
+ *
+ * Reset the active and pending states of a mapped interrupt.  Kernel
+ * subsystems injecting mapped interrupts should reset their interrupt lines
+ * when we are doing a reset of the VM.
+ */
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
+{
+   struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+   unsigned long flags;
+
+   if (!irq->hw)
+   goto out;
+
+   spin_lock_irqsave(>irq_lock, flags);
+   irq->active = false;
+   irq->pending_latch = false;
+   irq->line_level = false;
+   spin_unlock_irqrestore(>irq_lock, flags);
+out:
+   vgic_put_irq(vcpu->kvm, irq);
+}
+
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 {
struct vgic_irq *irq;
-- 
2.14.2



Re: VCPU hotplug on KVM/ARM

2018-02-27 Thread Christoffer Dall
On Tue, Feb 27, 2018 at 05:34:28PM +0530, btha...@codeaurora.org wrote:
> Hi Christoffer,
> 
> Thanks for your reply.
> 
> On 2018-02-27 16:17, Christoffer Dall wrote:
> >Hi Bhupinder,
> >
> >On Tue, Feb 27, 2018 at 03:01:17PM +0530, btha...@codeaurora.org wrote:
> >>I hope it is the right forum to post my query.
> >>
> >>
> >>
> >>I am currently looking at the possibility of adding a new VCPU to a
> >>running
> >>guest VM in KVM/ARM. I see that currently, it is not allowed to add a
> >>new
> >>VCPU to a guest VM, if it is already initialized. The first check in
> >>kvm_arch_vcpu_create() returns failure if it is already initialized.
> >>
> >
> >This would require a major rework of a lot of logic surrounding the GIC
> >and other parts of KVM initialization.
> >
> >>
> >>
> >>There was some work done in QEMU to add support for VCPU hotplug:
> >>https://lists.gnu.org/archive/html/qemu-arm/2017-05/msg00404.html
> >>
> >>
> >>
> >>But I am looking at the KVM side for enabling adding a new VCPU. If you
> >>can
> >>point me to any relevant work/resources, which I can refer to then it
> >>will
> >>help me.
> >>
> >
> >I don't have any specific pointers, but I was always told that the way
> >we were going to do CPU hotplug would be to instantiate a large number
> >of VCPUs, and hotplug would be equivalent to turning on a VCPU which was
> >previously powered off.
> >
> >Is this not still a feasible solution?
> It should be a feasible solution provided the guest VM is not able to
> control the onlining/offlining of VCPUs. It should be controlled by the
> Host.
> 

KVM could simply refuse to turn on some of the CPUs unless given
permission from host userspace.
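
As a sketch of what I mean, the PSCI CPU_ON path could check a
userspace-granted permission before waking the target vcpu (note that
KVM_ARM_VCPU_HOTPLUG_ALLOWED is an invented feature bit for
illustration, not an existing ABI):

/*
 * Hypothetical: only power on vcpus that userspace has marked as
 * hotpluggable; everything else gets PSCI_RET_DENIED.
 */
static bool kvm_vcpu_may_power_on(struct kvm_vcpu *vcpu)
{
        return test_bit(KVM_ARM_VCPU_HOTPLUG_ALLOWED,
                        vcpu->arch.features);
}

/* in the PSCI CPU_ON handler, before marking the target runnable: */
if (!kvm_vcpu_may_power_on(target_vcpu))
        return PSCI_RET_DENIED;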

> >
> >How does VCPU hotplug work on x86?
> On x86, you can add a vcpu through libvirt setvcpu command and it shows up
> in the guest VM as a new CPU if you do lscpu.
> 

Sure, but what is the mechanism?  Does x86 qemu actually call
KVM_CREATE_VCPU, or is this also a question of turning on already
created vcpus?

Thanks,
-Christoffer


[PATCH v5 40/40] KVM: arm/arm64: Avoid VGICv3 save/restore on VHE with no IRQs

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We can finally get completely rid of any calls to the VGICv3
save/restore functions when the AP lists are empty on VHE systems.  This
requires carefully factoring out trap configuration from saving and
restoring state, and carefully choosing what to do on the VHE and
non-VHE path.

One of the challenges is that we cannot save/restore the VMCR lazily:
when emulating a GICv2-on-GICv3, the VMCR can only be written while
ICC_SRE_EL1.SRE is cleared, since otherwise all Group-0 interrupts end
up being delivered as FIQ.

To solve this problem, and still provide fast performance in the fast
path of exiting a VM when no interrupts are pending (which also
optimizes the latency for actually delivering virtual interrupts coming
from physical interrupts), we orchestrate a dance of only doing the
activate/deactivate traps in vgic load/put for VHE systems (which can
have ICC_SRE_EL1.SRE cleared when running in the host), and doing the
configuration on every round-trip on non-VHE systems.

Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v3:
 - Removed extra blank line

 arch/arm/include/asm/kvm_hyp.h   |   2 +
 arch/arm/kvm/hyp/switch.c|   8 ++-
 arch/arm64/include/asm/kvm_hyp.h |   2 +
 arch/arm64/kvm/hyp/switch.c  |   8 ++-
 virt/kvm/arm/hyp/vgic-v3-sr.c| 120 +--
 virt/kvm/arm/vgic/vgic-v3.c  |   6 ++
 virt/kvm/arm/vgic/vgic.c |   9 ++-
 7 files changed, 103 insertions(+), 52 deletions(-)

diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 530a3c1cfe6f..e93a0cac9add 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -110,6 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
 void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 882b9b9e0077..acf1c37fa49c 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -90,14 +90,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu 
*vcpu)
 
 static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
 {
-   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif))
+   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
+   __vgic_v3_deactivate_traps(vcpu);
+   }
 }
 
 static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
 {
-   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif))
+   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif)) {
+   __vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
+   }
 }
 
 static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 6f3929b2fcf7..384c34397619 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -124,6 +124,8 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
 void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 86abbee40d3f..07b572173265 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -195,15 +195,19 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu 
*vcpu)
 /* Save VGICv3 state on non-VHE systems */
 static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 {
-   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif))
+   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
+   __vgic_v3_deactivate_traps(vcpu);
+   }
 }
 
 /* Restore VGICv3 state on non-VHE systems */
 static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
 {
-   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif))
+   if (static_branch_unlikely(_vgic_global_state.gicv3_cpuif)) {
+   __vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
+   }
 }
 
 static bool __hyp_text __true_value(void)
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 437d7af08683..b13cbd41dbc3 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt
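
(The hunks are truncated here.  Based on the commit message above, the
shape of the new trap activation for the GICv2-on-GICv3 case is roughly
the sketch below; this is an approximation, not the exact patch body:)

/*
 * Sketch: configure traps on vgic load (VHE) or on every entry
 * (non-VHE).  For GICv2-on-GICv3 guests (vgic_sre clear), the VMCR
 * must be written while ICC_SRE_EL1.SRE is clear, so it cannot be
 * deferred.
 */
void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
{
        struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

        if (!cpu_if->vgic_sre) {
                write_gicreg(0, ICC_SRE_EL1);
                isb();
                write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
        }

        write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}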

[PATCH v5 39/40] KVM: arm/arm64: Move VGIC APR save/restore to vgic put/load

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

The APRs can only have bits set when the guest acknowledges an interrupt
in the LR and can only have a bit cleared when the guest EOIs an
interrupt in the LR.  Therefore, if we have no LRs with any
pending/active interrupts, the APR cannot change value and there is no
need to clear it on every exit from the VM (hint: it will have already
been cleared when we exited the guest the last time with the LRs all
EOIed).

The only case we need to take care of is when we migrate the VCPU away
from a CPU or migrate a new VCPU onto a CPU, or when we return to
userspace to capture the state of the VCPU for migration.  To make sure
this works, factor out the APR save/restore functionality into separate
functions called from the VCPU (and by extension VGIC) put/load hooks.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm/include/asm/kvm_hyp.h   |   2 +
 arch/arm64/include/asm/kvm_hyp.h |   2 +
 virt/kvm/arm/hyp/vgic-v3-sr.c| 124 +--
 virt/kvm/arm/vgic/vgic-v2.c  |   7 +--
 virt/kvm/arm/vgic/vgic-v3.c  |   5 ++
 5 files changed, 78 insertions(+), 62 deletions(-)

diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 1ab8329e9ff7..530a3c1cfe6f 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -110,6 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
 
 asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
 asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index febe417b8b4e..6f3929b2fcf7 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -124,6 +124,8 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
 void __timer_enable_traps(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 9abf2f3c12b5..437d7af08683 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include 
 
 #define vtr_to_max_lr_idx(v)   ((v) & 0xf)
#define vtr_to_nr_pre_bits(v)  ((((u32)(v) >> 26) & 7) + 1)
@@ -221,14 +222,11 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu 
*vcpu)
 
if (used_lrs) {
int i;
-   u32 nr_pre_bits;
u32 elrsr;
 
elrsr = read_gicreg(ICH_ELSR_EL2);
 
write_gicreg(0, ICH_HCR_EL2);
-   val = read_gicreg(ICH_VTR_EL2);
-   nr_pre_bits = vtr_to_nr_pre_bits(val);
 
for (i = 0; i < used_lrs; i++) {
if (elrsr & (1 << i))
@@ -238,39 +236,10 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu 
*vcpu)
 
__gic_v3_set_lr(0, i);
}
-
-   switch (nr_pre_bits) {
-   case 7:
-   cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
-   cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
-   case 6:
-   cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
-   default:
-   cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
-   }
-
-   switch (nr_pre_bits) {
-   case 7:
-   cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
-   cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
-   case 6:
-   cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
-   default:
-   cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
-   }
} else {
if (static_branch_unlikely(_v3_cpuif_trap) ||
cpu_if->its_vpe.its_vm)
write_gicreg(0, ICH_HCR_EL2);
-
-   cpu_if->vgic_ap0r[0] = 0;
-   cpu_if->vgic_ap0r[1] = 0;
-   cpu_if->vgic_ap0r[2] = 0;
-   cpu_if->vgic_ap0r[3] = 0;
-   cpu_if->vgic_ap1r[0] = 0;
-   cpu_if->vgic_ap1r[1] = 0;
-   cpu_if->vgic_ap1r[2] = 0;
-   cpu_if->vgic_ap1r[3] = 0;
}
 
val = read_gicreg(ICC_SRE_EL2);
@@ -287,8 +256,6 @@ void __h
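
(The vgic-v3.c side of this change is cut off above.  In rough sketch
form, the put/load hooks end up doing something like the following,
with the VMCR handling elided:)

/*
 * Sketch only: restore the APRs when the vcpu is loaded onto a CPU,
 * save them when it is put.  The real hooks also handle the VMCR.
 */
void vgic_v3_load(struct kvm_vcpu *vcpu)
{
        kvm_call_hyp(__vgic_v3_restore_aprs, vcpu);
}

void vgic_v3_put(struct kvm_vcpu *vcpu)
{
        kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
}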

[PATCH v5 36/40] KVM: arm/arm64: Handle VGICv2 save/restore from the main VGIC code

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We can program the GICv2 hypervisor control interface logic directly
from the core vgic code and can instead do the save/restore directly
from the flush/sync functions, which can lead to a number of future
optimizations.

Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v1:
 - Removed unnecessary kvm_hyp.h include
 - Adapted the patch based on having gotten rid of storing the elrsr
   prior to this patch.
 - No longer change the interrupt handling of the maintenance interrupt
   handler.  That seems to have been a leftover from an earlier version
   of the timer patches where we were syncing the vgic state after
   having enabled interrupts, leading to the maintenance interrupt firing.

   It may be possible to move the vgic sync function out to an
   interrupts enabled section later on, which would require
   re-introducing logic to disable the VGIC maintenance interrupt in the
   maintenance interrupt handler, but we leave this for future work as
   the immediate benefit is not clear.

 arch/arm/kvm/hyp/switch.c|  4 ---
 arch/arm64/include/asm/kvm_hyp.h |  2 --
 arch/arm64/kvm/hyp/switch.c  |  4 ---
 virt/kvm/arm/hyp/vgic-v2-sr.c| 65 
 virt/kvm/arm/vgic/vgic-v2.c  | 63 ++
 virt/kvm/arm/vgic/vgic.c | 19 +++-
 virt/kvm/arm/vgic/vgic.h |  3 ++
 7 files changed, 84 insertions(+), 76 deletions(-)

diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index aac025783ee8..882b9b9e0077 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -92,16 +92,12 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
 {
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_save_state(vcpu);
-   else
-   __vgic_v2_save_state(vcpu);
 }
 
 static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
 {
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_restore_state(vcpu);
-   else
-   __vgic_v2_restore_state(vcpu);
 }
 
 static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 949f2e77ae58..febe417b8b4e 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -120,8 +120,6 @@ typeof(orig) * __hyp_text fname(void) \
return val; \
 }
 
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 67c66b4e237e..31badf6e91e8 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -196,16 +196,12 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
 {
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_save_state(vcpu);
-   else
-   __vgic_v2_save_state(vcpu);
 }
 
 static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
 {
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_restore_state(vcpu);
-   else
-   __vgic_v2_restore_state(vcpu);
 }
 
 static bool __hyp_text __true_value(void)
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index a91b0d2b9249..0bbafdfd4adb 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -23,71 +23,6 @@
 #include 
 #include 
 
-static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
-{
-   struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-   u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-   u64 elrsr;
-   int i;
-
-   elrsr = readl_relaxed(base + GICH_ELRSR0);
-   if (unlikely(used_lrs > 32))
-   elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32;
-
-   for (i = 0; i < used_lrs; i++) {
-   if (elrsr & (1UL << i))
-   cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
-   else
-   cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
-
-   writel_relaxed(0, base + GICH_LR0 + (i * 4));
-   }
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
-{
-   struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-   struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-   struct vgic_dist *vgic = &kvm->arch.vgic;
-   void __iomem *base =

[PATCH v5 37/40] KVM: arm/arm64: Move arm64-only vgic-v2-sr.c file to arm64

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

The vgic-v2-sr.c file now only contains the logic to replay unaligned
accesses to the virtual CPU interface on 16K and 64K page systems, which
is only relevant on 64-bit platforms.  Therefore move this file to the
arm64 KVM tree, remove the compile directive from the 32-bit side
makefile, and remove the ifdef in the C file.

Since this file also no longer saves/restores anything, rename the file
to vgic-v2-cpuif-proxy.c to more accurately describe the logic in this
file.

Reviewed-by: Andre Przywara <andre.przyw...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Renamed file to vgic-v2-cpuif-proxy.c as suggested by Marc.

 arch/arm/kvm/hyp/Makefile   | 1 -
 arch/arm64/kvm/hyp/Makefile | 2 +-
 .../arm/hyp/vgic-v2-sr.c => arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c| 2 --
 3 files changed, 1 insertion(+), 4 deletions(-)
 rename virt/kvm/arm/hyp/vgic-v2-sr.c => arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c (98%)

diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 5638ce0c9524..1964111c984a 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -7,7 +7,6 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 KVM=../../../../virt/kvm
 
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
 
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index f04400d494b7..4313f7475333 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 KVM=../../../../virt/kvm
 
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
 
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
 obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += entry.o
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
similarity index 98%
rename from virt/kvm/arm/hyp/vgic-v2-sr.c
rename to arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 0bbafdfd4adb..97f357ea9c72 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -23,7 +23,6 @@
 #include 
 #include 
 
-#ifdef CONFIG_ARM64
 /*
  * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
  *  guest.
@@ -77,4 +76,3 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
 
return 1;
 }
-#endif
-- 
2.14.2



[PATCH v5 38/40] KVM: arm/arm64: Handle VGICv3 save/restore from the main VGIC code on VHE

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

Just like we can program the GICv2 hypervisor control interface directly
from the core vgic code, we can do the same for the GICv3 hypervisor
control interface on VHE systems.

We do this by simply calling the save/restore functions when we have VHE
and we can then get rid of the save/restore function calls from the VHE
world switch function.

One caveat is that we now write GICv3 system register state before the
potential early exit path in the run loop, and because we sync back
state in the early exit path, we have to ensure that we read a
consistent GIC state from the sync path, even though we have never
actually run the guest with the newly written GIC state.  We solve this
by inserting an ISB in the early exit path.
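
A sketch of the resulting ordering in the run loop (names abbreviated;
the isb() is the one added in the arm.c hunk below):

	kvm_vgic_flush_hwstate(vcpu);   /* may write ICH_* sysregs on VHE */
	...
	if (ret <= 0 || kvm_request_pending(vcpu)) {
		vcpu->mode = OUTSIDE_GUEST_MODE;
		isb();  /* commit the flush-side sysreg writes */
		kvm_vgic_sync_hwstate(vcpu);    /* now reads a consistent view */
	}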

Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Added can_access_vgic_from_kernel() primitive to make the
   save/restore flow from the main vgic code slightly easier to
   understand.
 - Also added a __hyp prefix to the non-VHE world-switch
   save/restore functions for GICv3 to avoid confusion with the
   save/restore functions in the main VGIC code.

Changes since v2:
 - Added ISB in the early exit path in the run loop as explained
   in the commit message.

 arch/arm64/kvm/hyp/switch.c | 13 ++---
 virt/kvm/arm/arm.c  |  1 +
 virt/kvm/arm/vgic/vgic.c| 21 +++--
 3 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 31badf6e91e8..86abbee40d3f 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -192,13 +192,15 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
write_sysreg(0, vttbr_el2);
 }
 
-static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 {
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_save_state(vcpu);
 }
 
-static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+/* Restore VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
 {
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_restore_state(vcpu);
@@ -400,8 +402,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__activate_traps(vcpu);
__activate_vm(vcpu->kvm);
 
-   __vgic_restore_state(vcpu);
-
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
 
@@ -415,7 +415,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
fp_enabled = fpsimd_enabled_vhe();
 
sysreg_save_guest_state_vhe(guest_ctxt);
-   __vgic_save_state(vcpu);
 
__deactivate_traps(vcpu);
 
@@ -451,7 +450,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__activate_traps(vcpu);
__activate_vm(kern_hyp_va(vcpu->kvm));
 
-   __vgic_restore_state(vcpu);
+   __hyp_vgic_restore_state(vcpu);
__timer_enable_traps(vcpu);
 
/*
@@ -484,7 +483,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__sysreg_save_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
-   __vgic_save_state(vcpu);
+   __hyp_vgic_save_state(vcpu);
 
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 09dbee56ed8f..dba629c5f8ac 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -717,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
+   isb(); /* Ensure work in x_flush_hwstate is committed */
kvm_pmu_sync_hwstate(vcpu);
if (static_branch_unlikely(_irqchip_in_use))
kvm_timer_sync_hwstate(vcpu);
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 12e2a28f437e..eaab4a616ecf 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vgic.h"
 
@@ -749,10 +750,22 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
vgic_clear_lr(vcpu, count);
 }
 
+static inline bool can_access_vgic_from_kernel(void)
+{
+   /*
+* GICv2 can always be accessed from the kernel because it is
+* memory-mapped, and VHE systems can access GICv3 EL2 system
+* registers.
+*/
+   return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
+}
+
 static inline void 
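
The diff is cut short above; as a hedged sketch based on the commit
message, the helper would guard the deferred restore in the flush path
along these lines (the exact call site is an assumption):

	/* only touch the GIC from kernel context when it is reachable */
	if (can_access_vgic_from_kernel())
		vgic_restore_state(vcpu);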

[PATCH v5 33/40] KVM: arm64: Configure c15, PMU, and debug register traps on cpu load/put for VHE

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We do not have to change the c15 trap setting on each switch to/from the
guest on VHE systems, because this setting only affects guest EL1/EL0
(and therefore not the VHE host).

The PMU and debug trap configuration can also be done on vcpu load/put
instead, because they don't affect how the VHE host kernel can access the
debug registers while executing KVM kernel code.
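
In outline, the resulting pairing on VHE looks like this (both helpers
are declared in the kvm_hyp.h hunk below):

	void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
	{
		...
		activate_traps_vhe_load(vcpu);  /* trap config set up once */
	}

	void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
	{
		deactivate_traps_vhe_put();     /* host mdcr_el2 restored once */
		...
	}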

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Clarified commit message

 arch/arm64/include/asm/kvm_hyp.h |  3 +++
 arch/arm64/kvm/hyp/switch.c  | 31 ++-
 arch/arm64/kvm/hyp/sysreg-sr.c   |  4 
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 2b1fda90dde4..949f2e77ae58 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -147,6 +147,9 @@ void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 bool __fpsimd_enabled(void);
 
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
+void deactivate_traps_vhe_put(void);
+
 u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
 void __noreturn __hyp_do_panic(unsigned long, ...);
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 5fbb77bd4e90..eab433fa1442 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -102,6 +102,8 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 {
u64 val;
 
+   __activate_traps_common(vcpu);
+
val = CPTR_EL2_DEFAULT;
val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
write_sysreg(val, cptr_el2);
@@ -121,20 +123,12 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
 
__activate_traps_fpsimd32(vcpu);
-   __activate_traps_common(vcpu);
__activate_traps_arch()(vcpu);
 }
 
 static void __hyp_text __deactivate_traps_vhe(void)
 {
extern char vectors[];  /* kernel exception vectors */
-   u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
-   mdcr_el2 &= MDCR_EL2_HPMN_MASK |
-   MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
-   MDCR_EL2_TPMS;
-
-   write_sysreg(mdcr_el2, mdcr_el2);
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
@@ -144,6 +138,8 @@ static void __hyp_text __deactivate_traps_nvhe(void)
 {
u64 mdcr_el2 = read_sysreg(mdcr_el2);
 
+   __deactivate_traps_common();
+
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
 
@@ -167,10 +163,27 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
if (vcpu->arch.hcr_el2 & HCR_VSE)
vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
 
-   __deactivate_traps_common();
__deactivate_traps_arch()();
 }
 
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+   __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+   u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+   mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+   MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+   MDCR_EL2_TPMS;
+
+   write_sysreg(mdcr_el2, mdcr_el2);
+
+   __deactivate_traps_common();
+}
+
 static void __hyp_text __activate_vm(struct kvm *kvm)
 {
write_sysreg(kvm->arch.vttbr, vttbr_el2);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index aacba4636871..b3894df6bf1a 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -254,6 +254,8 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
__sysreg_restore_el1_state(guest_ctxt);
 
vcpu->arch.sysregs_loaded_on_cpu = true;
+
+   activate_traps_vhe_load(vcpu);
 }
 
 /**
@@ -275,6 +277,8 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
if (!has_vhe())
return;
 
+   deactivate_traps_vhe_put();
+
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
__sysreg32_save_state(vcpu);
-- 
2.14.2



[PATCH v5 35/40] KVM: arm/arm64: Get rid of vgic_elrsr

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

There is really no need to store the vgic_elrsr on the VGIC data
structures as the only need we have for the elrsr is to figure out if an
LR is inactive when we save the VGIC state upon returning from the
guest.  We might as well store this in a temporary local variable.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Removed endianness fix comment from commit text, as this was
   fixed in a separate commit already merged in mainline.

Changes since v1:
 - Moved patch up the queue before we start moving code around to avoid
   moving potentially broken code.

 include/kvm/arm_vgic.h|  2 --
 virt/kvm/arm/hyp/vgic-v2-sr.c | 28 +++-
 virt/kvm/arm/hyp/vgic-v3-sr.c |  6 +++---
 virt/kvm/arm/vgic/vgic-v2.c   |  1 -
 virt/kvm/arm/vgic/vgic-v3.c   |  1 -
 5 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cdbd142ca7f2..ac98ae46bfb7 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -263,7 +263,6 @@ struct vgic_dist {
 struct vgic_v2_cpu_if {
u32 vgic_hcr;
u32 vgic_vmcr;
-   u64 vgic_elrsr; /* Saved only */
u32 vgic_apr;
u32 vgic_lr[VGIC_V2_MAX_LRS];
 };
@@ -272,7 +271,6 @@ struct vgic_v3_cpu_if {
u32 vgic_hcr;
u32 vgic_vmcr;
u32 vgic_sre;   /* Restored only, change ignored */
-   u32 vgic_elrsr; /* Saved only */
u32 vgic_ap0r[4];
u32 vgic_ap1r[4];
u64 vgic_lr[VGIC_V3_MAX_LRS];
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index 4fe6e797e8b3..a91b0d2b9249 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -23,29 +23,19 @@
 #include 
 #include 
 
-static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
-{
-   struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-   int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
-   u32 elrsr0, elrsr1;
-
-   elrsr0 = readl_relaxed(base + GICH_ELRSR0);
-   if (unlikely(nr_lr > 32))
-   elrsr1 = readl_relaxed(base + GICH_ELRSR1);
-   else
-   elrsr1 = 0;
-
-   cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-}
-
 static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
 {
 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-   int i;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+   u64 elrsr;
+   int i;
+
+   elrsr = readl_relaxed(base + GICH_ELRSR0);
+   if (unlikely(used_lrs > 32))
+   elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32;
 
for (i = 0; i < used_lrs; i++) {
-   if (cpu_if->vgic_elrsr & (1UL << i))
+   if (elrsr & (1UL << i))
cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
else
cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
@@ -68,13 +58,9 @@ void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
 
if (used_lrs) {
cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
-
-   save_elrsr(vcpu, base);
save_lrs(vcpu, base);
-
writel_relaxed(0, base + GICH_HCR);
} else {
-   cpu_if->vgic_elrsr = ~0UL;
cpu_if->vgic_apr = 0;
}
 }
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index f5c3d6d7019e..9abf2f3c12b5 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -222,15 +222,16 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
if (used_lrs) {
int i;
u32 nr_pre_bits;
+   u32 elrsr;
 
-   cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
+   elrsr = read_gicreg(ICH_ELSR_EL2);
 
write_gicreg(0, ICH_HCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
nr_pre_bits = vtr_to_nr_pre_bits(val);
 
for (i = 0; i < used_lrs; i++) {
-   if (cpu_if->vgic_elrsr & (1 << i))
+   if (elrsr & (1 << i))
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
else
cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
@@ -262,7 +263,6 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
cpu_if->its_vpe.its_vm)
write_gicreg(0, ICH_HCR_EL2);
 
-

[PATCH v5 34/40] KVM: arm64: Cleanup __activate_traps and __deactivate_traps for VHE and non-VHE

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

To make the code more readable and to avoid the overhead of a function
call, let's get rid of a pair of the alternative function selectors and
explicitly call the VHE and non-VHE functions using the has_vhe() static
key based selector instead, telling the compiler to try to inline the
static function if it can.

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Removed inline attributes from static functions in arch/arm64/kvm/hyp/switch.c

 arch/arm64/kvm/hyp/switch.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index eab433fa1442..67c66b4e237e 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -86,7 +86,7 @@ static void __hyp_text __deactivate_traps_common(void)
write_sysreg(0, pmuserenr_el0);
 }
 
-static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu)
+static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 {
u64 val;
 
@@ -109,10 +109,6 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
write_sysreg(val, cptr_el2);
 }
 
-static hyp_alternate_select(__activate_traps_arch,
-   __activate_traps_nvhe, __activate_traps_vhe,
-   ARM64_HAS_VIRT_HOST_EXTN);
-
 static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 {
u64 hcr = vcpu->arch.hcr_el2;
@@ -123,10 +119,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
 
__activate_traps_fpsimd32(vcpu);
-   __activate_traps_arch()(vcpu);
+   if (has_vhe())
+   activate_traps_vhe(vcpu);
+   else
+   __activate_traps_nvhe(vcpu);
 }
 
-static void __hyp_text __deactivate_traps_vhe(void)
+static void deactivate_traps_vhe(void)
 {
extern char vectors[];  /* kernel exception vectors */
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
@@ -148,10 +147,6 @@ static void __hyp_text __deactivate_traps_nvhe(void)
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
 
-static hyp_alternate_select(__deactivate_traps_arch,
-   __deactivate_traps_nvhe, __deactivate_traps_vhe,
-   ARM64_HAS_VIRT_HOST_EXTN);
-
 static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 {
/*
@@ -163,7 +158,10 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
if (vcpu->arch.hcr_el2 & HCR_VSE)
vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
 
-   __deactivate_traps_arch()();
+   if (has_vhe())
+   deactivate_traps_vhe();
+   else
+   __deactivate_traps_nvhe();
 }
 
 void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
-- 
2.14.2



[PATCH v5 31/40] KVM: arm64: Move common VHE/non-VHE trap config in separate functions

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

As we are about to be more lazy with some of the trap configuration
register read/writes for VHE systems, move the logic that is currently
shared between VHE and non-VHE into a separate function which can be
called from either the world-switch path or from vcpu_load/vcpu_put.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Added blank newline to separate logic blocks in
   __activate_traps_common.

Changes since v3:
 - Separate fpsimd32 trap configuration into a separate function
   which is still called from __activate_traps, because we no longer
   defer saving/restoring of VFP registers to load/put.

 arch/arm64/kvm/hyp/switch.c | 77 +++--
 1 file changed, 46 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 9b0380d3c9c3..9d90bda3c2cc 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -56,7 +56,46 @@ static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
 }
 
-static void __hyp_text __activate_traps_vhe(void)
+static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
+{
+   /*
+* We are about to set CPTR_EL2.TFP to trap all floating point
+* register accesses to EL2, however, the ARM ARM clearly states that
+* traps are only taken to EL2 if the operation would not otherwise
+* trap to EL1.  Therefore, always make sure that for 32-bit guests,
+* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+* it will cause an exception.
+*/
+   if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
+   write_sysreg(1 << 30, fpexc32_el2);
+   isb();
+   }
+}
+
+static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+   /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
+   write_sysreg(1 << 15, hstr_el2);
+
+   /*
+* Make sure we trap PMU access from EL0 to EL2. Also sanitize
+* PMSELR_EL0 to make sure it never contains the cycle
+* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
+* EL1 instead of being trapped to EL2.
+*/
+   write_sysreg(0, pmselr_el0);
+   write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+   write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static void __hyp_text __deactivate_traps_common(void)
+{
+   write_sysreg(0, hstr_el2);
+   write_sysreg(0, pmuserenr_el0);
+}
+
+static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu)
 {
u64 val;
 
@@ -68,7 +107,7 @@ static void __hyp_text __activate_traps_vhe(void)
write_sysreg(kvm_get_hyp_vector(), vbar_el1);
 }
 
-static void __hyp_text __activate_traps_nvhe(void)
+static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 {
u64 val;
 
@@ -85,37 +124,14 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 {
u64 hcr = vcpu->arch.hcr_el2;
 
-   /*
-* We are about to set CPTR_EL2.TFP to trap all floating point
-* register accesses to EL2, however, the ARM ARM clearly states that
-* traps are only taken to EL2 if the operation would not otherwise
-* trap to EL1.  Therefore, always make sure that for 32-bit guests,
-* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
-* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
-* it will cause an exception.
-*/
-   if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
-   write_sysreg(1 << 30, fpexc32_el2);
-   isb();
-   }
+   write_sysreg(hcr, hcr_el2);
 
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
 
-   write_sysreg(hcr, hcr_el2);
-
-   /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
-   write_sysreg(1 << 15, hstr_el2);
-   /*
-* Make sure we trap PMU access from EL0 to EL2. Also sanitize
-* PMSELR_EL0 to make sure it never contains the cycle
-* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
-* EL1 instead of being trapped to EL2.
-*/
-   write_sysreg(0, pmselr_el0);
-   write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
-   write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-   __activate_traps_arch()();
+   __activate_traps_fpsimd32(vcpu);
+   __activ

[PATCH v5 30/40] KVM: arm64: Defer saving/restoring 32-bit sysregs to vcpu load/put

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

When running a 32-bit VM (EL1 in AArch32), the AArch32 system registers
can be deferred to vcpu load/put on VHE systems because neither
the host kernel nor host userspace uses these registers.

Note that we can't save DBGVCR32_EL2 conditionally based on the state of
the debug dirty flag on VHE after this change, because during
vcpu_load() we haven't calculated a valid debug flag yet, and when we've
restored the register during vcpu_load() we also have to save it during
vcpu_put().  This means that we'll always restore/save the register for
VHE on load/put, but luckily vcpu load/put are called rarely, so saving
an extra register unconditionally shouldn't significantly hurt
performance.

We also cannot defer saving FPEXC32_EL2, because this register only
holds a guest-valid value for 32-bit guests during the exit path, after
the guest has used FPSIMD registers and the early assembly handler has
restored the register when taking the EL2 fault.  We therefore have to
check whether fpsimd is enabled for the guest in the exit path and save
the register there, for both VHE and non-VHE guests.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Moved logic to directly read/write 32-bit sysregs in the
   vcpu_{read,write}_sys_reg when sysregs are loaded to this patch.
 - Removed inline attribute from __fpsimd_save_fpexc32

Changes since v3:
 - Rework the FPEXC32 save/restore logic to no longer attempt to
   save/restore this register lazily.

Changes since v2:
 - New patch (deferred register handling has been reworked)

 arch/arm64/kvm/hyp/switch.c| 17 +++--
 arch/arm64/kvm/hyp/sysreg-sr.c | 15 ++-
 arch/arm64/kvm/sys_regs.c  |  6 ++
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ec31e447dd7f..9b0380d3c9c3 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -47,6 +47,15 @@ bool __hyp_text __fpsimd_enabled(void)
return __fpsimd_is_enabled()();
 }
 
+/* Save the 32-bit only FPSIMD system register state */
+static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
+{
+   if (!vcpu_el1_is_32bit(vcpu))
+   return;
+
+   vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
+}
+
 static void __hyp_text __activate_traps_vhe(void)
 {
u64 val;
@@ -380,11 +389,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 
__vgic_restore_state(vcpu);
 
-   /*
-* We must restore the 32-bit state before the sysregs, thanks
-* to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
-*/
-   __sysreg32_restore_state(vcpu);
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
 
@@ -398,7 +402,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
fp_enabled = __fpsimd_enabled();
 
sysreg_save_guest_state_vhe(guest_ctxt);
-   __sysreg32_save_state(vcpu);
__vgic_save_state(vcpu);
 
__deactivate_traps(vcpu);
@@ -408,6 +411,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+   __fpsimd_save_fpexc32(vcpu);
}
 
__debug_switch_to_host(vcpu);
@@ -477,6 +481,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+   __fpsimd_save_fpexc32(vcpu);
}
 
/*
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 9c60b8062724..aacba4636871 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -196,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
 
-   if (__fpsimd_enabled())
-   sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-
-   if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+   if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
 }
 
@@ -221,7 +218,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
 
-   if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+   if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);

[PATCH v5 28/40] KVM: arm64: Defer saving/restoring 64-bit sysregs to vcpu load/put on VHE

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

Some system registers do not affect the host kernel's execution and can
therefore be loaded when we are about to run a VCPU and we don't have to
restore the host state to the hardware before the time when we are
actually about to return to userspace or schedule out the VCPU thread.

The EL1 system registers and the userspace state registers only
affecting EL0 execution do not need to be saved and restored on every
switch between the VM and the host, because they don't affect the host
kernel's execution.

We mark all registers which are now deferred as such in the
vcpu_{read,write}_sys_reg accessors in sys_regs.c to ensure the most
up-to-date copy is always accessed.

Note MPIDR_EL1 (controlled via VMPIDR_EL2) is accessed from other vcpu
threads, for example via the GIC emulation, and therefore must be
declared as immediate, which is fine as the guest cannot modify this
value.

The 32-bit sysregs can also be deferred but we do this in a separate
patch as it requires a bit more infrastructure.
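
In outline, the read accessor grows a switch that prefers the hardware
when the registers are loaded (the register list is abridged and partly
assumed here; the exact set is in the sys_regs.c hunk, which the archive
truncates):

	u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
	{
		if (!vcpu->arch.sysregs_loaded_on_cpu)
			goto immediate_read;

		switch (reg) {
		case SCTLR_EL1: return read_sysreg_el1(sctlr);
		case TTBR0_EL1: return read_sysreg_el1(ttbr0);
		/* ... remaining deferred EL1/EL0 registers ... */
		}

	immediate_read:
		return __vcpu_sys_reg(vcpu, reg);
	}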

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Added explanatory note about MPIDR_EL1 to
   vcpu_read_sys_reg and vcpu_write_sys_reg.

Changes since v3:
 - Changed to switch-based sysreg approach

 arch/arm64/kvm/hyp/sysreg-sr.c | 39 ++---
 arch/arm64/kvm/sys_regs.c  | 49 ++
 2 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 906606dc4e2c..9c60b8062724 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -25,8 +25,12 @@
 /*
  * Non-VHE: Both host and guest must save everything.
  *
- * VHE: Host must save tpidr*_el0, mdscr_el1, sp_el0,
- * and guest must save everything.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
+ * which are handled as part of the el2 return state) on every switch.
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU.  EL1 registers only need to be
+ * switched when potentially going to run a different VCPU.  The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
  */
 
 static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@@ -93,14 +97,11 @@ void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
 void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
 {
__sysreg_save_common_state(ctxt);
-   __sysreg_save_user_state(ctxt);
 }
 
 void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
 {
-   __sysreg_save_el1_state(ctxt);
__sysreg_save_common_state(ctxt);
-   __sysreg_save_user_state(ctxt);
__sysreg_save_el2_return_state(ctxt);
 }
 
@@ -169,14 +170,11 @@ void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
 void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
 {
__sysreg_restore_common_state(ctxt);
-   __sysreg_restore_user_state(ctxt);
 }
 
 void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
 {
-   __sysreg_restore_el1_state(ctxt);
__sysreg_restore_common_state(ctxt);
-   __sysreg_restore_user_state(ctxt);
__sysreg_restore_el2_return_state(ctxt);
 }
 
@@ -240,6 +238,18 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
  */
 void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
 {
+   struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+   struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+   if (!has_vhe())
+   return;
+
+   __sysreg_save_user_state(host_ctxt);
+
+   __sysreg_restore_user_state(guest_ctxt);
+   __sysreg_restore_el1_state(guest_ctxt);
+
+   vcpu->arch.sysregs_loaded_on_cpu = true;
 }
 
 /**
@@ -255,6 +265,19 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
  */
 void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
 {
+   struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+   struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+   if (!has_vhe())
+   return;
+
+   __sysreg_save_el1_state(guest_ctxt);
+   __sysreg_save_user_state(guest_ctxt);
+
+   /* Restore host user state */
+   __sysreg_restore_user_state(host_ctxt);
+
+   vcpu->arch.sysregs_loaded_on_cpu = false;
 }
 
 void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c809f0d1a059..17eb7772e059 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -85,8 +85,33 @@ u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
/*
 * System registers listed in the switch are not saved on every
	* exit from the guest but are only saved on vcpu_put.

[PATCH v5 29/40] KVM: arm64: Prepare to handle deferred save/restore of 32-bit registers

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

32-bit registers are not used by a 64-bit host kernel and can be
deferred, but we need to rework the accesses to these register to access
the latest values depending on whether or not guest system registers are
loaded on the CPU or only reside in memory.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Fixed typos in commit message
 - Moved change to read/write sysreg to the following patch as
   suggested by Drew

Changes since v3:
 - Don't also try to write hardware spsr when sysregs are not loaded
 - Adapted patch to use switch-based sysreg save/restore approach
 - (Kept additional BUG_ON() in vcpu_read_spsr32() to keep the compiler happy)

Changes since v2:
 - New patch (deferred register handling has been reworked)

 arch/arm64/include/asm/kvm_emulate.h | 32 +
 arch/arm64/kvm/regmap.c  | 67 +++-
 2 files changed, 59 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6ed18ce0d638..23b33e8ea03a 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -33,7 +33,8 @@
 #include 
 
 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
 
 bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
 void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
@@ -162,41 +163,26 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
 
 static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
 {
-   unsigned long *p = (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
-
-   if (vcpu_mode_is_32bit(vcpu)) {
-   unsigned long *p_32bit = vcpu_spsr32(vcpu);
-
-   /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */
-   if (p_32bit != p)
-   return *p_32bit;
-   }
+   if (vcpu_mode_is_32bit(vcpu))
+   return vcpu_read_spsr32(vcpu);
 
if (vcpu->arch.sysregs_loaded_on_cpu)
return read_sysreg_el1(spsr);
else
-   return *p;
+   return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
 }
 
-static inline void vcpu_write_spsr(const struct kvm_vcpu *vcpu, unsigned long v)
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
 {
-   unsigned long *p = (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
-
-   /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */
if (vcpu_mode_is_32bit(vcpu)) {
-   unsigned long *p_32bit = vcpu_spsr32(vcpu);
-
-   /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */
-   if (p_32bit != p) {
-   *p_32bit = v;
-   return;
-   }
+   vcpu_write_spsr32(vcpu, v);
+   return;
}
 
if (vcpu->arch.sysregs_loaded_on_cpu)
write_sysreg_el1(v, spsr);
else
-   *p = v;
+   vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
 }
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index bbc6ae32e4af..eefe403a2e63 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -141,28 +141,61 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
 /*
  * Return the SPSR for the current mode of the virtual CPU.
  */
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
 {
unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
switch (mode) {
-   case COMPAT_PSR_MODE_SVC:
-   mode = KVM_SPSR_SVC;
-   break;
-   case COMPAT_PSR_MODE_ABT:
-   mode = KVM_SPSR_ABT;
-   break;
-   case COMPAT_PSR_MODE_UND:
-   mode = KVM_SPSR_UND;
-   break;
-   case COMPAT_PSR_MODE_IRQ:
-   mode = KVM_SPSR_IRQ;
-   break;
-   case COMPAT_PSR_MODE_FIQ:
-   mode = KVM_SPSR_FIQ;
-   break;
+   case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC;
+   case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT;
+   case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND;
+   case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ;
+   case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ;
+   default: BUG();
+   }
+}
+
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
+{
+   int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+   if (!vcpu->a

[PATCH v5 24/40] KVM: arm64: Rewrite system register accessors to read/write functions

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <cd...@cs.columbia.edu>

Currently we access the system registers array via the vcpu_sys_reg()
macro.  However, we are about to change the behavior to some times
modify the register file directly, so let's change this to two
primitives:

 * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg()
 * Direct array access macro __vcpu_sys_reg()

The accessor macros should be used in places where the code needs to
access the currently loaded VCPU's state as observed by the guest.  For
example, when trapping on cache related registers, a write to a system
register should go directly to the VCPU version of the register.

The direct array access macro can be used in places where the VCPU is
known to never be running (for example userspace access) or for
registers which are never context switched (for example all the PMU
system registers).

This rewrites all users of vcpu_sys_regs to one of the macros described
above.

No functional change.
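
As a usage sketch (the SCTLR_EL1 case mirrors the kvm_emulate.h hunk
below; the PMU line is an illustration of the direct macro, not taken
from this diff):

	/* trap handler: operate on the running VCPU's view */
	u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	vcpu_write_sys_reg(vcpu, sctlr | (1 << 25), SCTLR_EL1);

	/* userspace access or never-switched register: memory copy */
	__vcpu_sys_reg(vcpu, PMCR_EL0) = val;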

Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <cd...@cs.columbia.edu>
---

Notes:
Changes since v4:
 - Reordered val/reg of vcpu_write_sys_reg to make it look more like
   the write_sysreg macro.
 - Fixed up a few scenarios of ugly text wrapping that were trying
   to maintain the 80 char limit; we can be a little more loose.
 - Added Marc's acked-by and Drew's reviewed-by despite the changes,
   as I did the changes with a coccinelle script and figured it
   probably got things right:
@@
expression vcpu, val;
const reg;
@@
- vcpu_write_sys_reg(vcpu, reg, val)
+ vcpu_write_sys_reg(vcpu, val, reg)

Changes since v2:
 - New patch (deferred register handling has been reworked)

 arch/arm64/include/asm/kvm_emulate.h | 13 ---
 arch/arm64/include/asm/kvm_host.h| 13 ++-
 arch/arm64/include/asm/kvm_mmu.h |  2 +-
 arch/arm64/kvm/debug.c   | 28 ++-
 arch/arm64/kvm/inject_fault.c|  8 ++---
 arch/arm64/kvm/sys_regs.c| 69 ++--
 arch/arm64/kvm/sys_regs.h|  4 +--
 arch/arm64/kvm/sys_regs_generic_v8.c |  4 +--
 virt/kvm/arm/pmu.c   | 36 +--
 9 files changed, 101 insertions(+), 76 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 3cc535591bdf..d313aaae5c38 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -290,15 +290,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
-   return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+   return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
-   if (vcpu_mode_is_32bit(vcpu))
+   if (vcpu_mode_is_32bit(vcpu)) {
*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
-   else
-   vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+   } else {
+   u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+   sctlr |= (1 << 25);
+   vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
+   }
 }
 
 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
@@ -306,7 +309,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
 
-   return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+   return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
 }
 
 static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 9001fd0890c9..179bb9d5760b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -287,7 +287,18 @@ struct kvm_vcpu_arch {
 };
 
 #define vcpu_gp_regs(v)(&(v)->arch.ctxt.gp_regs)
-#define vcpu_sys_reg(v,r)  ((v)->arch.ctxt.sys_regs[(r)])
+
+/*
+ * Only use __vcpu_sys_reg if you know you want the memory backed version of a
+ * register, and not the one most recently accessed by a running VCPU.  For
+ * example, for userspace access or for system registers that are never context
+ * switched, but only emulated.
+ */
+#define __vcpu_sys_reg(v,r)((v)->arch.ctxt.sys_regs[(r)])
+
+#define vcpu_read_sys_reg(v,r) __vcpu_sys_reg(v,r)
+#define vcpu_write_sys_reg(v,n,r)  do { __vcpu_sys_reg(v,r) = n; } while (0)
+
 /*
  * CP14 and CP15 live in the same array, as they are backed by the
  * same system registers.
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7faed6e48b46..cffa34e23718 100

[PATCH v5 26/40] KVM: arm/arm64: Prepare to handle deferred save/restore of SPSR_EL1

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

SPSR_EL1 is not used by a VHE host kernel and can be deferred, but we
need to rework the accesses to this register to access the latest value
depending on whether or not guest system registers are loaded on the CPU
or only reside in memory.

The handling of accessing the various banked SPSRs for 32-bit VMs is a
bit clunky, but this will be improved in following patches which will
first prepare and subsequently implement deferred save/restore of the
32-bit registers, including the 32-bit SPSRs.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Dropped unnecessary cast

Changes since v2:
 - New patch (deferred register handling has been reworked)

 arch/arm/include/asm/kvm_emulate.h   | 12 ++-
 arch/arm/kvm/emulate.c   |  2 +-
 arch/arm64/include/asm/kvm_emulate.h | 41 +++-
 arch/arm64/kvm/inject_fault.c|  4 ++--
 virt/kvm/arm/aarch32.c   |  2 +-
 5 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index e27caa4b47a1..6493bd479ddc 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -41,7 +41,17 @@ static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num)
return vcpu_reg(vcpu, reg_num);
 }
 
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu);
+
+static inline unsigned long vcpu_read_spsr(struct kvm_vcpu *vcpu)
+{
+   return *__vcpu_spsr(vcpu);
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+   *__vcpu_spsr(vcpu) = v;
+}
 
 static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
 u8 reg_num)
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index fa501bf437f3..9046b53d87c1 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -142,7 +142,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
 /*
  * Return the SPSR for the current mode of the virtual CPU.
  */
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu)
 {
unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d313aaae5c38..f32640132e26 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -26,6 +26,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -143,13 +144,43 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
 }
 
-/* Get vcpu SPSR for current mode */
-static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
 {
-   if (vcpu_mode_is_32bit(vcpu))
-   return vcpu_spsr32(vcpu);
+   unsigned long *p = (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+
+   if (vcpu_mode_is_32bit(vcpu)) {
+   unsigned long *p_32bit = vcpu_spsr32(vcpu);
+
+   /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */
+   if (p_32bit != p)
+   return *p_32bit;
+   }
+
+   if (vcpu->arch.sysregs_loaded_on_cpu)
+   return read_sysreg_el1(spsr);
+   else
+   return *p;
+}
 
-   return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+static inline void vcpu_write_spsr(const struct kvm_vcpu *vcpu, unsigned long v)
+{
+   unsigned long *p = (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+
+   /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */
+   if (vcpu_mode_is_32bit(vcpu)) {
+   unsigned long *p_32bit = vcpu_spsr32(vcpu);
+
+   /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */
+   if (p_32bit != p) {
+   *p_32bit = v;
+   return;
+   }
+   }
+
+   if (vcpu->arch.sysregs_loaded_on_cpu)
+   write_sysreg_el1(v, spsr);
+   else
+   *p = v;
 }
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 63dba401fc7d..7f8d2a4e420f 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -71,7 +71,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-   *vcpu_spsr(vcpu) = cpsr;
+   vcpu_write_spsr(vcpu, cpsr);
 
vcpu_write_sy

[PATCH v5 27/40] KVM: arm64: Prepare to handle deferred save/restore of ELR_EL1

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

ELR_EL1 is not used by a VHE host kernel and can be deferred, but we
need to rework the accesses to this register to access the latest value
depending on whether or not guest system registers are loaded on the CPU
or only reside in memory.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v2:
 - New patch (deferred register handling has been reworked)

 arch/arm64/include/asm/kvm_emulate.h | 18 +-
 arch/arm64/kvm/inject_fault.c|  4 ++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f32640132e26..6ed18ce0d638 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -90,11 +90,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
 }
 
-static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
 {
return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
 }
 
+static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
+{
+   if (vcpu->arch.sysregs_loaded_on_cpu)
+   return read_sysreg_el1(elr);
+   else
+   return *__vcpu_elr_el1(vcpu);
+}
+
+static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
+{
+   if (vcpu->arch.sysregs_loaded_on_cpu)
+   write_sysreg_el1(v, elr);
+   else
+   *__vcpu_elr_el1(vcpu) = v;
+}
+
 static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
 {
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 7f8d2a4e420f..d8e71659ba7e 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -67,7 +67,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
 
-   *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+   vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
@@ -102,7 +102,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
-   *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+   vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-- 
2.14.2



[PATCH v5 25/40] KVM: arm64: Introduce framework for accessing deferred sysregs

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We are about to defer saving and restoring some groups of system
registers to vcpu_put and vcpu_load on supported systems.  This means
that we need some infrastructure to access system registers which
supports either accessing the memory backing of the register or directly
accessing the system registers, depending on the state of the system
when we access the register.

We do this by defining read/write accessor functions, which can handle
both "immediate" and "deferrable" system registers.  Immediate registers
are always saved/restored in the world-switch path, but deferrable
registers are only saved/restored in vcpu_put/vcpu_load when supported
and sysregs_loaded_on_cpu will be set in that case.

Note that we don't use the deferred mechanism yet in this patch, but only
introduce infrastructure.  This is to improve convenience of review in
the subsequent patches where it is clear which registers become
deferred.
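
For illustration only, a later patch would populate the write-side
switch along these lines (the case shown here is an assumption):

	switch (reg) {
	case SCTLR_EL1: write_sysreg_el1(val, sctlr); return;
	/* ... other deferred registers ... */
	}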

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Slightly reworded commentary based on Drew's feedback

Changes since v3:
 - Changed to a switch-statement based approach to improve
   readability.

Changes since v2:
 - New patch (deferred register handling has been reworked)

 arch/arm64/include/asm/kvm_host.h |  8 ++--
 arch/arm64/kvm/sys_regs.c | 33 +
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 179bb9d5760b..ab46bc70add6 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -284,6 +284,10 @@ struct kvm_vcpu_arch {
 
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
+
+   /* True when deferrable sysregs are loaded on the physical CPU,
+* see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
+   bool sysregs_loaded_on_cpu;
 };
 
 #define vcpu_gp_regs(v)(&(v)->arch.ctxt.gp_regs)
@@ -296,8 +300,8 @@ struct kvm_vcpu_arch {
  */
 #define __vcpu_sys_reg(v,r)((v)->arch.ctxt.sys_regs[(r)])
 
-#define vcpu_read_sys_reg(v,r) __vcpu_sys_reg(v,r)
-#define vcpu_write_sys_reg(v,n,r)  do { __vcpu_sys_reg(v,r) = n; } while (0)
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
 
 /*
  * CP14 and CP15 live in the same array, as they are backed by the
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7514db002430..c809f0d1a059 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -76,6 +77,38 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
 }
 
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+   if (!vcpu->arch.sysregs_loaded_on_cpu)
+   goto immediate_read;
+
+   /*
+* System registers listed in the switch are not saved on every
+* exit from the guest but are only saved on vcpu_put.
+*/
+   switch (reg) {
+   }
+
+immediate_read:
+   return __vcpu_sys_reg(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+   if (!vcpu->arch.sysregs_loaded_on_cpu)
+   goto immediate_write;
+
+   /*
+* System registers listed in the switch are not restored on every
+* entry to the guest but are only restored on vcpu_load.
+*/
+   switch (reg) {
+   }
+
+immediate_write:
+   __vcpu_sys_reg(vcpu, reg) = val;
+}
+
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
 static u32 cache_levels;
 
-- 
2.14.2



[PATCH v5 23/40] KVM: arm64: Change 32-bit handling of VM system registers

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We currently handle 32-bit accesses to trapped VM system registers using
the 32-bit index into the coproc array on the vcpu structure, which is a
union of the coproc array and the sysreg array.

Since all the 32-bit coproc indices are created to correspond to the
architectural mapping between 64-bit system registers and 32-bit
coprocessor registers, and because the AArch64 system registers are
twice the size of the AArch32 coprocessor registers, we can always find
the system register entry that we must update by dividing the 32-bit
coproc index by 2.

This is going to make our lives much easier when we have to start
accessing system registers that use deferred save/restore and might
have to be read directly from the physical CPU.
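
A worked example of the index arithmetic (the union layout is the one in
kvm_host.h; the concrete indices are illustrative):

	/* copro[] (u32) and sys_regs[] (u64) share the same storage, so
	 * copro[2*n] and copro[2*n+1] alias the halves of sys_regs[n]. */
	int reg = r->reg / 2;   /* e.g. 32-bit index 5 -> sysreg slot 2 */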

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  8 
 arch/arm64/kvm/sys_regs.c | 20 +++-
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 87abc94fb591..9001fd0890c9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -295,14 +295,6 @@ struct kvm_vcpu_arch {
 #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
 #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
 
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r))
-#define vcpu_cp15_64_low(v,r)  vcpu_cp15((v),(r) + 1)
-#else
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1)
-#define vcpu_cp15_64_low(v,r)  vcpu_cp15((v),(r))
-#endif
-
 struct kvm_vm_stat {
ulong remote_tlb_flush;
 };
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 6feb4a2215cb..691f81c31018 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -121,16 +121,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
  const struct sys_reg_desc *r)
 {
bool was_enabled = vcpu_has_cache_enabled(vcpu);
+   u64 val;
+   int reg = r->reg;
 
BUG_ON(!p->is_write);
 
-   if (!p->is_aarch32) {
-   vcpu_sys_reg(vcpu, r->reg) = p->regval;
+   /* See the 32bit mapping in kvm_host.h */
+   if (p->is_aarch32)
+   reg = r->reg / 2;
+
+   if (!p->is_aarch32 || !p->is_32bit) {
+   val = p->regval;
} else {
-   if (!p->is_32bit)
-   vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval);
-   vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval);
+   val = vcpu_sys_reg(vcpu, reg);
+   if (r->reg % 2)
+   val = (p->regval << 32) | (u64)lower_32_bits(val);
+   else
+   val = ((u64)upper_32_bits(val) << 32) |
+   lower_32_bits(p->regval);
}
+   vcpu_sys_reg(vcpu, reg) = val;
 
kvm_toggle_cache(vcpu, was_enabled);
return true;
-- 
2.14.2



[PATCH v5 21/40] KVM: arm64: Unify non-VHE host/guest sysreg save and restore functions

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

There is no need to have multiple identical functions with different
names for saving host and guest state.  When saving and restoring state
for the host and guest, the state is the same for both contexts, and
that's why we have the kvm_cpu_context structure.  Delete one
version and rename the other to simply save/restore.

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/include/asm/kvm_hyp.h |  6 ++
 arch/arm64/kvm/hyp/switch.c  | 10 +-
 arch/arm64/kvm/hyp/sysreg-sr.c   | 18 ++
 3 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 23c09d9af343..2b1fda90dde4 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -131,10 +131,8 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
 void __timer_enable_traps(struct kvm_vcpu *vcpu);
 void __timer_disable_traps(struct kvm_vcpu *vcpu);
 
-void __sysreg_save_host_state_nvhe(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_host_state_nvhe(struct kvm_cpu_context *ctxt);
-void __sysreg_save_guest_state_nvhe(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_guest_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
 void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
 void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
 void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index fd845dda007a..ec31e447dd7f 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -429,7 +429,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
 
-   __sysreg_save_host_state_nvhe(host_ctxt);
+   __sysreg_save_state_nvhe(host_ctxt);
 
__activate_traps(vcpu);
__activate_vm(kern_hyp_va(vcpu->kvm));
@@ -442,7 +442,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
 */
__sysreg32_restore_state(vcpu);
-   __sysreg_restore_guest_state_nvhe(guest_ctxt);
+   __sysreg_restore_state_nvhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
 
do {
@@ -464,7 +464,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 
fp_enabled = __fpsimd_enabled();
 
-   __sysreg_save_guest_state_nvhe(guest_ctxt);
+   __sysreg_save_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
__vgic_save_state(vcpu);
@@ -472,7 +472,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
 
-   __sysreg_restore_host_state_nvhe(host_ctxt);
+   __sysreg_restore_state_nvhe(host_ctxt);
 
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
@@ -502,7 +502,7 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
__timer_disable_traps(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
-   __sysreg_restore_host_state_nvhe(__host_ctxt);
+   __sysreg_restore_state_nvhe(__host_ctxt);
}
 
/*
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 18801ab56e8b..d35b3aa680ab 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -78,14 +78,7 @@ static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
 }
 
-void __hyp_text __sysreg_save_host_state_nvhe(struct kvm_cpu_context *ctxt)
-{
-   __sysreg_save_el1_state(ctxt);
-   __sysreg_save_common_state(ctxt);
-   __sysreg_save_user_state(ctxt);
-}
-
-void __hyp_text __sysreg_save_guest_state_nvhe(struct kvm_cpu_context *ctxt)
+void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
 {
__sysreg_save_el1_state(ctxt);
__sysreg_save_common_state(ctxt);
@@ -154,14 +147,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
 }
 
-void __hyp_text __sysreg_restore_host_state_nvhe(struct kvm_cpu_context *ctxt)
-{
-   __sysreg_restore_el1_state(ctxt);
-   __sysreg_restore_common_state(ctxt);
-   __sysreg_restore_user_state(ctxt);
-}
-
-void __hyp_text __sysreg_restore_guest_state_nvhe(struct kvm_cpu_context *ctxt)
+void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)

[PATCH v5 20/40] KVM: arm/arm64: Remove leftover comment from kvm_vcpu_run_vhe

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

The comment only applied to SPE on non-VHE systems, so we simply remove
it.

Suggested-by: Andrew Jones <drjo...@redhat.com>
Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/kvm/hyp/switch.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d60d3a018882..fd845dda007a 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -410,10 +410,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
}
 
-   /*
-* This must come after restoring the host sysregs, since a non-VHE
-* system may enable SPE here and make use of the TTBRs.
-*/
__debug_switch_to_host(vcpu);
 
return exit_code;
-- 
2.14.2



[PATCH v5 19/40] KVM: arm64: Introduce separate VHE/non-VHE sysreg save/restore functions

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We are about to handle system registers quite differently between VHE
and non-VHE systems.  In preparation for that, we need to split some of
the handling functions between VHE and non-VHE functionality.

For now, we simply copy the non-VHE functions, but we do change the use
of static keys for VHE and non-VHE functionality now that we have
separate functions.

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/include/asm/kvm_hyp.h | 12 
 arch/arm64/kvm/hyp/switch.c  | 20 ++--
 arch/arm64/kvm/hyp/sysreg-sr.c   | 40 
 3 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index aeda2a777365..23c09d9af343 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -131,10 +131,14 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
 void __timer_enable_traps(struct kvm_vcpu *vcpu);
 void __timer_disable_traps(struct kvm_vcpu *vcpu);
 
-void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg_save_host_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_host_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_save_guest_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_guest_state_nvhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
 void __sysreg32_save_state(struct kvm_vcpu *vcpu);
 void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 307f8c1fcc2f..d60d3a018882 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -373,7 +373,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
 
-   __sysreg_save_host_state(host_ctxt);
+   sysreg_save_host_state_vhe(host_ctxt);
 
__activate_traps(vcpu);
__activate_vm(vcpu->kvm);
@@ -385,7 +385,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
 */
__sysreg32_restore_state(vcpu);
-   __sysreg_restore_guest_state(guest_ctxt);
+   sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
 
do {
@@ -397,13 +397,13 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 
fp_enabled = __fpsimd_enabled();
 
-   __sysreg_save_guest_state(guest_ctxt);
+   sysreg_save_guest_state_vhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__vgic_save_state(vcpu);
 
__deactivate_traps(vcpu);
 
-   __sysreg_restore_host_state(host_ctxt);
+   sysreg_restore_host_state_vhe(host_ctxt);
 
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
@@ -433,7 +433,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
 
-   __sysreg_save_host_state(host_ctxt);
+   __sysreg_save_host_state_nvhe(host_ctxt);
 
__activate_traps(vcpu);
__activate_vm(kern_hyp_va(vcpu->kvm));
@@ -446,7 +446,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
 */
__sysreg32_restore_state(vcpu);
-   __sysreg_restore_guest_state(guest_ctxt);
+   __sysreg_restore_guest_state_nvhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
 
do {
@@ -468,7 +468,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 
fp_enabled = __fpsimd_enabled();
 
-   __sysreg_save_guest_state(guest_ctxt);
+   __sysreg_save_guest_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
__vgic_save_state(vcpu);
@@ -476,7 +476,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
 
-   __sysreg_restore_host_state(host_ctxt);
+   __sysreg_restore_host_state_nvhe(host_ctxt);
 
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
@@ -506,7 +506,7 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 
elr, u64 par,
__timer_disable_traps(vcpu);

[PATCH v5 18/40] KVM: arm64: Rewrite sysreg alternatives to static keys

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

As we are about to move calls around in the sysreg save/restore logic,
let's first rewrite the alternative function callers, because it is
going to make the next patches much easier to read.
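
The transformation is mechanical. A schematic before/after in plain C,
with has_vhe() stubbed as an ordinary predicate rather than a static
key:

#include <stdbool.h>
#include <stdio.h>

static bool has_vhe(void) { return false; }	/* static key stand-in */

static void save_el1_state(void) { puts("save EL1 state"); }
static void do_nothing(void) { }

/* Before: pick a function pointer via an alternative-patched selector. */
static void (*select_save(void))(void)
{
	return has_vhe() ? do_nothing : save_el1_state;
}

/* After: a direct conditional; the static key patches this into a
 * branch at runtime, so the readability costs nothing. */
static void save_host_state(void)
{
	if (!has_vhe())
		save_el1_state();
}

int main(void)
{
	select_save()();	/* old style */
	save_host_state();	/* new style, same effect */
	return 0;
}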

Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/kvm/hyp/sysreg-sr.c | 17 -
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index d5a5145b4e7c..51b557226170 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -22,9 +22,6 @@
 #include 
 #include 
 
-/* Yes, this does nothing, on purpose */
-static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
-
 /*
  * Non-VHE: Both host and guest must save everything.
  *
@@ -81,13 +78,10 @@ static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
 }
 
-static hyp_alternate_select(__sysreg_call_save_host_state,
-   __sysreg_save_el1_state, __sysreg_do_nothing,
-   ARM64_HAS_VIRT_HOST_EXTN);
-
 void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
 {
-   __sysreg_call_save_host_state()(ctxt);
+   if (!has_vhe())
+   __sysreg_save_el1_state(ctxt);
__sysreg_save_common_state(ctxt);
__sysreg_save_user_state(ctxt);
 }
@@ -148,13 +142,10 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
 }
 
-static hyp_alternate_select(__sysreg_call_restore_host_state,
-   __sysreg_restore_el1_state, __sysreg_do_nothing,
-   ARM64_HAS_VIRT_HOST_EXTN);
-
 void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
 {
-   __sysreg_call_restore_host_state()(ctxt);
+   if (!has_vhe())
+   __sysreg_restore_el1_state(ctxt);
__sysreg_restore_common_state(ctxt);
__sysreg_restore_user_state(ctxt);
 }
-- 
2.14.2



[PATCH v5 17/40] KVM: arm64: Move userspace system registers into separate function

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

There's a semantic difference between the EL1 registers that control
operation of a kernel running in EL1 and EL1 registers that only control
userspace execution in EL0.  Since we can defer saving/restoring the
latter, move them into their own function.

The ARMv8 ARM (ARM DDI 0487C.a) Section D10.2.1 recommends that
ACTLR_EL1 has no effect on the processor when running the VHE host, and
we can therefore move this register into the EL1 state which is only
saved/restored on vcpu_put/load for a VHE host.

We also take this chance to rename the function saving/restoring the
remaining system registers to make it clear this function deals with
the EL1 system registers.
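
The split is just a regrouping of one flat save routine into per-class
helpers. A schematic sketch; the register names in the strings are
examples and puts() stands in for the actual sysreg accesses:

#include <stdio.h>

static void save_common_state(void) { puts("MDSCR_EL1, sp_el0"); }
static void save_user_state(void)   { puts("TPIDR_EL0, TPIDRRO_EL0"); }
static void save_el1_state(void)    { puts("SCTLR_EL1, ACTLR_EL1, ..."); }

/* Each class can now be saved (and later deferred) independently. */
static void save_host_state(int vhe)
{
	if (!vhe)
		save_el1_state();	/* VHE host: EL1 state is skipped */
	save_common_state();
	save_user_state();
}

int main(void)
{
	save_host_state(1);		/* VHE: only common + user state */
	return 0;
}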

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Clarified rationale for deferring ACTLR_EL1 in the commit message.

Changes since v3:
 - Correct the comment about ACTLR_EL1 and adjust commit text.

Changes since v2:
 - Save restore ACTLR_EL1 as part of the EL1 registers state instead of
   the user register state, as ACTLR_EL1 can't affect the host's execution
   on VHE systems.

Changes since v1:
 - Added comment about sp_el0 to common save sysreg save/restore functions

 arch/arm64/kvm/hyp/sysreg-sr.c | 48 ++
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 99fc60516103..d5a5145b4e7c 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -28,24 +28,33 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
 /*
  * Non-VHE: Both host and guest must save everything.
  *
- * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
+ * VHE: Host must save tpidr*_el0, mdscr_el1, sp_el0,
  * and guest must save everything.
  */
 
 static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
-   ctxt->sys_regs[ACTLR_EL1]   = read_sysreg(actlr_el1);
-   ctxt->sys_regs[TPIDR_EL0]   = read_sysreg(tpidr_el0);
-   ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[MDSCR_EL1]   = read_sysreg(mdscr_el1);
+
+   /*
+* The host arm64 Linux uses sp_el0 to point to 'current' and it must
+* therefore be saved/restored on every entry/exit to/from the guest.
+*/
ctxt->gp_regs.regs.sp   = read_sysreg(sp_el0);
 }
 
-static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+   ctxt->sys_regs[TPIDR_EL0]   = read_sysreg(tpidr_el0);
+   ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
ctxt->sys_regs[MPIDR_EL1]   = read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1]  = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1]   = read_sysreg_el1(sctlr);
+   ctxt->sys_regs[ACTLR_EL1]   = read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1]   = read_sysreg_el1(cpacr);
ctxt->sys_regs[TTBR0_EL1]   = read_sysreg_el1(ttbr0);
ctxt->sys_regs[TTBR1_EL1]   = read_sysreg_el1(ttbr1);
@@ -73,35 +82,46 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
 }
 
 static hyp_alternate_select(__sysreg_call_save_host_state,
-   __sysreg_save_state, __sysreg_do_nothing,
+   __sysreg_save_el1_state, __sysreg_do_nothing,
ARM64_HAS_VIRT_HOST_EXTN);
 
 void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
 {
__sysreg_call_save_host_state()(ctxt);
__sysreg_save_common_state(ctxt);
+   __sysreg_save_user_state(ctxt);
 }
 
 void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
 {
-   __sysreg_save_state(ctxt);
+   __sysreg_save_el1_state(ctxt);
__sysreg_save_common_state(ctxt);
+   __sysreg_save_user_state(ctxt);
 }
 
 static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
 {
-   write_sysreg(ctxt->sys_regs[ACTLR_EL1],   actlr_el1);
-   write_sysreg(ctxt->sys_regs[TPIDR_EL0],   tpidr_el0);
-   write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[MDSCR_EL1],   mdscr_el1);
+
+   /*
+* The host arm64 Linux uses sp_el0 to point to 'current' and it must
+* therefore be saved/restored on every entry/exit to/from the guest.
+*/
write_sysreg(ctxt->gp_regs.regs.sp,   sp_el0);
 }
 
-static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)

[PATCH v5 15/40] KVM: arm64: Don't deactivate VM on VHE systems

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

There is no need to reset the VTTBR to zero when exiting the guest on
VHE systems.  VHE systems don't use stage 2 translations for the EL2&0
translation regime used by the host.

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v1:
 - Changed __activate_vm to take a kvm pointer
 - No longer adding inline attributes to functions

 arch/arm64/kvm/hyp/switch.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 1e26a81c0a16..9ffd802e775d 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -156,9 +156,8 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(0, pmuserenr_el0);
 }
 
-static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_vm(struct kvm *kvm)
 {
-   struct kvm *kvm = kern_hyp_va(vcpu->kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
 }
 
@@ -377,7 +376,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__sysreg_save_host_state(host_ctxt);
 
__activate_traps(vcpu);
-   __activate_vm(vcpu);
+   __activate_vm(vcpu->kvm);
 
__vgic_restore_state(vcpu);
__timer_enable_traps(vcpu);
@@ -405,7 +404,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__vgic_save_state(vcpu);
 
__deactivate_traps(vcpu);
-   __deactivate_vm(vcpu);
 
__sysreg_restore_host_state(host_ctxt);
 
@@ -440,7 +438,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__sysreg_save_host_state(host_ctxt);
 
__activate_traps(vcpu);
-   __activate_vm(vcpu);
+   __activate_vm(kern_hyp_va(vcpu->kvm));
 
__vgic_restore_state(vcpu);
__timer_enable_traps(vcpu);
-- 
2.14.2



[PATCH v5 16/40] KVM: arm64: Remove noop calls to timer save/restore from VHE switch

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

The VHE switch function calls __timer_enable_traps and
__timer_disable_traps which don't do anything on VHE systems.
Therefore, simply remove these calls from the VHE switch function and
make the functions non-conditional as they are now only called from the
non-VHE switch path.
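
The CNTHCTL_EL2 bit manipulation is simple enough to check standalone.
A sketch using the architectural bit positions of EL1PCTEN and EL1PCEN,
with the sysreg read/write replaced by a plain variable:

#include <stdint.h>
#include <stdio.h>

#define CNTHCTL_EL1PCTEN	(1U << 0)	/* EL1 physical counter access */
#define CNTHCTL_EL1PCEN		(1U << 1)	/* EL1 physical timer access */

int main(void)
{
	uint32_t cnthctl = 0;

	/* guest entry: trap the physical timer, allow the counter */
	cnthctl &= ~CNTHCTL_EL1PCEN;
	cnthctl |= CNTHCTL_EL1PCTEN;
	printf("guest: %#x\n", cnthctl);

	/* guest exit: give the host full physical timer/counter access */
	cnthctl |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
	printf("host:  %#x\n", cnthctl);
	return 0;
}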

Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v2:
 - Added comment explaining the timer enable/disable functions
   are for !VHE only.

 arch/arm64/kvm/hyp/switch.c |  2 --
 virt/kvm/arm/hyp/timer-sr.c | 44 ++--
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 9ffd802e775d..307f8c1fcc2f 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -379,7 +379,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__activate_vm(vcpu->kvm);
 
__vgic_restore_state(vcpu);
-   __timer_enable_traps(vcpu);
 
/*
 * We must restore the 32-bit state before the sysregs, thanks
@@ -400,7 +399,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 
__sysreg_save_guest_state(guest_ctxt);
__sysreg32_save_state(vcpu);
-   __timer_disable_traps(vcpu);
__vgic_save_state(vcpu);
 
__deactivate_traps(vcpu);
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f24404b3c8df..77754a62eb0c 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, 
u32 cntvoff_high)
write_sysreg(cntvoff, cntvoff_el2);
 }
 
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
 void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
 {
-   /*
-* We don't need to do this for VHE since the host kernel runs in EL2
-* with HCR_EL2.TGE ==1, which makes those bits have no impact.
-*/
-   if (!has_vhe()) {
-   u64 val;
+   u64 val;
 
-   /* Allow physical timer/counter access for the host */
-   val = read_sysreg(cnthctl_el2);
-   val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
-   write_sysreg(val, cnthctl_el2);
-   }
+   /* Allow physical timer/counter access for the host */
+   val = read_sysreg(cnthctl_el2);
+   val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+   write_sysreg(val, cnthctl_el2);
 }
 
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
 void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
 {
-   if (!has_vhe()) {
-   u64 val;
+   u64 val;
 
-   /*
-* Disallow physical timer access for the guest
-* Physical counter access is allowed
-*/
-   val = read_sysreg(cnthctl_el2);
-   val &= ~CNTHCTL_EL1PCEN;
-   val |= CNTHCTL_EL1PCTEN;
-   write_sysreg(val, cnthctl_el2);
-   }
+   /*
+* Disallow physical timer access for the guest
+* Physical counter access is allowed
+*/
+   val = read_sysreg(cnthctl_el2);
+   val &= ~CNTHCTL_EL1PCEN;
+   val |= CNTHCTL_EL1PCTEN;
+   write_sysreg(val, cnthctl_el2);
 }
-- 
2.14.2



[PATCH v5 13/40] KVM: arm64: Introduce VHE-specific kvm_vcpu_run

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

So far this is mostly (see below) a copy of the legacy non-VHE switch
function, but we will start reworking these functions in separate
directions to work on VHE and non-VHE in the most optimal way in later
patches.

The only difference after this patch between the VHE and non-VHE run
functions is that we omit the branch-predictor variant-2 hardening for
QC Falkor CPUs, because this workaround is specific to a series of
non-VHE ARMv8.0 CPUs.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Unified has_vhe() conditional calls in kvm_arch_vcpu_ioctl_run
 - Added isb() on exit path of VHE-specific kvm_vcpu_run(), because
   otherwise 32-bit guests can crash the system on reentry to the
   guest, because writes to FPEXC32_EL2 will cause a trap at EL2
   which leads to a hyp panic.  This can be observed on the model
   where writing FPEN to CPACR_EL1 on guest exit doesn't take effect
   prior to entering the guest again.

Changes since v3:
 - Added BUG() to 32-bit ARM VHE run function
 - Omitted QC Falkor BP Hardening functionality from VHE-specific
   function

Changes since v2:
 - Reworded commit message

Changes since v1:
 - Rename kvm_vcpu_run to kvm_vcpu_run_vhe and rename __kvm_vcpu_run to
   __kvm_vcpu_run_nvhe
 - Removed stray whitespace line

 arch/arm/include/asm/kvm_asm.h|  5 ++-
 arch/arm/kvm/hyp/switch.c |  2 +-
 arch/arm64/include/asm/kvm_asm.h  |  4 ++-
 arch/arm64/include/asm/kvm_host.h |  7 +
 arch/arm64/kvm/hyp/switch.c   | 66 ++-
 virt/kvm/arm/arm.c| 12 ---
 6 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 36dd2962a42d..5a953ecb0d78 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -70,7 +70,10 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu 
*vcpu);
 
 extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
 
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+/* no VHE on 32-bit :( */
+static inline int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { BUG(); return 0; }
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
 
 extern void __init_stage2_translation(void);
 
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index e86679daddff..aac025783ee8 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -154,7 +154,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
return true;
 }
 
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 {
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 7149f1520382..d53d40704416 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -58,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
 
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
 
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
 extern u64 __vgic_v3_read_vmcr(void);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c30fc96992df..87abc94fb591 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -428,6 +428,13 @@ static inline void kvm_arm_vhe_guest_enter(void)
 static inline void kvm_arm_vhe_guest_exit(void)
 {
local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+   /*
+* When we exit from the guest we change a number of CPU configuration
+* parameters, such as traps.  Make sure these changes take effect
+* before running the host or additional guests.
+*/
+   isb();
 }
 
 static inline bool kvm_arm_harden_branch_predictor(void)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index b055111df1a1..1b94ac6a85e6 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -362,7 +362,71 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
 }
 
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+/* Switch to the guest for VHE systems running in EL2 */
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+   struct kvm_cpu_context *host_ctxt;
+   struct kvm_cpu_context *guest_ctxt;
+   bool fp_enabled;
+   u64 exit_code;
+
+   vcpu = kern_hyp_va(vcpu);
+
+   host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+   host_ctxt->__hyp_running_vcpu = vcpu;

[PATCH v5 14/40] KVM: arm64: Remove kern_hyp_va() use in VHE switch function

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

VHE kernels run completely in EL2 and therefore don't have a notion of
kernel and hyp addresses, they are all just kernel addresses.  Therefore
don't call kern_hyp_va() in the VHE switch function.

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/kvm/hyp/switch.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 1b94ac6a85e6..1e26a81c0a16 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -370,9 +370,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
bool fp_enabled;
u64 exit_code;
 
-   vcpu = kern_hyp_va(vcpu);
-
-   host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+   host_ctxt = vcpu->arch.host_cpu_context;
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
 
-- 
2.14.2



[PATCH v5 11/40] KVM: arm64: Improve debug register save/restore flow

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

Instead of having multiple calls from the world switch path to the debug
logic, each figuring out if the dirty bit is set and if we should
save/restore the debug registers, let's just provide two hooks to the
debug save/restore functionality, one for switching to the guest
context, and one for switching to the host context, and we get the
benefit of only having to evaluate the dirty flag once on each path,
plus we give the compiler some more room to inline some of this
functionality.
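
The restructuring amounts to hoisting a repeated test into one entry
point per direction. A schematic sketch with the dirty flag and the
helpers stubbed out:

#include <stdbool.h>
#include <stdio.h>

static bool debug_dirty;	/* KVM_ARM64_DEBUG_DIRTY analogue */

static void save_host_debug(void)     { puts("save host debug regs"); }
static void restore_guest_debug(void) { puts("restore guest debug regs"); }

/* The dirty flag is evaluated exactly once per switch. */
static void debug_switch_to_guest(void)
{
	if (!debug_dirty)
		return;
	save_host_debug();
	restore_guest_debug();
}

int main(void)
{
	debug_switch_to_guest();	/* clean: nothing happens */
	debug_dirty = true;
	debug_switch_to_guest();	/* dirty: full save/restore cycle */
	return 0;
}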

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v1:
 - Remove leading underscores from local variables

 arch/arm64/include/asm/kvm_hyp.h | 10 ++-
 arch/arm64/kvm/hyp/debug-sr.c| 56 +++-
 arch/arm64/kvm/hyp/switch.c  |  6 ++---
 3 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index f26f9cd70c72..aeda2a777365 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -138,14 +138,8 @@ void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
 void __sysreg32_save_state(struct kvm_vcpu *vcpu);
 void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
 
-void __debug_save_state(struct kvm_vcpu *vcpu,
-   struct kvm_guest_debug_arch *dbg,
-   struct kvm_cpu_context *ctxt);
-void __debug_restore_state(struct kvm_vcpu *vcpu,
-  struct kvm_guest_debug_arch *dbg,
-  struct kvm_cpu_context *ctxt);
-void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
-void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
+void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 74f71fb5e36d..3e717f66f011 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -110,16 +110,13 @@ static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
-  struct kvm_guest_debug_arch *dbg,
-  struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
 {
u64 aa64dfr0;
int brps, wrps;
 
-   if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
-   return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
wrps = (aa64dfr0 >> 20) & 0xf;
@@ -132,16 +129,13 @@ void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
 }
 
-void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+struct kvm_guest_debug_arch *dbg,
+struct kvm_cpu_context *ctxt)
 {
u64 aa64dfr0;
int brps, wrps;
 
-   if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
-   return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
 
brps = (aa64dfr0 >> 12) & 0xf;
@@ -155,10 +149,12 @@ void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
 }
 
-void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
 {
-   __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
-  kern_hyp_va(vcpu->arch.host_cpu_context));
+   struct kvm_cpu_context *host_ctxt;
+   struct kvm_cpu_context *guest_ctxt;
+   struct kvm_guest_debug_arch *host_dbg;
+   struct kvm_guest_debug_arch *guest_dbg;
 
/*
 * Non-VHE: Disable and flush SPE data generation
@@ -166,15 +162,39 @@ void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
 */
if (!has_vhe())
__debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
+
+   if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+   return;
+
+   host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);

[PATCH v5 12/40] KVM: arm64: Factor out fault info population and gic workarounds

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

The current world-switch function has functionality to detect a number
of cases where we need to fixup some part of the exit condition and
possibly run the guest again, before having restored the host state.

This includes populating missing fault info, emulating GICv2 CPU
interface accesses when mapped at unaligned addresses, and emulating
the GICv3 CPU interface on systems that need it.

We are about to add an alternative switch function for VHE systems, but
VHE systems still need the same early fixup logic, so factor out this
logic into a separate function that can be shared by both switch
functions.

No functional change.
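
The resulting control flow is a retry loop around a fixup predicate. A
skeletal sketch of the contract; run_guest() and the exit codes are
invented for illustration:

#include <stdbool.h>
#include <stdio.h>

static int run_guest(void)
{
	static int exits;
	return ++exits;		/* pretend exit codes 1, 2, 3, ... */
}

/* Returns true to re-enter the guest immediately, false to restore the
 * host state and return to the run loop, as fixup_guest_exit() does. */
static bool fixup_guest_exit(int *exit_code)
{
	return *exit_code < 3;	/* "fix up" the first two exits */
}

int main(void)
{
	int exit_code;

	do {
		exit_code = run_guest();
	} while (fixup_guest_exit(&exit_code));

	printf("back to the run loop with exit code %d\n", exit_code);
	return 0;
}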

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/kvm/hyp/switch.c | 104 
 1 file changed, 57 insertions(+), 47 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 35f3bbe17084..b055111df1a1 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -291,53 +291,27 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
}
 }
 
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
-   struct kvm_cpu_context *host_ctxt;
-   struct kvm_cpu_context *guest_ctxt;
-   bool fp_enabled;
-   u64 exit_code;
-
-   vcpu = kern_hyp_va(vcpu);
-
-   host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
-   host_ctxt->__hyp_running_vcpu = vcpu;
-   guest_ctxt = &vcpu->arch.ctxt;
-
-   __sysreg_save_host_state(host_ctxt);
-
-   __activate_traps(vcpu);
-   __activate_vm(vcpu);
-
-   __vgic_restore_state(vcpu);
-   __timer_enable_traps(vcpu);
-
-   /*
-* We must restore the 32-bit state before the sysregs, thanks
-* to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
-*/
-   __sysreg32_restore_state(vcpu);
-   __sysreg_restore_guest_state(guest_ctxt);
-   __debug_switch_to_guest(vcpu);
-
-   /* Jump in the fire! */
-again:
-   exit_code = __guest_enter(vcpu, host_ctxt);
-   /* And we're baaack! */
-
-   if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ)
+   if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+
/*
 * We're using the raw exception code in order to only process
 * the trap if no SError is pending. We will come back to the
 * same PC once the SError has been injected, and replay the
 * trapping instruction.
 */
-   if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
-   goto again;
+   if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+   return true;
 
if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
-   exit_code == ARM_EXCEPTION_TRAP) {
+   *exit_code == ARM_EXCEPTION_TRAP) {
bool valid;
 
valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
@@ -351,9 +325,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 
if (ret == 1) {
if (__skip_instr(vcpu))
-   goto again;
+   return true;
else
-   exit_code = ARM_EXCEPTION_TRAP;
+   *exit_code = ARM_EXCEPTION_TRAP;
}
 
if (ret == -1) {
@@ -365,29 +339,65 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 */
if (!__skip_instr(vcpu))
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-   exit_code = ARM_EXCEPTION_EL1_SERROR;
+   *exit_code = ARM_EXCEPTION_EL1_SERROR;
}
-
-   /* 0 falls through to be handler out of EL2 */
}
}
 
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
-   exit_code == ARM_EXCEPTION_TRAP &&
+   *exit_code == ARM_EXCEPTION_TRAP &&
(kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
 kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
int ret = __vgic_v3_perform_cpuif_access(vcpu);
 
if (ret == 1) {
if (__skip_instr(vcpu))

[PATCH v5 10/40] KVM: arm64: Slightly improve debug save/restore functions

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

The debug save/restore functions can be improved by using the has_vhe()
static key instead of the instruction alternative.  Using the static key
uses the same paradigm as we're going to use elsewhere, it makes the
code more readable, and it generates slightly better code (no
stack setups and function calls unless necessary).

We also use a static key on the restore path, because it will be
marginally faster than loading a value from memory.

Finally, we don't have to conditionally clear the debug dirty flag if
it's set, we can just clear it.

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v1:
 - Change dot to comma in comment
 - Rename __debug_restore_spe to __debug_restore_spe_nvhe

 arch/arm64/kvm/hyp/debug-sr.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index d958cd63a547..74f71fb5e36d 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -66,11 +66,6 @@
default:write_debug(ptr[0], reg, 0);\
}
 
-static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
-{
-   /* The vcpu can run. but it can't hide. */
-}
-
 static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
u64 reg;
@@ -103,11 +98,7 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
dsb(nsh);
 }
 
-static hyp_alternate_select(__debug_save_spe,
-   __debug_save_spe_nvhe, __debug_save_spe_vhe,
-   ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
+static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
 {
if (!pmscr_el1)
return;
@@ -168,17 +159,24 @@ void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
 {
__debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
   kern_hyp_va(vcpu->arch.host_cpu_context));
-   __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
+
+   /*
+* Non-VHE: Disable and flush SPE data generation
+* VHE: The vcpu can run, but it can't hide.
+*/
+   if (!has_vhe())
+   __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
 }
 
 void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
 {
-   __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+   if (!has_vhe())
+   __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
+
__debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
  kern_hyp_va(vcpu->arch.host_cpu_context));
 
-   if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
-   vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+   vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
 }
 
 u32 __hyp_text __kvm_get_mdcr_el2(void)
-- 
2.14.2



[PATCH v5 09/40] KVM: arm64: Move debug dirty flag calculation out of world switch

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

There is no need to figure out inside the world-switch if we should
save/restore the debug registers or not, we might as well do that in the
higher level debug setup code, making it easier to optimize down the
line.

Reviewed-by: Julien Thierry <julien.thie...@arm.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/kvm/debug.c| 5 +
 arch/arm64/kvm/hyp/debug-sr.c | 6 --
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index fa63b28c65e0..feedb877cff8 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -193,6 +193,11 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
if (trap_debug)
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
 
+   /* If KDE or MDE are set, perform a full save/restore cycle. */
+   if ((vcpu_sys_reg(vcpu, MDSCR_EL1) & DBG_MDSCR_KDE) ||
+   (vcpu_sys_reg(vcpu, MDSCR_EL1) & DBG_MDSCR_MDE))
+   vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
 }
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index dabb5cc7b087..d958cd63a547 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -166,12 +166,6 @@ void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
 
 void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
 {
-   /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
-* a full save/restore cycle. */
-   if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
-   (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
-   vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
-
-   __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
   kern_hyp_va(vcpu->arch.host_cpu_context));
__debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
-- 
2.14.2



[PATCH v5 05/40] KVM: arm64: Move HCR_INT_OVERRIDE to default HCR_EL2 guest flag

2018-02-27 Thread Christoffer Dall
From: Shih-Wei Li <shih...@cs.columbia.edu>

We always set the IMO and FMO bits in the HCR_EL2 when running the
guest, regardless of whether we use the vgic.  By moving these flags to
HCR_GUEST_FLAGS we can avoid one of the extra save/restore operations of
HCR_EL2 in the world switch code, and we can also soon get rid of the
other one.

This is safe, because even though the IMO and FMO bits control both
taking the interrupts to EL2 and remapping ICC_*_EL1 to ICV_*_EL1 when
executed at EL1, as long as we ensure that these bits are clear when
running the EL1 host, we're OK, because we reset the HCR_EL2 to only
have the HCR_RW bit set when returning to EL1 on non-VHE systems.
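
The mask arithmetic can be sanity-checked in plain C. A small sketch
using the architectural bit positions of FMO, IMO and RW; the
HOST_FLAGS name is illustrative, standing in for the reset value used
on the non-VHE return path:

#include <stdint.h>
#include <stdio.h>

#define HCR_FMO		(1ULL << 3)
#define HCR_IMO		(1ULL << 4)
#define HCR_RW		(1ULL << 31)

#define GUEST_FLAGS	(HCR_RW | HCR_FMO | HCR_IMO)	/* guest: IMO/FMO set */
#define HOST_FLAGS	(HCR_RW)			/* host: IMO/FMO clear */

int main(void)
{
	uint64_t hcr = GUEST_FLAGS;	/* entering the guest */
	printf("guest hcr: %#llx\n", (unsigned long long)hcr);

	hcr = HOST_FLAGS;		/* returning to the EL1 host */
	printf("host hcr:  %#llx, IMO/FMO clear: %d\n",
	       (unsigned long long)hcr, !(hcr & (HCR_IMO | HCR_FMO)));
	return 0;
}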

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Shih-Wei Li <shih...@cs.columbia.edu>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v3:
 - Slightly reworded the commit message

 arch/arm64/include/asm/kvm_arm.h | 4 ++--
 arch/arm64/kvm/hyp/switch.c  | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1b438c334463..6dd285e979c9 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -83,9 +83,9 @@
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
-HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR)
+HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
+HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 579d9a263853..4117717548b0 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -174,8 +174,6 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
__vgic_v3_save_state(vcpu);
else
__vgic_v2_save_state(vcpu);
-
-   write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
 }
 
 static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
@@ -183,7 +181,6 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
u64 val;
 
val = read_sysreg(hcr_el2);
-   val |=  HCR_INT_OVERRIDE;
val |= vcpu->arch.irq_lines;
write_sysreg(val, hcr_el2);
 
-- 
2.14.2



[PATCH v5 06/40] KVM: arm/arm64: Get rid of vcpu->arch.irq_lines

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We currently have a separate read-modify-write of the HCR_EL2 on entry
to the guest for the sole purpose of setting the VF and VI bits, if set.
Since this is rarely the case (only when using the userspace IRQ chip
and interrupts are in flight), let's get rid of this operation and
instead modify the bits in the vcpu->arch.hcr[_el2] directly when
needed.
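
The accessor change is the usual get/set-pair-to-pointer refactor. A
minimal sketch of the pattern with simplified types:

#include <stdio.h>

struct vcpu { unsigned long hcr; };

/* Old style: every update is a read-modify-write through two calls. */
static unsigned long get_hcr(struct vcpu *v)           { return v->hcr; }
static void set_hcr(struct vcpu *v, unsigned long hcr) { v->hcr = hcr; }

/* New style: hand back a pointer so callers modify bits in place. */
static unsigned long *vcpu_hcr(struct vcpu *v)         { return &v->hcr; }

int main(void)
{
	struct vcpu v = { 0 };

	set_hcr(&v, get_hcr(&v) | 0x100);	/* old: two calls per update */
	*vcpu_hcr(&v) |= 0x200;			/* new: one expression */
	printf("%#lx\n", v.hcr);
	return 0;
}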

Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Reviewed-by: Julien Thierry <julien.thie...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   |  9 ++---
 arch/arm/include/asm/kvm_host.h  |  3 ---
 arch/arm/kvm/emulate.c   |  2 +-
 arch/arm/kvm/hyp/switch.c|  2 +-
 arch/arm64/include/asm/kvm_emulate.h |  9 ++---
 arch/arm64/include/asm/kvm_host.h|  3 ---
 arch/arm64/kvm/hyp/switch.c  |  6 --
 arch/arm64/kvm/inject_fault.c|  2 +-
 virt/kvm/arm/arm.c   | 11 ++-
 virt/kvm/arm/mmu.c   |  6 +++---
 10 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9003bd19cb70..e27caa4b47a1 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -92,14 +92,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr = HCR_GUEST_MASK;
 }
 
-static inline unsigned long vcpu_get_hcr(const struct kvm_vcpu *vcpu)
+static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
 {
-   return vcpu->arch.hcr;
-}
-
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
-{
-   vcpu->arch.hcr = hcr;
+   return (unsigned long *)&vcpu->arch.hcr;
 }
 
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 248b930563e5..6137195ab815 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -155,9 +155,6 @@ struct kvm_vcpu_arch {
/* HYP trapping configuration */
u32 hcr;
 
-   /* Interrupt related fields */
-   u32 irq_lines;  /* IRQ and FIQ levels */
-
/* Exception Information */
struct kvm_vcpu_fault_info fault;
 
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index cdff963f133a..fa501bf437f3 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -174,5 +174,5 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 {
-   vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VA);
+   *vcpu_hcr(vcpu) |= HCR_VA;
 }
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index ae45ae96aac2..e86679daddff 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -44,7 +44,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
isb();
}
 
-   write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR);
+   write_sysreg(vcpu->arch.hcr, HCR);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(HSTR_T(15), HSTR);
write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 4610bc818097..9ee316b962c8 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -69,14 +69,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TID3;
 }
 
-static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
+static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
 {
-   return vcpu->arch.hcr_el2;
-}
-
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
-{
-   vcpu->arch.hcr_el2 = hcr;
+   return (unsigned long *)&vcpu->arch.hcr_el2;
 }
 
 static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 618cfee7206a..b027a7f025d4 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -272,9 +272,6 @@ struct kvm_vcpu_arch {
/* IO related fields */
struct kvm_decode mmio_decode;
 
-   /* Interrupt related fields */
-   u64 irq_lines;  /* IRQ and FIQ levels */
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 4117717548b0..80bf38ccc8a4 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -178,12 +178,6 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)

[PATCH v5 07/40] KVM: arm/arm64: Add kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

As we are about to move a bunch of save/restore logic for VHE kernels to
the load and put functions, we need some infrastructure to do this.

Reviewed-by: Andrew Jones <drjo...@redhat.com>
Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v1:
 - Reworded comments as suggested by Drew

 arch/arm/include/asm/kvm_host.h   |  3 +++
 arch/arm64/include/asm/kvm_host.h |  3 +++
 arch/arm64/kvm/hyp/sysreg-sr.c| 30 ++
 virt/kvm/arm/arm.c|  2 ++
 4 files changed, 38 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6137195ab815..c6a749568dd6 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -312,4 +312,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return false;
 }
 
+static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b027a7f025d4..c30fc96992df 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -435,4 +435,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
 }
 
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 43b7dd65e3e6..434f0fc9cfb3 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -190,6 +190,36 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
 }
 
+/**
+ * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2.  This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
+{
+}
+
+/**
+ * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2.  This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
+{
+}
+
 void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
 {
asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 49d13510e9c2..2062d9357971 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_arm_set_running_vcpu(vcpu);
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
+   kvm_vcpu_load_sysregs(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   kvm_vcpu_put_sysregs(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
 
-- 
2.14.2



[PATCH v5 04/40] KVM: arm64: Rework hyp_panic for VHE and non-VHE

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

VHE actually doesn't rely on clearing the VTTBR when returning to the
host kernel, and that is the current key mechanism of hyp_panic to
figure out how to attempt to return to a state good enough to print a
panic statement.

Therefore, we split the hyp_panic function into two functions, a VHE and
a non-VHE, keeping the non-VHE version intact, but changing the VHE
behavior.

The vttbr_el2 check on VHE doesn't really make that much sense, because
the only situation where we can get here on VHE is when the hypervisor
assembly code actually called into hyp_panic, which only happens when
VBAR_EL2 has been set to the KVM exception vectors.  On VHE, we can
always safely disable the traps and restore the host registers at this
point, so we simply do that unconditionally and call into the panic
function directly.

Acked-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v1:
 - Fixed typos in the commit message
 - Still use the generic __deactivte_traps() function in the hyp panic
   code until we rework that logic later.

 arch/arm64/kvm/hyp/switch.c | 42 +++---
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 46717da75643..579d9a263853 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -439,10 +439,20 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
 
 static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
-struct kvm_vcpu *vcpu)
+struct kvm_cpu_context *__host_ctxt)
 {
+   struct kvm_vcpu *vcpu;
unsigned long str_va;
 
+   vcpu = __host_ctxt->__hyp_running_vcpu;
+
+   if (read_sysreg(vttbr_el2)) {
+   __timer_disable_traps(vcpu);
+   __deactivate_traps(vcpu);
+   __deactivate_vm(vcpu);
+   __sysreg_restore_host_state(__host_ctxt);
+   }
+
/*
 * Force the panic string to be loaded from the literal pool,
 * making sure it is a kernel address and not a PC-relative
@@ -456,37 +466,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
   read_sysreg(hpfar_el2), par, vcpu);
 }
 
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
-   struct kvm_vcpu *vcpu)
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+struct kvm_cpu_context *host_ctxt)
 {
+   struct kvm_vcpu *vcpu;
+   vcpu = host_ctxt->__hyp_running_vcpu;
+
+   __deactivate_traps(vcpu);
+   __sysreg_restore_host_state(host_ctxt);
+
panic(__hyp_panic_string,
  spsr,  elr,
  read_sysreg_el2(esr),   read_sysreg_el2(far),
  read_sysreg(hpfar_el2), par, vcpu);
 }
 
-static hyp_alternate_select(__hyp_call_panic,
-   __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
-   ARM64_HAS_VIRT_HOST_EXTN);
-
 void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
 {
-   struct kvm_vcpu *vcpu = NULL;
-
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
 
-   if (read_sysreg(vttbr_el2)) {
-   vcpu = host_ctxt->__hyp_running_vcpu;
-   __timer_disable_traps(vcpu);
-   __deactivate_traps(vcpu);
-   __deactivate_vm(vcpu);
-   __sysreg_restore_host_state(host_ctxt);
-   }
-
-   /* Call panic for real */
-   __hyp_call_panic()(spsr, elr, par, vcpu);
+   if (!has_vhe())
+   __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
+   else
+   __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
 
unreachable();
 }
-- 
2.14.2



[PATCH v5 02/40] KVM: arm/arm64: Move vcpu_load call after kvm_vcpu_first_run_init

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

Moving the call to vcpu_load() in kvm_arch_vcpu_ioctl_run() to after
we've called kvm_vcpu_first_run_init() simplifies some of the vgic code,
and there is also no need to do vcpu_load() for things such as handling
the immediate_exit flag.

Reviewed-by: Julien Grall <julien.gr...@arm.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 virt/kvm/arm/arch_timer.c |  4 
 virt/kvm/arm/arm.c| 22 --
 virt/kvm/arm/vgic/vgic-init.c | 11 ---
 3 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f7278358f133..872e1fbd94dc 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -854,11 +854,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return ret;
 
 no_vgic:
-   preempt_disable();
timer->enabled = 1;
-   kvm_timer_vcpu_load(vcpu);
-   preempt_enable();
-
return 0;
 }
 
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 53572304843b..932e61858c55 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -632,27 +632,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
 
-   vcpu_load(vcpu);
-
ret = kvm_vcpu_first_run_init(vcpu);
if (ret)
-   goto out;
+   return ret;
 
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
-   goto out;
-   if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) {
-   ret = 0;
-   goto out;
-   }
-
+   return ret;
+   if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
+   return 0;
}
 
-   if (run->immediate_exit) {
-   ret = -EINTR;
-   goto out;
-   }
+   if (run->immediate_exit)
+   return -EINTR;
+
+   vcpu_load(vcpu);
 
kvm_sigset_activate(vcpu);
 
@@ -811,7 +806,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
kvm_sigset_deactivate(vcpu);
 
-out:
vcpu_put(vcpu);
return ret;
 }
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 743ca5cb05ef..3e8209a07585 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -302,17 +302,6 @@ int vgic_init(struct kvm *kvm)
 
dist->initialized = true;
 
-   /*
-* If we're initializing GICv2 on-demand when first running the VCPU
-* then we need to load the VGIC state onto the CPU.  We can detect
-* this easily by checking if we are in between vcpu_load and vcpu_put
-* when we just initialized the VGIC.
-*/
-   preempt_disable();
-   vcpu = kvm_arm_get_running_vcpu();
-   if (vcpu)
-   kvm_vgic_load(vcpu);
-   preempt_enable();
 out:
return ret;
 }
-- 
2.14.2



[PATCH v5 03/40] KVM: arm64: Avoid storing the vcpu pointer on the stack

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

We already have the percpu area for the host cpu state, which points to
the VCPU, so there's no need to store the VCPU pointer on the stack on
every context switch.  We can be a little more clever and just use
tpidr_el2 for the percpu offset and load the VCPU pointer from the host
context.

This has the benefit of being able to retrieve the host context even
when our stack is corrupted, and it has a potential performance benefit
because we trade a store plus a load for an mrs and a load on a round
trip to the guest.

This does require us to calculate the percpu offset without including
the offset from the kernel mapping of the percpu array to the linear
mapping of the array (which is what we store in tpidr_el1), because a
PC-relative generated address in EL2 is already giving us the hyp alias
of the linear mapping of a kernel address.  We do this in
__cpu_init_hyp_mode() by using kvm_ksym_ref().

The code that accesses ESR_EL2 was previously using an alternative to
use the _EL1 accessor on VHE systems, but this was actually unnecessary
as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
accessor does the same thing on both systems.

Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---

Notes:
Changes since v4:
 - Clarified rationale in commit message.
 - Called get_host_ctxt from get_vcpu and rename get_vcpu to
   get_vcpu_ptr.

Changes since v3:
 - Reworked the assembly part of the patch after rebasing on v4.16-rc1
   which created a conflict with the variant 2 mitigations.
 - Removed Marc's reviewed-by due to the rework.
 - Removed unneeded extern keyword in declaration in header file

Changes since v1:
 - Use PC-relative addressing to access per-cpu variables instead of
   using a load from the literal pool.
 - Remove stale comments as pointed out by Marc
 - Reworded the commit message as suggested by Drew

 arch/arm64/include/asm/kvm_asm.h  | 15 +++
 arch/arm64/include/asm/kvm_host.h | 15 +++
 arch/arm64/kernel/asm-offsets.c   |  1 +
 arch/arm64/kvm/hyp/entry.S|  6 +-
 arch/arm64/kvm/hyp/hyp-entry.S| 28 ++--
 arch/arm64/kvm/hyp/switch.c   |  5 +
 arch/arm64/kvm/hyp/sysreg-sr.c|  5 +
 7 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b732e65..7149f1520382 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT0
 #define KVM_ARM64_DEBUG_DIRTY  (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+/* Translate a kernel address of @sym into its equivalent linear mapping */
 #define kvm_ksym_ref(sym)  \
({  \
void *val =\
@@ -70,6 +71,20 @@ extern u32 __init_stage2_translation(void);
 
 extern void __qcom_hyp_sanitize_btac_predictors(void);
 
+#else /* __ASSEMBLY__ */
+
+.macro get_host_ctxt reg, tmp
+   adr_l   \reg, kvm_host_cpu_state
+   mrs \tmp, tpidr_el2
+   add \reg, \reg, \tmp
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+   get_host_ctxt \ctxt, \vcpu
+   ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+   kern_hyp_va \vcpu
+.endm
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 596f8e414a4c..618cfee7206a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -358,10 +358,15 @@ int kvm_perf_teardown(void);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
+void __kvm_set_tpidr_el2(u64 tpidr_el2);
+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
   unsigned long hyp_stack_ptr,
   unsigned long vector_ptr)
 {
+   u64 tpidr_el2;
+
/*
 * Call initialization code, and switch to the full blown HYP code.
 * If the cpucaps haven't been finalized yet, something has gone very
@@ -370,6 +375,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 */
	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+   /*
+* Calculate the raw per-cpu offset without a translation from the
+* kernel's mapping to the linear mapping, and store it in tpidr_el2
+* so that we can use adr_l to access per-cpu variables in EL2.
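
(The hunk is truncated in the archive.  Going by the commit message, the
calculation presumably completes along these lines -- a sketch, not the
verbatim patch:)

	tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
		     (u64)kvm_ksym_ref(kvm_host_cpu_state));

	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);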

[PATCH v5 01/40] KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN

2018-02-27 Thread Christoffer Dall
From: Christoffer Dall <christoffer.d...@linaro.org>

Calling vcpu_load() registers preempt notifiers for this vcpu and calls
kvm_arch_vcpu_load().  The latter will soon be doing a lot of heavy
lifting on arm/arm64 and will try to do things such as enabling the
virtual timer and setting us up to handle interrupts from the timer
hardware.

Loading state onto hardware registers and enabling hardware to signal
interrupts can be problematic when we're not actually about to run the
VCPU, because it makes it difficult to establish the right context when
handling interrupts from the timer, and it makes the register access
code difficult to reason about.

Luckily, now when we call vcpu_load in each ioctl implementation, we can
simply remove the call from the non-KVM_RUN vcpu ioctls, and our
kvm_arch_vcpu_load() is only used for loading vcpu content to the
physical CPU when we're actually going to run the vcpu.

Reviewed-by: Julien Grall <julien.gr...@arm.com>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm64/kvm/guest.c | 3 ---
 virt/kvm/arm/arm.c | 9 -
 2 files changed, 12 deletions(-)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index d7e3299a7734..959e50d2588c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 {
int ret = 0;
 
-   vcpu_load(vcpu);
-
trace_kvm_set_guest_debug(vcpu, dbg->control);
 
if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
@@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
}
 
 out:
-   vcpu_put(vcpu);
return ret;
 }
 
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86941f6181bb..53572304843b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -384,14 +384,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
 {
-   vcpu_load(vcpu);
-
if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
 
-   vcpu_put(vcpu);
return 0;
 }
 
@@ -400,8 +397,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
int ret = 0;
 
-   vcpu_load(vcpu);
-
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.power_off = false;
@@ -413,7 +408,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
ret = -EINVAL;
}
 
-   vcpu_put(vcpu);
return ret;
 }
 
@@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_device_attr attr;
long r;
 
-   vcpu_load(vcpu);
-
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
struct kvm_vcpu_init init;
@@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
}
 
-   vcpu_put(vcpu);
return r;
 }
 
-- 
2.14.2



[PATCH v5 00/40] Optimize KVM/ARM for VHE systems

2018-02-27 Thread Christoffer Dall
This series redesigns parts of KVM/ARM to optimize the performance on
VHE systems.  The general approach is to try to do as little work as
possible when transitioning between the VM and the hypervisor.  This has
the benefit of lower latency when waiting for interrupts and delivering
virtual interrupts, and reduces the overhead of emulating behavior and
I/O in the host kernel.

Patches 01 through 06 are not VHE specific, but rework parts of KVM/ARM
that can be generally improved.  We then add infrastructure to move more
logic into vcpu_load and vcpu_put, and we improve handling of VFP and debug
registers.

We then introduce a new world-switch function for VHE systems, which we
can tweak and optimize for VHE systems.  To do that, we rework a lot of
the system register save/restore handling and emulation code that may
need access to system registers, so that we can defer as many system
register save/restore operations to vcpu_load and vcpu_put, and move
this logic out of the VHE world switch function.

We then optimize the configuration of traps.  On non-VHE systems, both
the host and VM kernels run in EL1, but because the host kernel should
have full access to the underlying hardware, but the VM kernel should
not, we essentially make the host kernel more privileged than the VM
kernel despite them both running at the same privilege level by enabling
VE traps when entering the VM and disabling those traps when exiting the
VM.  On VHE systems, the host kernel runs in EL2 and has full access to
the hardware (as much as allowed by secure side software), and is
unaffected by the trap configuration.  That means we can configure the
traps for VMs running in EL1 once, and don't have to switch them on and
off for every entry/exit to/from the VM.

Finally, we improve our VGIC handling by moving all save/restore logic
out of the VHE world-switch, and we make it possible to truly only
evaluate if the AP list is empty and not do *any* VGIC work if that is
the case, and only do the minimal amount of work required in the course
of the VGIC processing when we have virtual interrupts in flight.
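
As a sketch of that fast path (ap_list_head follows the existing vgic
data structures; the function name is made up for illustration):

static void vgic_sync_fast_path(struct kvm_vcpu *vcpu)
{
	/*
	 * An empty AP list means no virtual interrupts are in flight,
	 * so all VGIC save/restore work can be skipped on this exit.
	 */
	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
		return;

	/* ...otherwise fold LR state back and prune the AP list... */
}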

The patches are based on v4.16-rc3 with kvmarm/next and kvmarm/master
applied on top.

I've given the patches a fair amount of testing on Thunder-X, Mustang,
Seattle, and TC2 (32-bit) for non-VHE testing, and tested VHE
functionality on TX2 and on the foundation model.

The patches are also available in the vhe-optimize-v5 branch on my
kernel.org repository [1].  The vhe-optimize-v5-base branch contains
prerequisites of this series.

Patches 36, 38, and 40 are missing Acked-by or Reviewed-by tags.

[1]: git://git.kernel.org/pub/scm/linux/kernel/git/cdall/linux.git 
vhe-optimize-v5

Changes since v4:
 - Rebased on v4.16-rc3 and other patches queued for kvmarm/next.
 - Mostly cosmetic, detailed changelogs are in the individual patches.
 - Patch 13 introduces an additional ISB on return from the VHE run
   function which we lost now when we no longer call the kvm_call_hyp()
   wrapper which used to take care of that for us.

Changes since v3:
 - Rebased on v4.16-rc1 (fun!)
 - Removed VFP optimizations because it was discovered that the deferred
   approach taken in v3 was buggy
   (https://lists.cs.columbia.edu/pipermail/kvmarm/2018-February/029838.html)
   This causes a fair amount of changes throughout and I've removed
   affected reviewed-by and other tags as best I could to take this into
   account.
 - Used a switch-statement to handle deferred system registers instead
   of the macro approach taken in v3.
 - Addressed other review comments (tried to keep track of this with logs
   of changes in individual patches).
 - I ran Yury's IPI benchmark test and other tests on TX2 and could not
   observe a performance regression, but rather an improvement of around
   65%.  I suspect the previous regression was due to the timer WFI
   problem solved for v4.16-rc1.
 - I haven't included Tomasz' reviewed-by, because I figured too much of
   the series has changed since v3.

Changes since v2:
 - Rebased on v4.15-rc3.
 - Includes two additional patches that only does vcpu_load after
   kvm_vcpu_first_run_init and only for KVM_RUN.
 - Addressed review comments from v2 (detailed changelogs are in the
   individual patches).

Thanks,
-Christoffer

Christoffer Dall (39):
  KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN
  KVM: arm/arm64: Move vcpu_load call after kvm_vcpu_first_run_init
  KVM: arm64: Avoid storing the vcpu pointer on the stack
  KVM: arm64: Rework hyp_panic for VHE and non-VHE
  KVM: arm/arm64: Get rid of vcpu->arch.irq_lines
  KVM: arm/arm64: Add kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs
  KVM: arm/arm64: Introduce vcpu_el1_is_32bit
  KVM: arm64: Move debug dirty flag calculation out of world switch
  KVM: arm64: Slightly improve debug save/restore functions
  KVM: arm64: Improve debug register save/restore flow
  KVM: arm64: Factor out fault info population and gic workarounds
  KVM: arm64: Introduce 

Re: VCPU hotplug on KVM/ARM

2018-02-27 Thread Christoffer Dall
Hi Bhupinder,

On Tue, Feb 27, 2018 at 03:01:17PM +0530, btha...@codeaurora.org wrote:
> I hope it is the right forum to post my query.
> 
>  
> 
> I am currently looking at the possibility of adding a new VCPU to a running
> guest VM in KVM/ARM. I see that currently, it is not allowed to add a new
> VCPU to a guest VM, if it is already initialized. The first check in
> kvm_arch_vcpu_create() returns failure if it is already initialized.
> 

This would require a major rework of a lot of logic surrounding the GIC
and other parts of KVM initialization.

>  
> 
> There was some work done in QEMU to add support for VCPU hotplug:
> https://lists.gnu.org/archive/html/qemu-arm/2017-05/msg00404.html
> 
>  
> 
> But I am looking at the KVM side for enabling adding a new VCPU. If you can
> point me to any relevant work/resources, which I can refer to then it will
> help me.
> 

I don't have any specific pointers, but I was always told that the way
we were going to do CPU hotplug would be to instantiate a large number
of VCPUs, and hotplug would be equivalent to turning on a VCPU which was
previously powered off.

Is this not still a feasible solution?
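
In that model, userspace creates the maximum number of VCPUs up front
and parks the spares; "plugging" one in later is then just a
KVM_SET_MP_STATE ioctl plus whatever the VMM tells the guest via
PSCI/firmware.  A minimal userspace-side sketch (vcpu_fd is assumed to
be an already-created VCPU file descriptor):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int unpark_vcpu(int vcpu_fd)
{
	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_RUNNABLE };

	/* Power on a VCPU that was created powered-off. */
	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
}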

How does VCPU hotplug work on x86?

Thanks,
-Christoffer


Re: [PATCH v4 31/40] KVM: arm64: Move common VHE/non-VHE trap config in separate functions

2018-02-25 Thread Christoffer Dall
On Fri, Feb 23, 2018 at 02:30:54PM +0000, Julien Grall wrote:
> Hi Christoffer,
> 
> On 15/02/18 21:03, Christoffer Dall wrote:
> >@@ -85,37 +123,14 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
> >  {
> > u64 hcr = vcpu->arch.hcr_el2;
> >-/*
> >- * We are about to set CPTR_EL2.TFP to trap all floating point
> >- * register accesses to EL2, however, the ARM ARM clearly states that
> >- * traps are only taken to EL2 if the operation would not otherwise
> >- * trap to EL1.  Therefore, always make sure that for 32-bit guests,
> >- * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
> >- * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
> >- * it will cause an exception.
> >- */
> >-if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
> >-write_sysreg(1 << 30, fpexc32_el2);
> >-isb();
> >-}
> >+write_sysreg(hcr, hcr_el2);
> > if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
> > write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
> >-write_sysreg(hcr, hcr_el2);
> 
> OOI, any reason to move the write to HCR_EL2 just before the if?
> 

Just to keep the two lines together where we read the value from the
vcpu structure and write it to hardware.  It's hard to tell from this
patch, but I think it looks nicer in the end.

Thanks,
-Christoffer


Re: [PATCH v4 31/40] KVM: arm64: Move common VHE/non-VHE trap config in separate functions

2018-02-25 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 05:59:37PM +0000, Marc Zyngier wrote:
> On Thu, 15 Feb 2018 21:03:23 +0000,
> Christoffer Dall wrote:
> > 
> > As we are about to be more lazy with some of the trap configuration
> > register read/writes for VHE systems, move the logic that is currently
> > shared between VHE and non-VHE into a separate function which can be
> > called from either the world-switch path or from vcpu_load/vcpu_put.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> > 
> > Notes:
> > Changes since v3:
> >  - Separate fpsimd32 trap configuration into a separate function
> >which is still called from __activate_traps, because we no longer
> >defer saving/restoring of VFP registers to load/put.
> > 
> >  arch/arm64/kvm/hyp/switch.c | 76 +++--
> >  1 file changed, 45 insertions(+), 31 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 909aa3fe9196..17e3c6f26a34 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -56,7 +56,45 @@ static inline void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
> > vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
> >  }
> >  
> > -static void __hyp_text __activate_traps_vhe(void)
> > +static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
> > +{
> > +   /*
> > +* We are about to set CPTR_EL2.TFP to trap all floating point
> > +* register accesses to EL2, however, the ARM ARM clearly states that
> > +* traps are only taken to EL2 if the operation would not otherwise
> > +* trap to EL1.  Therefore, always make sure that for 32-bit guests,
> > +* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
> > +* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
> > +* it will cause an exception.
> > +*/
> > +   if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
> > +   write_sysreg(1 << 30, fpexc32_el2);
> > +   isb();
> > +   }
> > +}
> > +
> > +static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
> > +{
> > +   /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
> > +   write_sysreg(1 << 15, hstr_el2);
> > +   /*
> > +* Make sure we trap PMU access from EL0 to EL2. Also sanitize
> > +* PMSELR_EL0 to make sure it never contains the cycle
> > +* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
> > +* EL1 instead of being trapped to EL2.
> > +*/
> > +   write_sysreg(0, pmselr_el0);
> > +   write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
> > +   write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
> > +}
> > +
> > +static void __hyp_text __deactivate_traps_common(void)
> > +{
> > +   write_sysreg(0, hstr_el2);
> > +   write_sysreg(0, pmuserenr_el0);
> > +}
> > +
> > +static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu)
> >  {
> > u64 val;
> >  
> > @@ -68,7 +106,7 @@ static void __hyp_text __activate_traps_vhe(void)
> > write_sysreg(kvm_get_hyp_vector(), vbar_el1);
> >  }
> >  
> > -static void __hyp_text __activate_traps_nvhe(void)
> > +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
> 
> I have the ugly feeling that this hunk should not be in this
> patch. Have you tried bisecting the compilation of this series?
> 

Actually, I don't see why this shouldn't be in this patch?  We change
both versions of the functions and we change the call to
__activate_traps_arch() below.  We just don't change the hyp
alternative, because it doesn't care about arguments and parameters, it
just cares that the types of the functions are the same.

Admittedly, this patch is really hard to read; it's easier to apply it
and just look at the code.

> >  {
> > u64 val;
> >  
> > @@ -85,37 +123,14 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
> >  {
> > u64 hcr = vcpu->arch.hcr_el2;
> >  
> > -   /*
> > -* We are about to set CPTR_EL2.TFP to trap all floating point
> > -* register accesses to EL2, however, the ARM ARM clearly states that
> > -* traps are only taken to EL2 if the operation would not otherwise
> > -* trap to EL1.  Therefore, always make sure that for 32-bit guests,
> > -* we set FPEXC.EN to prevent traps to EL

Re: [PATCH v4 10/40] KVM: arm64: Slightly improve debug save/restore functions

2018-02-24 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 05:52:41PM +0000, Marc Zyngier wrote:
> On 21/02/18 17:39, Andrew Jones wrote:
> > On Thu, Feb 15, 2018 at 10:03:02PM +0100, Christoffer Dall wrote:
> >> The debug save/restore functions can be improved by using the has_vhe()
> >> static key instead of the instruction alternative.  Using the static key
> >> uses the same paradigm as we're going to use elsewhere, it makes the
> >> code more readable, and it generates slightly better code (no
> >> stack setups and function calls unless necessary).
> >>
> >> We also use a static key on the restore path, because it will be
> >> marginally faster than loading a value from memory.
> >>
> >> Finally, we don't have to conditionally clear the debug dirty flag if
> >> it's set, we can just clear it.
> >>
> >> Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
> >> Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> >> ---
> >>
> >> Notes:
> >> Changes since v1:
> >>  - Change dot to comma in comment
> >>  - Rename __debug_restore_spe to __debug_restore_spe_nvhe
> >>
> >>  arch/arm64/kvm/hyp/debug-sr.c | 26 --
> >>  1 file changed, 12 insertions(+), 14 deletions(-)
> >>
> > 
> > Maybe after this series is merged, if there are any hyp_alternate_select's
> > left, we can replace all the remaining ones with has_vhe() and then just
> > completely remove hyp_alternate_select.
> 
> Note that older compilers (such as GCC 4.8) will generate horrible code
> with static keys, as they do not support "asm goto". Not that I want to
> preserve the home brew hyp_alternate_select mechanism, but I just want
> to make it plain that some distros will definitely suffer from the
> transition.
> 
That's unfortunate.  I'd still like to use has_vhe() in most places, but
we could change the implementation of has_vhe() to use the hyp
alternative until nobody cares about kernels compiled with GCC 4.8?
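
For reference, has_vhe() is essentially a thin wrapper around the cpucap
static key (4.16-era asm/virt.h), which is exactly where the asm-goto
quality of the compiler matters:

static inline bool has_vhe(void)
{
	if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
		return true;

	return false;
}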

Thanks,
-Christoffer


[PULL 2/2] ARM: kvm: fix building with gcc-8

2018-02-23 Thread Christoffer Dall
From: Arnd Bergmann <a...@arndb.de>

In banked-sr.c, we use a top-level '__asm__(".arch_extension virt")'
statement to allow compilation of a multi-CPU kernel for ARMv6
and older ARMv7-A that don't normally support access to the banked
registers.

This is considered to be a programming error by the gcc developers
and will no longer work in gcc-8, where we now get a build error:

/tmp/cc4Qy7GR.s:34: Error: Banked registers are not available with this architecture. -- `mrs r3,SP_usr'
/tmp/cc4Qy7GR.s:41: Error: Banked registers are not available with this architecture. -- `mrs r3,ELR_hyp'
/tmp/cc4Qy7GR.s:55: Error: Banked registers are not available with this architecture. -- `mrs r3,SP_svc'
/tmp/cc4Qy7GR.s:62: Error: Banked registers are not available with this architecture. -- `mrs r3,LR_svc'
/tmp/cc4Qy7GR.s:69: Error: Banked registers are not available with this architecture. -- `mrs r3,SPSR_svc'
/tmp/cc4Qy7GR.s:76: Error: Banked registers are not available with this architecture. -- `mrs r3,SP_abt'

Passing the '-march=armv7ve' flag to gcc works, and is ok here, because
we know the functions won't ever be called on pre-ARMv7VE machines.
Unfortunately, older compiler versions (4.8 and earlier) do not understand
that flag, so we still need to keep the asm around.

Backporting to stable kernels (4.6+) is needed to allow those to be built
with future compilers as well.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84129
Fixes: 33280b4cd1dc ("ARM: KVM: Add banked registers save/restore")
Cc: sta...@vger.kernel.org
Signed-off-by: Arnd Bergmann <a...@arndb.de>
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 arch/arm/kvm/hyp/Makefile| 5 +
 arch/arm/kvm/hyp/banked-sr.c | 4 
 2 files changed, 9 insertions(+)

diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 5638ce0c9524..63d6b404d88e 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 KVM=../../../../virt/kvm
 
+CFLAGS_ARMV7VE:=$(call cc-option, -march=armv7ve)
+
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
@@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
 obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
 obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
+CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE)
+
 obj-$(CONFIG_KVM_ARM_HOST) += entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+CFLAGS_switch.o += $(CFLAGS_ARMV7VE)
 obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c
index 111bda8cdebd..be4b8b0a40ad 100644
--- a/arch/arm/kvm/hyp/banked-sr.c
+++ b/arch/arm/kvm/hyp/banked-sr.c
@@ -20,6 +20,10 @@
 
#include <asm/kvm_hyp.h>
 
+/*
+ * gcc before 4.9 doesn't understand -march=armv7ve, so we have to
+ * trick the assembler.
+ */
 __asm__(".arch_extension virt");
 
 void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
-- 
2.14.2



[PULL 1/2] KVM: arm/arm64: Fix arch timers with userspace irqchips

2018-02-23 Thread Christoffer Dall
When introducing support for irqchip in userspace we needed a way to
mask the timer signal to prevent the guest continuously exiting due to a
screaming timer.

We did this by disabling the corresponding percpu interrupt on the
host interrupt controller, because we cannot rely on the host system
having a GIC, and therefore cannot make any assumptions about having an
active state to hide the timer signal.

Unfortunately, when introducing this feature, it became entirely
possible that a VCPU which belongs to a VM that has a userspace irqchip
can disable the vtimer irq on the host on some physical CPU, and then go
away without ever enabling the vtimer irq on that physical CPU again.

This means that using irqchips in userspace on a system that also
supports running VMs with an in-kernel GIC can prevent forward progress
from in-kernel GIC VMs.

Later on, when we started taking virtual timer interrupts in the arch
timer code, we would also leave this timer state active for userspace
irqchip VMs, because we leave it up to a VGIC-enabled guest to
deactivate the hardware IRQ using the HW bit in the LR.

Both issues are solved by only using the enable/disable trick on systems
that do not have a host GIC which supports the active state, because all
VMs on such systems must use irqchips in userspace.  Systems that have a
working GIC with support for an active state use the active state to
mask the timer signal for both userspace and in-kernel irqchips.
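
A sketch of the active-state trick: instead of disabling the percpu IRQ,
mark the interrupt active at the GIC so it cannot fire again until it is
deactivated.  The helper name here is made up; irq_set_irqchip_state()
is the genirq interface used for this:

static void vtimer_set_active_state(unsigned int host_vtimer_irq,
				    bool active)
{
	WARN_ON(irq_set_irqchip_state(host_vtimer_irq,
				      IRQCHIP_STATE_ACTIVE, active));
}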

Cc: Alexander Graf <ag...@suse.de>
Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
Cc: <sta...@vger.kernel.org> # v4.12+
Fixes: d9e139778376 ("KVM: arm/arm64: Support arch timers with a userspace gic")
Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 116 +-
 1 file changed, 64 insertions(+), 52 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70268c0bec79..70f4c30918eb 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -36,6 +36,8 @@ static struct timecounter *timecounter;
 static unsigned int host_vtimer_irq;
 static u32 host_vtimer_irq_flags;
 
+static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+
 static const struct kvm_irq_level default_ptimer_irq = {
.irq= 30,
.level  = 1,
@@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
 }
 
+static inline bool userspace_irqchip(struct kvm *kvm)
+{
+   return static_branch_unlikely(&userspace_irqchip_in_use) &&
+   unlikely(!irqchip_in_kernel(kvm));
+}
+
 static void soft_timer_start(struct hrtimer *hrt, u64 ns)
 {
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
@@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
cancel_work_sync(work);
 }
 
-static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
-{
-   struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-   /*
-* When using a userspace irqchip with the architected timers, we must
-* prevent continuously exiting from the guest, and therefore mask the
-* physical interrupt by disabling it on the host interrupt controller
-* when the virtual level is high, such that the guest can make
-* forward progress.  Once we detect the output level being
-* de-asserted, we unmask the interrupt again so that we exit from the
-* guest when the timer fires.
-*/
-   if (vtimer->irq.level)
-   disable_percpu_irq(host_vtimer_irq);
-   else
-   enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
if (kvm_timer_should_fire(vtimer))
kvm_timer_update_irq(vcpu, true, vtimer);
 
-   if (static_branch_unlikely(&userspace_irqchip_in_use) &&
-   unlikely(!irqchip_in_kernel(vcpu->kvm)))
-   kvm_vtimer_update_mask_user(vcpu);
+   if (userspace_irqchip(vcpu->kvm) &&
+   !static_branch_unlikely(&has_gic_active_state))
+   disable_percpu_irq(host_vtimer_irq);
 
return IRQ_HANDLED;
 }
@@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
   timer_ctx->irq.level);
 
-   if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
-   likely(irqchip_in_kernel(vcpu->kvm))) {
+   if (!userspace_irqchip(vcpu->kvm)) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
  timer_ctx->irq.irq,
 

[PULL 0/2] KVM/ARM Fixes for v4.16

2018-02-23 Thread Christoffer Dall
Hi Paolo and Radim,

Here's the first round of KVM/ARM fixes for v4.16.

Not much in here; we fix the interaction of userspace irqchip VMs with in-kernel
irqchip VMs and make sure we can build 32-bit KVM/ARM with gcc-8.

The following changes since commit 7928b2cbe55b2a410a0f5c1f154610059c57b1b2:

  Linux 4.16-rc1 (2018-02-11 15:04:29 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
tags/kvm-arm-fixes-for-v4.16-1

for you to fetch changes up to 67870eb1204223598ea6d8a4467b482e9f5875b5:

  ARM: kvm: fix building with gcc-8 (2018-02-15 20:58:36 +0100)

Thanks,
-Christoffer

Arnd Bergmann (1):
  ARM: kvm: fix building with gcc-8

Christoffer Dall (1):
  KVM: arm/arm64: Fix arch timers with userspace irqchips

 arch/arm/kvm/hyp/Makefile|   5 ++
 arch/arm/kvm/hyp/banked-sr.c |   4 ++
 virt/kvm/arm/arch_timer.c| 116 ---
 3 files changed, 73 insertions(+), 52 deletions(-)

-- 
2.14.2


Re: [PATCH v4 35/40] KVM: arm/arm64: Get rid of vgic_elrsr

2018-02-23 Thread Christoffer Dall
On Fri, Feb 23, 2018 at 02:44:30PM +0000, Julien Grall wrote:
> Hi Christoffer,
> 
> On 15/02/18 21:03, Christoffer Dall wrote:
> >There is really no need to store the vgic_elrsr on the VGIC data
> >structures as the only need we have for the elrsr is to figure out if an
> >LR is inactive when we save the VGIC state upon returning from the
>guest.  We might as well store this in a temporary local variable.
> >
> >This also gets rid of the endianness conversion in the VGIC save
> >function, which is completely unnecessary and would actually result in
>incorrect functionality on big-endian systems, because we are only
>using typed values here rather than converting pointers and reading
>back different types.
> 
> I can't find any endianness code removed in this code. What did I miss?
> 

Ah, good find, we ended up fixing this in a separate commit:

fc396e066318, "KVM: arm/arm64: Fix broken GICH_ELRSR big endian conversion", 2017-12-03
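
The idea from the quoted description, sketched for the GICv2 save path
(used_lrs, base, and cpu_if follow the existing vgic-v2 code; an
illustration, not the final patch):

	u64 elrsr;
	int i;

	elrsr = readl_relaxed(base + GICH_ELRSR0);
	if (unlikely(used_lrs > 32))
		elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32;

	for (i = 0; i < used_lrs; i++) {
		/* An inactive LR holds nothing worth saving. */
		if (elrsr & (1UL << i))
			cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
		else
			cpu_if->vgic_lr[i] =
				readl_relaxed(base + GICH_LR0 + (i * 4));
	}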

I'll adjust the commit message.

Thanks,
-Christoffer


Re: [RFC PATCH 2/2] KVM: arm64: Eliminate most redundant FPSIMD saves and restores

2018-02-23 Thread Christoffer Dall
cpu)
>  
>  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  {
> + /* Mark this vcpu's FPSIMD state as non-live initially: */
> + fpsimd_flush_state(&vcpu->arch.ctxt.fpsimd_state);
> + vcpu->arch.guest_fpsimd_loaded = false;
> +
>   /* Force users to call KVM_ARM_VCPU_INIT */
>   vcpu->arch.target = -1;
>   bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
> @@ -631,6 +635,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
>   int ret;
> + struct fpsimd_state *guest_fpsimd = &vcpu->arch.ctxt.fpsimd_state;
> + struct user_fpsimd_state *host_fpsimd =
> + &current->thread.fpsimd_state.user_fpsimd;
>  
>   if (unlikely(!kvm_vcpu_initialized(vcpu)))
>   return -ENOEXEC;
> @@ -650,6 +657,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>   if (run->immediate_exit)
>   return -EINTR;
>  
> + WARN_ON(!current->mm);
> +
> + if (!test_thread_flag(TIF_MAPPED_TO_HYP)) {
> + ret = create_hyp_mappings(host_fpsimd, host_fpsimd + 1,
> +   PAGE_HYP);
> + if (ret)
> + return ret;
> +
> + set_thread_flag(TIF_MAPPED_TO_HYP);
> + }
> +

I have an alternate approach to this, see below.

>   vcpu_load(vcpu);
>  
>   kvm_sigset_activate(vcpu);
> @@ -680,6 +698,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  
>   local_irq_disable();
>  
> + /*
> +  * host_fpsimd_state indicates to hyp that there is host state
> +  * to save, and where to save it:
> +  */
> + if (test_thread_flag(TIF_FOREIGN_FPSTATE))
> + vcpu->arch.host_fpsimd_state = NULL;
> + else
> + vcpu->arch.host_fpsimd_state = kern_hyp_va(host_fpsimd);
> +
> + vcpu->arch.guest_fpsimd_loaded =
> + !fpsimd_foreign_fpstate(guest_fpsimd);

This is an awful lot of logic in the critical path...

> +
> + BUG_ON(system_supports_sve());
> +
> + BUG_ON(vcpu->arch.guest_fpsimd_loaded &&
> +vcpu->arch.host_fpsimd_state);
> +
>   kvm_vgic_flush_hwstate(vcpu);
>  
>   /*
> @@ -774,6 +809,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>   if (static_branch_unlikely(_irqchip_in_use))
>   kvm_timer_sync_hwstate(vcpu);
>  
> + /* defend against kernel-mode NEON in softirq */
> + local_bh_disable();
> +
>   /*
>* We may have taken a host interrupt in HYP mode (ie
>* while executing the guest). This interrupt is still
> @@ -786,6 +824,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>*/
>   local_irq_enable();
>  
> + if (vcpu->arch.guest_fpsimd_loaded) {
> + set_thread_flag(TIF_FOREIGN_FPSTATE);
> + fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fpsimd_state);
> +
> + /*
> +  * Protect ourselves against a softirq splatting the
> +  * FPSIMD state once irqs are enabled:
> +  */
> + fpsimd_save_state(guest_fpsimd);
> + }
> + local_bh_enable();
> +

And this seems fairly involved as well.  The overlapping
local_bh_disable with enabling irqs doesn't feel very nice, although it
may be correct.

The main issue is that we still save the guest FPSIMD state on every
exit from the guest.

>   /*
>* We do local_irq_enable() before calling guest_exit() so
>* that if a timer interrupt hits while running the guest we
> -- 
> 2.1.4
> 

Building on these patches, I tried putting together something along the
lines of what I had imagined, but it's still untested (read, it doesn't
actually work).  If you think the approach is not completely crazy, I'm
happy to test it, and make it work for 32-bit etc.

commit e3f20ac5eab166d9257710486b9ceafb034195bf
Author: Christoffer Dall <christoffer.d...@linaro.org>
Date:   Fri Feb 23 17:23:57 2018 +0100

KVM: arm/arm64: Introduce kvm_arch_vcpu_run_pid_change

KVM/ARM differs from other architectures in having to maintain an
additional virtual address space from that of the host and the guest,
because we split the execution of KVM across both EL1 and EL2.
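
(The quoted patch is cut off by the archive.  As an illustration of the
idea -- mapping the host thread's FPSIMD state into hyp once when the
thread driving KVM_RUN changes, rather than in the run loop -- a sketch
could look like this; the hook name and call site are assumptions based
on the description above:)

int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
	struct user_fpsimd_state *fpsimd =
		&current->thread.fpsimd_state.user_fpsimd;

	/* Make this thread's FPSIMD state visible to hyp once. */
	return create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
}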

Re: [RFC PATCH 0.9/2] arm64: fpsimd: Expose CPU / FPSIMD state association helpers

2018-02-23 Thread Christoffer Dall
On Fri, Feb 16, 2018 at 06:39:30PM +, Dave Martin wrote:
> Oops, forgot to post this patch that goes before patch 1 in the series.
> 
> --8<--
> 
> Expose an interface for associating an FPSIMD context with a CPU and
> checking the association, for use by KVM.
> 
> Signed-off-by: Dave Martin 
> ---
>  arch/arm64/include/asm/fpsimd.h |  5 +
>  arch/arm64/kernel/fpsimd.c  | 42 +
>  2 files changed, 35 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 8857a0f..f4ce4d6 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -23,6 +23,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * FP/SIMD storage area has:
> @@ -62,6 +63,8 @@ struct fpsimd_state {
>  
>  struct task_struct;
>  
> +extern bool fpsimd_foreign_fpstate(struct fpsimd_state const *state);
> +
>  extern void fpsimd_save_state(struct fpsimd_state *state);
>  extern void fpsimd_load_state(struct fpsimd_state *state);
>  
> @@ -76,6 +79,8 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
>  extern void fpsimd_flush_task_state(struct task_struct *target);
>  extern void sve_flush_cpu_state(void);
>  
> +extern void fpsimd_bind_state_to_cpu(struct fpsimd_state *state);
> +
>  /* Maximum VL that SVE VL-agnostic software can transparently support */
>  #define SVE_VL_ARCH_MAX 0x100
>  
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e7226c4..138efaf 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -38,6 +38,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -121,6 +122,14 @@ struct fpsimd_last_state_struct {
>  
>  static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
>  
> +bool fpsimd_foreign_fpstate(struct fpsimd_state const *st)
> +{
> + WARN_ON(!in_softirq() && !irqs_disabled());
> +
> + return st->cpu != smp_processor_id() ||
> + st != __this_cpu_read(fpsimd_last_state.st);
> +}
> +
>  /* Default VL for tasks that don't set it explicitly: */
>  static int sve_default_vl = -1;
>  
> @@ -908,13 +917,10 @@ void fpsimd_thread_switch(struct task_struct *next)
>* the TIF_FOREIGN_FPSTATE flag so the state will be loaded
>* upon the next return to userland.
>*/
> - struct fpsimd_state *st = &next->thread.fpsimd_state;
> -
> - if (__this_cpu_read(fpsimd_last_state.st) == st
> - && st->cpu == smp_processor_id())
> - clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
> - else
> + if (fpsimd_foreign_fpstate(&next->thread.fpsimd_state))
>   set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
> + else
> + clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
>   }
>  }
>  
> @@ -996,19 +1002,31 @@ void fpsimd_signal_preserve_current_state(void)
>   sve_to_fpsimd(current);
>  }
>  
> +static void __fpsimd_bind_to_cpu(struct fpsimd_last_state_struct *last,
> +  struct fpsimd_state *st)
> +{
> + WARN_ON(!in_softirq() || !irqs_disabled());

You meant && here, right?

Currently this makes my box explode.

Thanks,
-Christoffer

> +
> + last->st = st;
> + st->cpu = smp_processor_id();
> +}
> +
> +void fpsimd_bind_state_to_cpu(struct fpsimd_state *st)
> +{
> + __fpsimd_bind_to_cpu(this_cpu_ptr(&fpsimd_last_state), st);
> +}
> +
>  /*
>   * Associate current's FPSIMD context with this cpu
>   * Preemption must be disabled when calling this function.
>   */
> -static void fpsimd_bind_to_cpu(void)
> +static void fpsimd_bind_task_to_cpu(void)
>  {
>   struct fpsimd_last_state_struct *last =
>   this_cpu_ptr(&fpsimd_last_state);
> - struct fpsimd_state *st = &current->thread.fpsimd_state;
>  
> - last->st = st;
> + __fpsimd_bind_to_cpu(last, &current->thread.fpsimd_state);
>   last->sve_in_use = test_thread_flag(TIF_SVE);
> - st->cpu = smp_processor_id();
>  }
>  
>  /*
> @@ -1025,7 +1043,7 @@ void fpsimd_restore_current_state(void)
>  
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
>   task_fpsimd_load();
> - fpsimd_bind_to_cpu();
> + fpsimd_bind_task_to_cpu();
>   }
>  
>   local_bh_enable();
> @@ -1050,7 +1068,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
>   task_fpsimd_load();
>  
>   if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
> - fpsimd_bind_to_cpu();
> + fpsimd_bind_task_to_cpu();
>  
>   local_bh_enable();
>  }
> -- 
> 2.1.4
> 


Re: [PATCH v4 28/40] KVM: arm64: Defer saving/restoring 64-bit sysregs to vcpu load/put on VHE

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 06:30:11PM +0000, Julien Grall wrote:
> Hi Christoffer,
> 
> On 15/02/18 21:03, Christoffer Dall wrote:
> >Some system registers do not affect the host kernel's execution and can
> >therefore be loaded when we are about to run a VCPU and we don't have to
> >restore the host state to the hardware before the time when we are
> >actually about to return to userspace or schedule out the VCPU thread.
> >
> >The EL1 system registers and the userspace state registers only
> >affecting EL0 execution do not need to be saved and restored on every
> >switch between the VM and the host, because they don't affect the host
> >kernel's execution.
> >
> >We mark all registers which are now deffered as such in the
> 
> NIT: s/deffered/deferred/ I think.
> 
> >vcpu_{read,write}_sys_reg accessors in sys-regs.c to ensure the most
> >up-to-date copy is always accessed.
> >
> >Note MPIDR_EL1 (controlled via VMPIDR_EL2) is accessed from other vcpu
> >threads, for example via the GIC emulation, and therefore must be
> >declared as immediate, which is fine as the guest cannot modify this
> >value.
> >
> >The 32-bit sysregs can also be deferred but we do this in a separate
> >patch as it requires a bit more infrastructure.
> 
> 
> [...]
> 
> >diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> >index b3c3f014aa61..f060309337aa 100644
> >--- a/arch/arm64/kvm/sys_regs.c
> >+++ b/arch/arm64/kvm/sys_regs.c
> >@@ -87,6 +87,26 @@ u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> >  * exit from the guest but are only saved on vcpu_put.
> >  */
> > switch (reg) {
> >+case CSSELR_EL1:return read_sysreg_s(SYS_CSSELR_EL1);
> >+case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
> >+case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
> >+case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
> >+case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
> >+case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
> >+case TCR_EL1:   return read_sysreg_s(tcr_EL12);
> >+case ESR_EL1:   return read_sysreg_s(esr_EL12);
> >+case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
> >+case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
> >+case FAR_EL1:   return read_sysreg_s(far_EL12);
> >+case MAIR_EL1:  return read_sysreg_s(mair_EL12);
> >+case VBAR_EL1:  return read_sysreg_s(vbar_EL12);
> >+case CONTEXTIDR_EL1:return read_sysreg_s(contextidr_EL12);
> >+case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
> >+case TPIDRRO_EL0:   return read_sysreg_s(SYS_TPIDRRO_EL0);
> 
> I find a bit confusing to have some EL0 registers in the middle of EL1 ones.
> Is it because they are listed by encoding?
> 

They are sorted in the same way as the sysreg array defines.  I can add
that to the commentary.

> >+case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
> >+case AMAIR_EL1: return read_sysreg_s(amair_EL12);
> >+case CNTKCTL_EL1:   return read_sysreg_s(cntkctl_EL12);
> >+case PAR_EL1:   return read_sysreg_s(SYS_PAR_EL1);
> > }
> >  immediate_read:
> >@@ -103,6 +123,26 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> >  * entry to the guest but are only restored on vcpu_load.
> >  */
> > switch (reg) {
> >+case CSSELR_EL1:write_sysreg_s(val, SYS_CSSELR_EL1);return;
> >+case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12);return;
> >+case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
> >+case CPACR_EL1: write_sysreg_s(val, cpacr_EL12);return;
> >+case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12);return;
> >+case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12);return;
> >+case TCR_EL1:   write_sysreg_s(val, tcr_EL12);  return;
> >+case ESR_EL1:   write_sysreg_s(val, esr_EL12);  return;
> >+case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12);return;
> >+case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12);return;
> >+case FAR_EL1:   write_sysreg_s(val, far_EL12);  return;
> >+case MAIR_EL1:  write_sysreg_s(val, mair_EL12); return;
> >+case VBAR_EL1:  write_sysreg_s(val, vbar_EL12); return;
> >+case CONTEXTIDR_EL1:write_sysreg_s(val, contextidr_EL12);   return;
&

Re: [PATCH v4 33/40] KVM: arm64: Configure c15, PMU, and debug register traps on cpu load/put for VHE

2018-02-22 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 06:20:54PM +0000, Marc Zyngier wrote:
> On Thu, 15 Feb 2018 21:03:25 +0000,
> Christoffer Dall wrote:
> > 
> > We do not have to change the c15 trap setting on each switch to/from the
> > guest on VHE systems, because this setting only affects EL0.
> 
> Did you mean EL1 instead?
> 

Not sure what I meant, but HSTR_EL2 appears to affect EL1 and EL0, and
the PMU configuration we can do on vcpu_load on VHE systems is only
about EL0 as far as I can tell.

> > 
> > The PMU and debug trap configuration can also be done on vcpu load/put
> > instead, because they don't affect how the host kernel can access the
> > debug registers while executing KVM kernel code.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> >  arch/arm64/include/asm/kvm_hyp.h |  3 +++
> >  arch/arm64/kvm/hyp/switch.c  | 31 ++-
> >  arch/arm64/kvm/hyp/sysreg-sr.c   |  4 
> >  3 files changed, 29 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> > index 2b1fda90dde4..949f2e77ae58 100644
> > --- a/arch/arm64/include/asm/kvm_hyp.h
> > +++ b/arch/arm64/include/asm/kvm_hyp.h
> > @@ -147,6 +147,9 @@ void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
> >  void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
> >  bool __fpsimd_enabled(void);
> >  
> > +void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
> > +void deactivate_traps_vhe_put(void);
> > +
> >  u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
> >  void __noreturn __hyp_do_panic(unsigned long, ...);
> >  
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 9c40e203bd09..5e94955b89ea 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -101,6 +101,8 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
> >  {
> > u64 val;
> >  
> > +   __activate_traps_common(vcpu);
> > +
> > val = CPTR_EL2_DEFAULT;
> > val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
> > write_sysreg(val, cptr_el2);
> > @@ -120,20 +122,12 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
> > write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
> >  
> > __activate_traps_fpsimd32(vcpu);
> > -   __activate_traps_common(vcpu);
> > __activate_traps_arch()(vcpu);
> >  }
> >  
> >  static void __hyp_text __deactivate_traps_vhe(void)
> >  {
> > extern char vectors[];  /* kernel exception vectors */
> > -   u64 mdcr_el2 = read_sysreg(mdcr_el2);
> > -
> > -   mdcr_el2 &= MDCR_EL2_HPMN_MASK |
> > -   MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
> > -   MDCR_EL2_TPMS;
> > -
> > -   write_sysreg(mdcr_el2, mdcr_el2);
> > write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
> > write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
> > write_sysreg(vectors, vbar_el1);
> > @@ -143,6 +137,8 @@ static void __hyp_text __deactivate_traps_nvhe(void)
> >  {
> > u64 mdcr_el2 = read_sysreg(mdcr_el2);
> >  
> > +   __deactivate_traps_common();
> > +
> > mdcr_el2 &= MDCR_EL2_HPMN_MASK;
> > mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
> >  
> > @@ -166,10 +162,27 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
> > if (vcpu->arch.hcr_el2 & HCR_VSE)
> > vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
> >  
> > -   __deactivate_traps_common();
> > __deactivate_traps_arch()();
> >  }
> >  
> > +void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
> > +{
> > +   __activate_traps_common(vcpu);
> > +}
> > +
> > +void deactivate_traps_vhe_put(void)
> > +{
> > +   u64 mdcr_el2 = read_sysreg(mdcr_el2);
> > +
> > +   mdcr_el2 &= MDCR_EL2_HPMN_MASK |
> > +   MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
> > +   MDCR_EL2_TPMS;
> > +
> > +   write_sysreg(mdcr_el2, mdcr_el2);
> > +
> > +   __deactivate_traps_common();
> > +}
> > +
> >  static void __hyp_text __activate_vm(struct kvm *kvm)
> >  {
> > write_sysreg(kvm->arch.vttbr, vttbr_el2);
> > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> > index aacba4636871..b3894df6bf1a 100644
> > --- a/arch

Re: [PATCH v4 31/40] KVM: arm64: Move common VHE/non-VHE trap config in separate functions

2018-02-22 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 05:59:37PM +0000, Marc Zyngier wrote:
> On Thu, 15 Feb 2018 21:03:23 +0000,
> Christoffer Dall wrote:
> > 
> > As we are about to be more lazy with some of the trap configuration
> > register read/writes for VHE systems, move the logic that is currently
> > shared between VHE and non-VHE into a separate function which can be
> > called from either the world-switch path or from vcpu_load/vcpu_put.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> > 
> > Notes:
> > Changes since v3:
> >  - Separate fpsimd32 trap configuration into a separate function
> >which is still called from __activate_traps, because we no longer
> >defer saving/restoring of VFP registers to load/put.
> > 
> >  arch/arm64/kvm/hyp/switch.c | 76 +++--
> >  1 file changed, 45 insertions(+), 31 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 909aa3fe9196..17e3c6f26a34 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -56,7 +56,45 @@ static inline void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
> > vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
> >  }
> >  
> > -static void __hyp_text __activate_traps_vhe(void)
> > +static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
> > +{
> > +   /*
> > +* We are about to set CPTR_EL2.TFP to trap all floating point
> > +* register accesses to EL2, however, the ARM ARM clearly states that
> > +* traps are only taken to EL2 if the operation would not otherwise
> > +* trap to EL1.  Therefore, always make sure that for 32-bit guests,
> > +* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
> > +* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
> > +* it will cause an exception.
> > +*/
> > +   if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
> > +   write_sysreg(1 << 30, fpexc32_el2);
> > +   isb();
> > +   }
> > +}
> > +
> > +static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
> > +{
> > +   /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
> > +   write_sysreg(1 << 15, hstr_el2);
> > +   /*
> > +* Make sure we trap PMU access from EL0 to EL2. Also sanitize
> > +* PMSELR_EL0 to make sure it never contains the cycle
> > +* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
> > +* EL1 instead of being trapped to EL2.
> > +*/
> > +   write_sysreg(0, pmselr_el0);
> > +   write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
> > +   write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
> > +}
> > +
> > +static void __hyp_text __deactivate_traps_common(void)
> > +{
> > +   write_sysreg(0, hstr_el2);
> > +   write_sysreg(0, pmuserenr_el0);
> > +}
> > +
> > +static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu)
> >  {
> > u64 val;
> >  
> > @@ -68,7 +106,7 @@ static void __hyp_text __activate_traps_vhe(void)
> > write_sysreg(kvm_get_hyp_vector(), vbar_el1);
> >  }
> >  
> > -static void __hyp_text __activate_traps_nvhe(void)
> > +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
> 
> I have the ugly feeling that this hunk should not be in this
> patch. Have you tried bisecting the compilation of this series?
> 

I have, and I seem to remember catching this one during that exact
exercise, but I probably committed the change to the wrong patch.  Duh.

Thanks for spotting.
-Christoffer

> >  {
> > u64 val;
> >  
> > @@ -85,37 +123,14 @@ static void __hyp_text __activate_traps(struct 
> > kvm_vcpu *vcpu)
> >  {
> > u64 hcr = vcpu->arch.hcr_el2;
> >  
> > -   /*
> > -* We are about to set CPTR_EL2.TFP to trap all floating point
> > -* register accesses to EL2, however, the ARM ARM clearly states that
> > -* traps are only taken to EL2 if the operation would not otherwise
> > -* trap to EL1.  Therefore, always make sure that for 32-bit guests,
> > -* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
> > -* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
> > -* it will cause an exception.
> > -*/
> > -   if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd())

Re: [PATCH v4 30/40] KVM: arm64: Defer saving/restoring 32-bit sysregs to vcpu load/put

2018-02-22 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 04:27:25PM +, Marc Zyngier wrote:
> On Thu, 15 Feb 2018 21:03:22 +,
> Christoffer Dall wrote:
> > 
> > When running a 32-bit VM (EL1 in AArch32), the AArch32 system registers
> > can be deferred to vcpu load/put on VHE systems because neither
> > the host kernel nor host userspace uses these registers.
> > 
> > Note that we can not defer saving DBGVCR32_EL2 conditionally based
> > on the state of the debug dirty flag on VHE, but since we do the
> > load/put pretty rarely, this comes out as a win anyway.
> 
> I'm not sure I understand that comment. We don't have any deferring
> for this register, so the load/put reference seems out of place.
> 

Yeah, this is a patch description editing snafu.  I'll fix it.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 38/40] KVM: arm/arm64: Handle VGICv3 save/restore from the main VGIC code on VHE

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 03:01:17PM +, Marc Zyngier wrote:
> On Thu, 22 Feb 2018 14:42:27 +,
> Christoffer Dall wrote:
> > 
> > On Thu, Feb 22, 2018 at 12:32:11PM +, Marc Zyngier wrote:
> > > On 15/02/18 21:03, Christoffer Dall wrote:
> > > > Just like we can program the GICv2 hypervisor control interface directly
> > > > from the core vgic code, we can do the same for the GICv3 hypervisor
> > > > control interface on VHE systems.
> > > > 
> > > > We do this by simply calling the save/restore functions when we have VHE
> > > > and we can then get rid of the save/restore function calls from the VHE
> > > > world switch function.
> > > > 
> > > > One caveat is that we now write GICv3 system register state before the
> > > > potential early exit path in the run loop, and because we sync back
> > > > state in the early exit path, we have to ensure that we read a
> > > > consistent GIC state from the sync path, even though we have never
> > > > actually run the guest with the newly written GIC state.  We solve this
> > > > by inserting an ISB in the early exit path.
> > > > 
> > > > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > > > ---
> > > > 
> > > > Notes:
> > > > Changes since v2:
> > > >  - Added ISB in the early exit path in the run loop as explained
> > > >in the commit message.
> > > > 
> > > >  arch/arm64/kvm/hyp/switch.c | 3 ---
> > > >  virt/kvm/arm/arm.c  | 1 +
> > > >  virt/kvm/arm/vgic/vgic.c| 5 +
> > > >  3 files changed, 6 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > > > index cbafc27a617b..466cfcdbcaf3 100644
> > > > --- a/arch/arm64/kvm/hyp/switch.c
> > > > +++ b/arch/arm64/kvm/hyp/switch.c
> > > > @@ -399,8 +399,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
> > > > __activate_traps(vcpu);
> > > > __activate_vm(vcpu->kvm);
> > > >  
> > > > -   __vgic_restore_state(vcpu);
> > > > -
> > > > sysreg_restore_guest_state_vhe(guest_ctxt);
> > > > __debug_switch_to_guest(vcpu);
> > > >  
> > > > @@ -414,7 +412,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
> > > > fp_enabled = fpsimd_enabled_vhe();
> > > >  
> > > > sysreg_save_guest_state_vhe(guest_ctxt);
> > > > -   __vgic_save_state(vcpu);
> > > >  
> > > > __deactivate_traps(vcpu);
> > > >  
> > > > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> > > > index 5bd879c78951..6de7641f3ff2 100644
> > > > --- a/virt/kvm/arm/arm.c
> > > > +++ b/virt/kvm/arm/arm.c
> > > > @@ -717,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> > > > struct kvm_run *run)
> > > > if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
> > > > kvm_request_pending(vcpu)) {
> > > > vcpu->mode = OUTSIDE_GUEST_MODE;
> > > > +   isb(); /* Ensure work in x_flush_hwstate is 
> > > > committed */
> > > > kvm_pmu_sync_hwstate(vcpu);
> > > > if 
> > > > (static_branch_unlikely(&userspace_irqchip_in_use))
> > > > kvm_timer_sync_hwstate(vcpu);
> > > > diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> > > > index 12e2a28f437e..d0a19a8c196a 100644
> > > > --- a/virt/kvm/arm/vgic/vgic.c
> > > > +++ b/virt/kvm/arm/vgic/vgic.c
> > > > @@ -19,6 +19,7 @@
> > > >  #include 
> > > >  #include 
> > > >  #include 
> > > > +#include 
> > > >  
> > > >  #include "vgic.h"
> > > >  
> > > > @@ -753,6 +754,8 @@ static inline void vgic_save_state(struct kvm_vcpu 
> > > > *vcpu)
> > > >  {
> > > > if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> > > > vgic_v2_save_state(vcpu);
> > > > +   else if (has_vhe())
> > > > +   __vgic_v3_save_state(vcpu);
> > > >  }
> > > >  
> > > >  /* Sync back the hard

Re: [PATCH v4 24/40] KVM: arm64: Rewrite system register accessors to read/write functions

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 04:11:38PM +0100, Andrew Jones wrote:
> 
> Hi Christoffer,
> 
> I'm just pointing out some broken lines that we could maybe cheat the
> 80-char limit on. Naturally feel free to ignore.

Thanks.  I'll go over them as I respin.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 30/40] KVM: arm64: Defer saving/restoring 32-bit sysregs to vcpu load/put

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 03:35:06PM +0100, Andrew Jones wrote:
> On Thu, Feb 15, 2018 at 10:03:22PM +0100, Christoffer Dall wrote:
> > When running a 32-bit VM (EL1 in AArch32), the AArch32 system registers
> > can be deferred to vcpu load/put on VHE systems because neither
> > the host kernel nor host userspace uses these registers.
> > 
> > Note that we can not defer saving DBGVCR32_EL2 conditionally based
> > on the state of the debug dirty flag on VHE, but since we do the
> > load/put pretty rarely, this comes out as a win anyway.
> > 
> > We can also not defer saving FPEXC32_32 because this register only holds
> > a guest-valid value for 32-bit guests during the exit path when the
> > guest has used FPSIMD registers and restored the register in the early
> > assembly handler from taking the EL2 fault, and therefore we have to
> > check if fpsimd is enabled for the guest in the exit path and save the
> > register then, for both VHE and non-VHE guests.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> > 
> > Notes:
> > Changes since v3:
> >  - Rework the FPEXC32 save/restore logic to no longer attempt to
> >save/restore this register lazily.
> > 
> > Changes since v2:
> >  - New patch (deferred register handling has been reworked)
> > 
> >  arch/arm64/kvm/hyp/switch.c| 17 +++--
> >  arch/arm64/kvm/hyp/sysreg-sr.c | 15 ++-
> >  2 files changed, 21 insertions(+), 11 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 22e77deb8e2e..909aa3fe9196 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -47,6 +47,15 @@ bool __hyp_text __fpsimd_enabled(void)
> > return __fpsimd_is_enabled()();
> >  }
> >  
> > +/* Save the 32-bit only FPSIMD system register state */
> > +static inline void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
> > +{
> > +   if (!vcpu_el1_is_32bit(vcpu))
> > +   return;
> > +
> > +   vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
> > +}
> > +
> 
> I realize it's much more convenient to have this function here, but it
> feels a bit out of place, being a _save_ function. Its logical place is
> an -sr file.
> 

Yes, maybe, but that would make it a function call to perform a mrs and
a store, which is a bit unfortunate.
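
For illustration, the -sr variant would have to be a real out-of-line
__hyp_text function, something like this sketch (placement and name
hypothetical):

	void __hyp_text __sysreg32_save_fpexc(struct kvm_vcpu *vcpu)
	{
		if (!vcpu_el1_is_32bit(vcpu))
			return;

		vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
	}

so the mrs and the store end up behind a function call from the
world-switch path, whereas the static inline in switch.c gets folded
into the fp_enabled block for free.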

> >  static void __hyp_text __activate_traps_vhe(void)
> >  {
> > u64 val;
> > @@ -380,11 +389,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
> >  
> > __vgic_restore_state(vcpu);
> >  
> > -   /*
> > -* We must restore the 32-bit state before the sysregs, thanks
> > -* to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
> > -*/
> > -   __sysreg32_restore_state(vcpu);
> > sysreg_restore_guest_state_vhe(guest_ctxt);
> > __debug_switch_to_guest(vcpu);
> >  
> > @@ -398,7 +402,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
> > fp_enabled = __fpsimd_enabled();
> >  
> > sysreg_save_guest_state_vhe(guest_ctxt);
> > -   __sysreg32_save_state(vcpu);
> > __vgic_save_state(vcpu);
> >  
> > __deactivate_traps(vcpu);
> > @@ -408,6 +411,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
> > if (fp_enabled) {
> > __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
> > __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
> > +   __fpsimd_save_fpexc32(vcpu);
> > }
> >  
> > __debug_switch_to_host(vcpu);
> > @@ -475,6 +479,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu 
> > *vcpu)
> > if (fp_enabled) {
> > __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
> > __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
> > +   __fpsimd_save_fpexc32(vcpu);
> > }
> >  
> > /*
> > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> > index 9c60b8062724..aacba4636871 100644
> > --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> > +++ b/arch/arm64/kvm/hyp/sysreg-sr.c
> > @@ -196,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu 
> > *vcpu)
> > sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
> > sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
> >  
> > -   if (__fpsimd_enabled())
> > -   sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
> > -
> > -   if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG

Re: [PATCH v4 25/40] KVM: arm64: Introduce framework for accessing deferred sysregs

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 02:40:52PM +0100, Andrew Jones wrote:
> On Thu, Feb 15, 2018 at 10:03:17PM +0100, Christoffer Dall wrote:
> > We are about to defer saving and restoring some groups of system
> > registers to vcpu_put and vcpu_load on supported systems.  This means
> > that we need some infrastructure to access system registes which
> > supports either accessing the memory backing of the register or directly
> > accessing the system registers, depending on the state of the system
> > when we access the register.
> > 
> > We do this by defining read/write accessor functions, which can handle
> > both "immediate" and "deferrable" system registers.  Immediate registers
> > are always saved/restored in the world-switch path, but deferrable
> > registers are only saved/restored in vcpu_put/vcpu_load when supported
> > and sysregs_loaded_on_cpu will be set in that case.
> > 
> > Note that we don't use the deferred mechanism yet in this patch, but only
> > introduce infrastructure.  This is to improve convenience of review in
> > the subsequent patches where it is clear which registers become
> > deferred.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> > 
> > Notes:
> > Changes since v3:
> >  - Changed to a switch-statement based approach to improve
> >readability.
> > 
> > Changes since v2:
> >  - New patch (deferred register handling has been reworked)
> > 
> >  arch/arm64/include/asm/kvm_host.h |  8 ++--
> >  arch/arm64/kvm/sys_regs.c | 33 +
> >  2 files changed, 39 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h 
> > b/arch/arm64/include/asm/kvm_host.h
> > index 68398bf7882f..b463b5e28959 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -284,6 +284,10 @@ struct kvm_vcpu_arch {
> >  
> > /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
> > u64 vsesr_el2;
> > +
> > +   /* True when deferrable sysregs are loaded on the physical CPU,
> > +* see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
> > +   bool sysregs_loaded_on_cpu;
> >  };
> >  
> >  #define vcpu_gp_regs(v)(&(v)->arch.ctxt.gp_regs)
> > @@ -296,8 +300,8 @@ struct kvm_vcpu_arch {
> >   */
> >  #define __vcpu_sys_reg(v,r)((v)->arch.ctxt.sys_regs[(r)])
> >  
> > -#define vcpu_read_sys_reg(v,r) __vcpu_sys_reg(v,r)
> > -#define vcpu_write_sys_reg(v,r,n)  do { __vcpu_sys_reg(v,r) = n; } while 
> > (0)
> > +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
> > +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val);
> >  
> >  /*
> >   * CP14 and CP15 live in the same array, as they are backed by the
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index a05d2c01c786..b3c3f014aa61 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -35,6 +35,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -76,6 +77,38 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
> > return false;
> >  }
> >  
> > +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
> > +{
> > +   if (!vcpu->arch.sysregs_loaded_on_cpu)
> > +   goto immediate_read;
> > +
> > +   /*
> > +* All system registers listed in the switch are not saved on every
> > +* exit from the guest but are only saved on vcpu_put.
> 
> The "All ... are not" doesn't flow well for me. How about
> 
>  /*
>   * None of the system registers listed in the switch are saved on guest
>   * exit. These registers are only saved on vcpu_put.
>   */
> 
> > +*/
> > +   switch (reg) {
> > +   }
> > +
> > +immediate_read:
> > +   return __vcpu_sys_reg(vcpu, reg);
> > +}
> > +
> > +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val)
> > +{
> > +   if (!vcpu->arch.sysregs_loaded_on_cpu)
> > +   goto immediate_write;
> > +
> > +   /*
> > +* All system registers listed in the switch are not restored on every
> > +* entry to the guest but are only restored on vcpu_load.
> > +*/
> 
>  /*
>   * None of the system registers listed in the switch are restored on
>   * guest entry. If these registers were saved due to a vcpu_put, then
>   * they will be restored by vcpu_load.
>   */
> 

Hmmm, not sure the last sentence helps here.

I'll think about some nicer wording for you for the next version.
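
Just to make the intent concrete: once the later patches populate it,
the switch grows entries of roughly this shape (sketch only, the exact
register list comes patch by patch; I'm reusing the read_sysreg_el1()
accessors we already have in kvm_hyp.h):

	switch (reg) {
	case SCTLR_EL1:	return read_sysreg_el1(sctlr);
	case TTBR0_EL1:	return read_sysreg_el1(ttbr0);
	case SPSR_EL1:	return read_sysreg_el1(spsr);
	}

so while sysregs_loaded_on_cpu is set we read the register straight
from the CPU, and otherwise we fall through to the memory-backed copy.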

> > +   switch (reg) {
> > +   }
> > +
> > +immediate_write:
> > +__vcpu_sys_reg(vcpu, reg) = val;
> > +}
> > +
> >  /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 
> > */
> >  static u32 cache_levels;
> >  
> > -- 
> > 2.14.2
> >
> 
> Otherwise
> 
> Reviewed-by: Andrew Jones <drjo...@redhat.com>

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 39/40] KVM: arm/arm64: Move VGIC APR save/restore to vgic put/load

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 01:11:55PM +, Marc Zyngier wrote:
> On 15/02/18 21:03, Christoffer Dall wrote:
> > The APRs can only have bits set when the guest acknowledges an interrupt
> > in the LR and can only have a bit cleared when the guest EOIs an
> > interrupt in the LR.  Therefore, if we have no LRs with any
> > pending/active interrupts, the APR cannot change value and there is no
> > need to clear it on every exit from the VM (hint: it will have already
> > been cleared when we exited the guest the last time with the LRs all
> > EOIed).
> > 
> > The only case we need to take care of is when we migrate the VCPU away
> > from a CPU or migrate a new VCPU onto a CPU, or when we return to
> > userspace to capture the state of the VCPU for migration.  To make sure
> > this works, factor out the APR save/restore functionality into separate
> > functions called from the VCPU (and by extension VGIC) put/load hooks.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> >  arch/arm/include/asm/kvm_hyp.h   |   2 +
> >  arch/arm64/include/asm/kvm_hyp.h |   2 +
> >  virt/kvm/arm/hyp/vgic-v3-sr.c| 124 
> > +--
> >  virt/kvm/arm/vgic/vgic-v2.c  |   7 +--
> >  virt/kvm/arm/vgic/vgic-v3.c  |   5 ++
> >  5 files changed, 78 insertions(+), 62 deletions(-)
> > 
> > diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
> > index 1ab8329e9ff7..530a3c1cfe6f 100644
> > --- a/arch/arm/include/asm/kvm_hyp.h
> > +++ b/arch/arm/include/asm/kvm_hyp.h
> > @@ -110,6 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context 
> > *ctxt);
> >  
> >  void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
> >  void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
> > +void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
> > +void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
> >  
> >  asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
> >  asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
> > diff --git a/arch/arm64/include/asm/kvm_hyp.h 
> > b/arch/arm64/include/asm/kvm_hyp.h
> > index febe417b8b4e..6f3929b2fcf7 100644
> > --- a/arch/arm64/include/asm/kvm_hyp.h
> > +++ b/arch/arm64/include/asm/kvm_hyp.h
> > @@ -124,6 +124,8 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu 
> > *vcpu);
> >  
> >  void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
> >  void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
> > +void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
> > +void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
> >  int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
> >  
> >  void __timer_enable_traps(struct kvm_vcpu *vcpu);
> > diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
> > index 9abf2f3c12b5..437d7af08683 100644
> > --- a/virt/kvm/arm/hyp/vgic-v3-sr.c
> > +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
> > @@ -21,6 +21,7 @@
> >  
> >  #include 
> >  #include 
> > +#include 
> >  
> >  #define vtr_to_max_lr_idx(v)   ((v) & 0xf)
> > #define vtr_to_nr_pre_bits(v)  ((((u32)(v) >> 26) & 7) + 1)
> > @@ -221,14 +222,11 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu 
> > *vcpu)
> >  
> > if (used_lrs) {
> > int i;
> > -   u32 nr_pre_bits;
> > u32 elrsr;
> >  
> > elrsr = read_gicreg(ICH_ELSR_EL2);
> >  
> > write_gicreg(0, ICH_HCR_EL2);
> > -   val = read_gicreg(ICH_VTR_EL2);
> > -   nr_pre_bits = vtr_to_nr_pre_bits(val);
> >  
> > for (i = 0; i < used_lrs; i++) {
> > if (elrsr & (1 << i))
> > @@ -238,39 +236,10 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu 
> > *vcpu)
> >  
> > __gic_v3_set_lr(0, i);
> > }
> > -
> > -   switch (nr_pre_bits) {
> > -   case 7:
> > -   cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
> > -   cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
> > -   case 6:
> > -   cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
> > -   default:
> > -   cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
> > -   }
> > -
> > -   switch (nr_pre_bits) {
> > -   case 7:
> > -   cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
> 

Re: [PATCH v4 38/40] KVM: arm/arm64: Handle VGICv3 save/restore from the main VGIC code on VHE

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 12:32:11PM +, Marc Zyngier wrote:
> On 15/02/18 21:03, Christoffer Dall wrote:
> > Just like we can program the GICv2 hypervisor control interface directly
> > from the core vgic code, we can do the same for the GICv3 hypervisor
> > control interface on VHE systems.
> > 
> > We do this by simply calling the save/restore functions when we have VHE
> > and we can then get rid of the save/restore function calls from the VHE
> > world switch function.
> > 
> > One caveat is that we now write GICv3 system register state before the
> > potential early exit path in the run loop, and because we sync back
> > state in the early exit path, we have to ensure that we read a
> > consistent GIC state from the sync path, even though we have never
> > actually run the guest with the newly written GIC state.  We solve this
> > by inserting an ISB in the early exit path.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> > 
> > Notes:
> > Changes since v2:
> >  - Added ISB in the early exit path in the run loop as explained
> >in the commit message.
> > 
> >  arch/arm64/kvm/hyp/switch.c | 3 ---
> >  virt/kvm/arm/arm.c  | 1 +
> >  virt/kvm/arm/vgic/vgic.c| 5 +
> >  3 files changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index cbafc27a617b..466cfcdbcaf3 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -399,8 +399,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
> > __activate_traps(vcpu);
> > __activate_vm(vcpu->kvm);
> >  
> > -   __vgic_restore_state(vcpu);
> > -
> > sysreg_restore_guest_state_vhe(guest_ctxt);
> > __debug_switch_to_guest(vcpu);
> >  
> > @@ -414,7 +412,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
> > fp_enabled = fpsimd_enabled_vhe();
> >  
> > sysreg_save_guest_state_vhe(guest_ctxt);
> > -   __vgic_save_state(vcpu);
> >  
> > __deactivate_traps(vcpu);
> >  
> > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> > index 5bd879c78951..6de7641f3ff2 100644
> > --- a/virt/kvm/arm/arm.c
> > +++ b/virt/kvm/arm/arm.c
> > @@ -717,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> > struct kvm_run *run)
> > if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
> > kvm_request_pending(vcpu)) {
> > vcpu->mode = OUTSIDE_GUEST_MODE;
> > +   isb(); /* Ensure work in x_flush_hwstate is committed */
> > kvm_pmu_sync_hwstate(vcpu);
> > > > if (static_branch_unlikely(&userspace_irqchip_in_use))
> > kvm_timer_sync_hwstate(vcpu);
> > diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> > index 12e2a28f437e..d0a19a8c196a 100644
> > --- a/virt/kvm/arm/vgic/vgic.c
> > +++ b/virt/kvm/arm/vgic/vgic.c
> > @@ -19,6 +19,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  
> >  #include "vgic.h"
> >  
> > @@ -753,6 +754,8 @@ static inline void vgic_save_state(struct kvm_vcpu 
> > *vcpu)
> >  {
> > > > if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> > vgic_v2_save_state(vcpu);
> > +   else if (has_vhe())
> > +   __vgic_v3_save_state(vcpu);
> >  }
> >  
> >  /* Sync back the hardware VGIC state into our emulation after a guest's 
> > run. */
> > @@ -777,6 +780,8 @@ static inline void vgic_restore_state(struct kvm_vcpu 
> > *vcpu)
> >  {
> > > > if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> > vgic_v2_restore_state(vcpu);
> > +   else if (has_vhe())
> > +   __vgic_v3_restore_state(vcpu);
> >  }
> >  
> >  /* Flush our emulation state into the GIC hardware before entering the 
> > guest. */
> > 
> 
> I'm slowly wrapping my brain around this thing again. If I grasp the
> general idea, we end up with two cases:
> 
> (1) The GIC is accessible from the kernel, and we save/restore it
> outside of the HYP code.
> 
> (2) The GIC is only accessible from the HYP code, and we do it there.
> 
> Maybe we should bite the bullet and introduce that primitive instead?
> 

You mean something like the following?

static inline bool can_access_vgic_from_kernel(void)
{
/*
 * GICv2 can always be accessed from the kernel because it is
 * memory-mapped, and VHE systems can access GICv3 EL2 system
 * registers.
 */
return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) ||
has_vhe();
}
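
The callers in the flush/sync paths would then simply become (sketch):

	if (can_access_vgic_from_kernel())
		vgic_save_state(vcpu);

and the has_vhe() special-casing disappears from vgic_save_state() and
vgic_restore_state() themselves.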

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 37/40] KVM: arm/arm64: Move arm64-only vgic-v2-sr.c file to arm64

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 12:33:20PM +, Marc Zyngier wrote:
> On 15/02/18 21:03, Christoffer Dall wrote:
> > The vgic-v2-sr.c file now only contains the logic to replay unaligned
> > accesses to the virtual CPU interface on 16K and 64K page systems, which
> > is only relevant on 64-bit platforms.  Therefore move this file to the
> > arm64 KVM tree, remove the compile directive from the 32-bit side
> > makefile, and remove the ifdef in the C file.
> > 
> > Reviewed-by: Andre Przywara <andre.przyw...@arm.com>
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> >  arch/arm/kvm/hyp/Makefile | 1 -
> >  arch/arm64/kvm/hyp/Makefile   | 2 +-
> >  {virt/kvm/arm => arch/arm64/kvm}/hyp/vgic-v2-sr.c | 2 --
> >  3 files changed, 1 insertion(+), 4 deletions(-)
> >  rename {virt/kvm/arm => arch/arm64/kvm}/hyp/vgic-v2-sr.c (98%)
> > 
> > diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
> > index 5638ce0c9524..1964111c984a 100644
> > --- a/arch/arm/kvm/hyp/Makefile
> > +++ b/arch/arm/kvm/hyp/Makefile
> > @@ -7,7 +7,6 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
> >  
> >  KVM=../../../../virt/kvm
> >  
> > -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
> >  obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
> >  obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
> >  
> > diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> > index f04400d494b7..7e8d41210288 100644
> > --- a/arch/arm64/kvm/hyp/Makefile
> > +++ b/arch/arm64/kvm/hyp/Makefile
> > @@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector 
> > -DDISABLE_BRANCH_PROFILING
> >  
> >  KVM=../../../../virt/kvm
> >  
> > -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
> >  obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
> >  obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
> >  
> > +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o
> 
> How about renaming the file to vgic-v2-cpuif-proxy.c? It doesn't have
> anything to do with save/restore anymore...
> 

Yes, good idea.

> >  obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
> >  obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
> >  obj-$(CONFIG_KVM_ARM_HOST) += entry.o
> > diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c
> > similarity index 98%
> > rename from virt/kvm/arm/hyp/vgic-v2-sr.c
> > rename to arch/arm64/kvm/hyp/vgic-v2-sr.c
> > index 0bbafdfd4adb..97f357ea9c72 100644
> > --- a/virt/kvm/arm/hyp/vgic-v2-sr.c
> > +++ b/arch/arm64/kvm/hyp/vgic-v2-sr.c
> > @@ -23,7 +23,6 @@
> >  #include 
> >  #include 
> >  
> > -#ifdef CONFIG_ARM64
> >  /*
> >   * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
> >   *  guest.
> > @@ -77,4 +76,3 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct 
> > kvm_vcpu *vcpu)
> >  
> > return 1;
> >  }
> > -#endif
> > 
> 
> Otherwise looks good.
> 
Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 24/40] KVM: arm64: Rewrite system register accessors to read/write functions

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 02:34:21PM +0100, Andrew Jones wrote:
> On Thu, Feb 15, 2018 at 10:03:16PM +0100, Christoffer Dall wrote:
> > From: Christoffer Dall <cd...@cs.columbia.edu>
> > 
> > Currently we access the system registers array via the vcpu_sys_reg()
> > macro.  However, we are about to change the behavior to some times
> > modify the register file directly, so let's change this to two
> > primitives:
> > 
> >  * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg()
> >  * Direct array access macro __vcpu_sys_reg()
> > 
> > The first primitive should be used in places where the code needs to
> > access the currently loaded VCPU's state as observed by the guest.  For
> > example, when trapping on cache related registers, a write to a system
> > register should go directly to the VCPU version of the register.
> > 
> > The second primitive can be used in places where the VCPU is known to
> > never be running (for example userspace access) or for registers which
> > are never context switched (for example all the PMU system registers).
> > 
> > This rewrites all users of vcpu_sys_regs to one of the two primitives
> > above.
> > 
> > No functional change.
> > 
> > Signed-off-by: Christoffer Dall <cd...@cs.columbia.edu>
> > ---
> > 
> > Notes:
> > Changes since v2:
> >  - New patch (deferred register handling has been reworked)
> > 
> >  arch/arm64/include/asm/kvm_emulate.h | 13 ---
> >  arch/arm64/include/asm/kvm_host.h| 13 ++-
> >  arch/arm64/include/asm/kvm_mmu.h |  2 +-
> >  arch/arm64/kvm/debug.c   | 27 +-
> >  arch/arm64/kvm/inject_fault.c|  8 ++--
> >  arch/arm64/kvm/sys_regs.c| 71 
> > ++--
> >  arch/arm64/kvm/sys_regs.h|  4 +-
> >  arch/arm64/kvm/sys_regs_generic_v8.c |  4 +-
> >  virt/kvm/arm/pmu.c   | 37 ++-
> >  9 files changed, 102 insertions(+), 77 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h 
> > b/arch/arm64/include/asm/kvm_emulate.h
> > index 3cc535591bdf..d313aaae5c38 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -290,15 +290,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu 
> > *vcpu)
> >  
> >  static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
> >  {
> > -   return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> > +   return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> >  }
> >  
> >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >  {
> > -   if (vcpu_mode_is_32bit(vcpu))
> > +   if (vcpu_mode_is_32bit(vcpu)) {
> > *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
> > -   else
> > -   vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
> > +   } else {
> > +   u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
> > +   sctlr |= (1 << 25);
> > +   vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
> > +   }
> >  }
> >  
> >  static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
> > @@ -306,7 +309,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
> > if (vcpu_mode_is_32bit(vcpu))
> > return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
> >  
> > -   return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
> > +   return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
> >  }
> >  
> >  static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
> > diff --git a/arch/arm64/include/asm/kvm_host.h 
> > b/arch/arm64/include/asm/kvm_host.h
> > index f2a6f39aec87..68398bf7882f 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -287,7 +287,18 @@ struct kvm_vcpu_arch {
> >  };
> >  
> >  #define vcpu_gp_regs(v)(&(v)->arch.ctxt.gp_regs)
> > -#define vcpu_sys_reg(v,r)  ((v)->arch.ctxt.sys_regs[(r)])
> > +
> > +/*
> > + * Only use __vcpu_sys_reg if you know you want the memory backed version 
> > of a
> > + * register, and not the one most recently accessed by a runnning VCPU.  
> > For
> > + * example, for userpace access or for system registers that are never 
> > context
> > + * switched, but only emulated.
> > + */
> > +#define __vcpu_sys_reg(v,r)((v)->arch.ctxt.sys_regs[(r)])
>

Re: [PATCH v4 24/40] KVM: arm64: Rewrite system register accessors to read/write functions

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 10:48:10AM +, Marc Zyngier wrote:
> On Thu, 22 Feb 2018 09:22:37 +,
> Christoffer Dall wrote:
> > 
> > On Wed, Feb 21, 2018 at 01:32:45PM +, Marc Zyngier wrote:
> > > On Thu, 15 Feb 2018 21:03:16 +0000,
> > > Christoffer Dall wrote:
> > > > 
> > > > From: Christoffer Dall <cd...@cs.columbia.edu>
> > > > 
> > > > Currently we access the system registers array via the vcpu_sys_reg()
> > > > macro.  However, we are about to change the behavior to some times
> > > > modify the register file directly, so let's change this to two
> > > > primitives:
> > > > 
> > > >  * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg()
> > > >  * Direct array access macro __vcpu_sys_reg()
> > > > 
> > > > The first primitive should be used in places where the code needs to
> > > > access the currently loaded VCPU's state as observed by the guest.  For
> > > > example, when trapping on cache related registers, a write to a system
> > > > register should go directly to the VCPU version of the register.
> > > > 
> > > > The second primitive can be used in places where the VCPU is known to
> > > > never be running (for example userspace access) or for registers which
> > > > are never context switched (for example all the PMU system registers).
> > > > 
> > > > This rewrites all users of vcpu_sys_regs to one of the two primitives
> > > > above.
> > > > 
> > > > No functional change.
> > > > 
> > > > Signed-off-by: Christoffer Dall <cd...@cs.columbia.edu>
> > > > ---
> > > > 
> > > > Notes:
> > > > Changes since v2:
> > > >  - New patch (deferred register handling has been reworked)
> > > > 
> > > >  arch/arm64/include/asm/kvm_emulate.h | 13 ---
> > > >  arch/arm64/include/asm/kvm_host.h| 13 ++-
> > > >  arch/arm64/include/asm/kvm_mmu.h |  2 +-
> > > >  arch/arm64/kvm/debug.c   | 27 +-
> > > >  arch/arm64/kvm/inject_fault.c|  8 ++--
> > > >  arch/arm64/kvm/sys_regs.c| 71 
> > > > ++--
> > > >  arch/arm64/kvm/sys_regs.h|  4 +-
> > > >  arch/arm64/kvm/sys_regs_generic_v8.c |  4 +-
> > > >  virt/kvm/arm/pmu.c   | 37 ++-
> > > >  9 files changed, 102 insertions(+), 77 deletions(-)
> > > > 
> > > > diff --git a/arch/arm64/include/asm/kvm_emulate.h 
> > > > b/arch/arm64/include/asm/kvm_emulate.h
> > > > index 3cc535591bdf..d313aaae5c38 100644
> > > > --- a/arch/arm64/include/asm/kvm_emulate.h
> > > > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > > > @@ -290,15 +290,18 @@ static inline int kvm_vcpu_sys_get_rt(struct 
> > > > kvm_vcpu *vcpu)
> > > >  
> > > >  static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu 
> > > > *vcpu)
> > > >  {
> > > > -   return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> > > > +   return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> > > >  }
> > > >  
> > > >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> > > >  {
> > > > -   if (vcpu_mode_is_32bit(vcpu))
> > > > +   if (vcpu_mode_is_32bit(vcpu)) {
> > > > *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
> > > > -   else
> > > > -   vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
> > > > +   } else {
> > > > +   u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
> > > > +   sctlr |= (1 << 25);
> > > > +   vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
> > > 
> > > General comment: it is slightly annoying that vcpu_write_sys_reg takes
> > > its parameters in an order different from that of write_sysreg
> > > (register followed with value, instead of value followed with
> > > register). Not a deal breaker, but slightly confusing.
> > > 
> > 
> > Ah, I didn't compare to write_sysreg, I was thinking that
> > 
> >   vcpu_read_sys_reg(vcpu, SCTLR_EL1);
> >   vcpu_write_sys_reg(vcpu, SCTLR_EL1, val);
> > 
> > looked more symmetrical because the write 

Re: [PATCH v4 03/40] KVM: arm64: Avoid storing the vcpu pointer on the stack

2018-02-22 Thread Christoffer Dall
On Thu, Feb 22, 2018 at 10:56:41AM +0100, Andrew Jones wrote:
> On Thu, Feb 22, 2018 at 10:10:34AM +0100, Christoffer Dall wrote:
> > On Wed, Feb 21, 2018 at 06:32:00PM +0100, Andrew Jones wrote:
> > > 
> > > Besides my confusion on motivation, it looks good to me
> > > 
> > 
> > In that case, unless there's an argument that the code has become too
> > hard to understand, ...
> >
> 
> On the contrary, I think it's easier to read now than before. I just
> wasn't clear how it all tied together with this series. If its
> combination with other patches enables a speedup, then it certainly
> fits here. I was just comparing mrs+loads vs. a load from stack and

The comparison should be mrs+load, vs. str+load (you save the store on
the stack).

> it seemed it would actually cause a micro-performance-decrease.
> 

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 26/40] KVM: arm/arm64: Prepare to handle deferred save/restore of SPSR_EL1

2018-02-22 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 02:47:44PM +, Marc Zyngier wrote:
> On Thu, 15 Feb 2018 21:03:18 +,
> Christoffer Dall wrote:
> > 
> > SPSR_EL1 is not used by a VHE host kernel and can be deferred, but we
> > need to rework the accesses to this register to access the latest value
> > depending on whether or not guest system registers are loaded on the CPU
> > or only reside in memory.
> > 
> > The handling of accessing the various banked SPSRs for 32-bit VMs is a
> > bit clunky, but this will be improved in following patches which will
> > first prepare and subsequently implement deferred save/restore of the
> > 32-bit registers, including the 32-bit SPSRs.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > ---
> > 
> > Notes:
> > Changes since v2:
> >  - New patch (deferred register handling has been reworked)
> > 
> >  arch/arm/include/asm/kvm_emulate.h   | 12 ++-
> >  arch/arm/kvm/emulate.c   |  2 +-
> >  arch/arm64/include/asm/kvm_emulate.h | 41 
> > +++-
> >  arch/arm64/kvm/inject_fault.c|  4 ++--
> >  virt/kvm/arm/aarch32.c   |  2 +-
> >  5 files changed, 51 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/arm/include/asm/kvm_emulate.h 
> > b/arch/arm/include/asm/kvm_emulate.h
> > index e27caa4b47a1..6493bd479ddc 100644
> > --- a/arch/arm/include/asm/kvm_emulate.h
> > +++ b/arch/arm/include/asm/kvm_emulate.h
> > @@ -41,7 +41,17 @@ static inline unsigned long *vcpu_reg32(struct kvm_vcpu 
> > *vcpu, u8 reg_num)
> > return vcpu_reg(vcpu, reg_num);
> >  }
> >  
> > -unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
> > +unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu);
> > +
> > +static inline unsigned long vpcu_read_spsr(struct kvm_vcpu *vcpu)
> > +{
> > +   return *__vcpu_spsr(vcpu);
> > +}
> > +
> > +static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
> > +{
> > +   *__vcpu_spsr(vcpu) = v;
> > +}
> >  
> >  static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
> >  u8 reg_num)
> > diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
> > index fa501bf437f3..9046b53d87c1 100644
> > --- a/arch/arm/kvm/emulate.c
> > +++ b/arch/arm/kvm/emulate.c
> > @@ -142,7 +142,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 
> > reg_num)
> >  /*
> >   * Return the SPSR for the current mode of the virtual CPU.
> >   */
> > -unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
> > +unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu)
> >  {
> > unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
> > switch (mode) {
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h 
> > b/arch/arm64/include/asm/kvm_emulate.h
> > index d313aaae5c38..47c2406755fa 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -26,6 +26,7 @@
> >  
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -143,13 +144,43 @@ static inline void vcpu_set_reg(struct kvm_vcpu 
> > *vcpu, u8 reg_num,
> > vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
> >  }
> >  
> > -/* Get vcpu SPSR for current mode */
> > -static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
> > +static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
> >  {
> > -   if (vcpu_mode_is_32bit(vcpu))
> > -   return vcpu_spsr32(vcpu);
> > +   unsigned long *p = (unsigned long 
> > *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
> > +
> > +   if (vcpu_mode_is_32bit(vcpu)) {
> > +   unsigned long *p_32bit = vcpu_spsr32(vcpu);
> > +
> > +   /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */
> > +   if (p_32bit != (unsigned long *)p)
> > +   return *p_32bit;
> 
> Clunky, you said? ;-) p is already an unsigned long *, so there's no
> need to cast it.
> 

Right, I think this is a leftover from some attempts at making this less
terrible, but this was eventually the least terrible.  Believe it or
not.

> > +   }
> > +
> > +   if (vcpu->arch.sysregs_loaded_on_cpu)
> > +   return read_sysreg_el1(spsr);
> > +   else
> > +   return *p;
> > +}
> >  
> > -   return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
> > +static inline void vcpu_write_spsr

Re: [PATCH v4 24/40] KVM: arm64: Rewrite system register accessors to read/write functions

2018-02-22 Thread Christoffer Dall
On Mon, Feb 19, 2018 at 06:12:29PM +, Julien Grall wrote:
> Hi Christoffer,
> 
> On 15/02/18 21:03, Christoffer Dall wrote:
> >From: Christoffer Dall <cd...@cs.columbia.edu>
> >
> >Currently we access the system registers array via the vcpu_sys_reg()
> >macro.  However, we are about to change the behavior to some times
> >modify the register file directly, so let's change this to two
> >primitives:
> >
> >  * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg()
> >  * Direct array access macro __vcpu_sys_reg()
> >
> >The first primitive should be used in places where the code needs to
> >access the currently loaded VCPU's state as observed by the guest.  For
> >example, when trapping on cache related registers, a write to a system
> >register should go directly to the VCPU version of the register.
> >
> >The second primitive can be used in places where the VCPU is known to
> 
> "second primitive" is a bit confusing here. I count 3 primitives above:
> (vcpu_write_sys_reg(), vcpu_read_sys_reg() and __vcpu_sys_reg()). From the
> description, I would say it should refer to the latter (i.e. the third one).
> 

Good point.  I'll clarify.

> >never be running (for example userspace access) or for registers which
> >are never context switched (for example all the PMU system registers).
> >
> >This rewrites all users of vcpu_sys_regs to one of the two primitives
> >above.
> >
> >No functional change.
> >
> >Signed-off-by: Christoffer Dall <cd...@cs.columbia.edu>
> 
> [...]
> 
> >diff --git a/arch/arm64/include/asm/kvm_host.h 
> >b/arch/arm64/include/asm/kvm_host.h
> >index f2a6f39aec87..68398bf7882f 100644
> >--- a/arch/arm64/include/asm/kvm_host.h
> >+++ b/arch/arm64/include/asm/kvm_host.h
> >@@ -287,7 +287,18 @@ struct kvm_vcpu_arch {
> >  };
> >  #define vcpu_gp_regs(v)(&(v)->arch.ctxt.gp_regs)
> >-#define vcpu_sys_reg(v,r)   ((v)->arch.ctxt.sys_regs[(r)])
> >+
> >+/*
> >+ * Only use __vcpu_sys_reg if you know you want the memory backed version 
> >of a
> >+ * register, and not the one most recently accessed by a runnning VCPU.  For
> 
> NIT: s/runnning/running/
> 
> >+ * example, for userpace access or for system registers that are never 
> >context
> 
> NIT: s/userpace/userspace/
> 
> >+ * switched, but only emulated.
> >+ */
> >+#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
> >+
> >+#define vcpu_read_sys_reg(v,r)  __vcpu_sys_reg(v,r)
> >+#define vcpu_write_sys_reg(v,r,n)   do { __vcpu_sys_reg(v,r) = n; } while 
> >(0)
> >+
> >  /*
> >   * CP14 and CP15 live in the same array, as they are backed by the
> >   * same system registers.
> 
> [...]
> 
> >diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> >index b48af790615e..a05d2c01c786 100644
> >--- a/arch/arm64/kvm/sys_regs.c
> >+++ b/arch/arm64/kvm/sys_regs.c
> 
> [...]
> 
> >@@ -817,10 +818,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, 
> >struct sys_reg_params *p,
> > return false;
> > }
> >-vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
> >-& ARMV8_PMU_USERENR_MASK;
> >-} else {
> >-p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
> >+__vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
> >+   p->regval & ARMV8_PMU_USERENR_MASK;
> >+} else  {
> 
> NIT: There is a double space between else and {.
> 
> >+p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
> > & ARMV8_PMU_USERENR_MASK;
> > }
> 
Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 17/40] KVM: arm64: Move userspace system registers into separate function

2018-02-22 Thread Christoffer Dall
On Mon, Feb 19, 2018 at 05:21:17PM +, Julien Grall wrote:
> Hi Christoffer,
> 
> On 15/02/18 21:03, Christoffer Dall wrote:
> >There's a semantic difference between the EL1 registers that control
> >operation of a kernel running in EL1 and EL1 registers that only control
> >userspace execution in EL0.  Since we can defer saving/restoring the
> >latter, move them into their own function.
> >
> >ACTLR_EL1 is not used by a VHE host, so we can move this register into
> >the EL1 state which is not saved/restored for a VHE host.
> 
> Looking at D10.2.1 (ARM DDI 0487C.a), the statement regarding the use of
> ACTLR_EL1 seems to be less strong than what you state here. It looks like it
> would be possible to have hardware where ACTLR_EL1 would still have an
> effect on host EL0. I also read the comments on the version 2 of this patch
> but I wasn't able to find what I missing.
> 
As Marc points out, I'll add a reference to the spec, which says that Arm
recommends ACTLR_EL1 not have an effect on VHE host EL0.  While such
hardware is potentially possible, it's unlikely, and we can fix that
later if anyone builds a VHE platform we care about that uses ACTLR_EL1
for host EL0.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 24/40] KVM: arm64: Rewrite system register accessors to read/write functions

2018-02-22 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 01:32:45PM +, Marc Zyngier wrote:
> On Thu, 15 Feb 2018 21:03:16 +,
> Christoffer Dall wrote:
> > 
> > From: Christoffer Dall <cd...@cs.columbia.edu>
> > 
> > Currently we access the system registers array via the vcpu_sys_reg()
> > macro.  However, we are about to change the behavior to some times
> > modify the register file directly, so let's change this to two
> > primitives:
> > 
> >  * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg()
> >  * Direct array access macro __vcpu_sys_reg()
> > 
> > The first primitive should be used in places where the code needs to
> > access the currently loaded VCPU's state as observed by the guest.  For
> > example, when trapping on cache related registers, a write to a system
> > register should go directly to the VCPU version of the register.
> > 
> > The second primitive can be used in places where the VCPU is known to
> > never be running (for example userspace access) or for registers which
> > are never context switched (for example all the PMU system registers).
> > 
> > This rewrites all users of vcpu_sys_regs to one of the two primitives
> > above.
> > 
> > No functional change.
> > 
> > Signed-off-by: Christoffer Dall <cd...@cs.columbia.edu>
> > ---
> > 
> > Notes:
> > Changes since v2:
> >  - New patch (deferred register handling has been reworked)
> > 
> >  arch/arm64/include/asm/kvm_emulate.h | 13 ---
> >  arch/arm64/include/asm/kvm_host.h| 13 ++-
> >  arch/arm64/include/asm/kvm_mmu.h |  2 +-
> >  arch/arm64/kvm/debug.c   | 27 +-
> >  arch/arm64/kvm/inject_fault.c|  8 ++--
> >  arch/arm64/kvm/sys_regs.c| 71 
> > ++--
> >  arch/arm64/kvm/sys_regs.h|  4 +-
> >  arch/arm64/kvm/sys_regs_generic_v8.c |  4 +-
> >  virt/kvm/arm/pmu.c   | 37 ++-
> >  9 files changed, 102 insertions(+), 77 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h 
> > b/arch/arm64/include/asm/kvm_emulate.h
> > index 3cc535591bdf..d313aaae5c38 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -290,15 +290,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu 
> > *vcpu)
> >  
> >  static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
> >  {
> > -   return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> > +   return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
> >  }
> >  
> >  static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
> >  {
> > -   if (vcpu_mode_is_32bit(vcpu))
> > +   if (vcpu_mode_is_32bit(vcpu)) {
> > *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
> > -   else
> > -   vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
> > +   } else {
> > +   u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
> > +   sctlr |= (1 << 25);
> > +   vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
> 
> General comment: it is slightly annoying that vcpu_write_sys_reg takes
> its parameters in an order different from that of write_sysreg
> (register followed with value, instead of value followed with
> register). Not a deal breaker, but slightly confusing.
> 

Ah, I didn't compare to write_sysreg, I was thinking that

  vcpu_read_sys_reg(vcpu, SCTLR_EL1);
  vcpu_write_sys_reg(vcpu, SCTLR_EL1, val);

looked more symmetrical because the write just takes an extra value, but
I can see your argument as well.

I don't mind changing it if it matters to you?
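
Side by side, for comparison:

	write_sysreg(val, sctlr_el1);			/* value, then register */
	vcpu_write_sys_reg(vcpu, SCTLR_EL1, val);	/* register, then value */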

> > +   }
> >  }
> >  
> >  static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
> > @@ -306,7 +309,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
> > if (vcpu_mode_is_32bit(vcpu))
> > return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
> >  
> > -   return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
> > +   return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
> >  }
> >  
> >  static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
> > diff --git a/arch/arm64/include/asm/kvm_host.h 
> > b/arch/arm64/include/asm/kvm_host.h
> > index f2a6f39aec87..68398bf7882f 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -287,7 +287,18 @@ struct kvm_vcpu_arch {
> >  };
> >  
> >  #define vcpu_gp_regs(v) 

Re: [PATCH v4 13/40] KVM: arm64: Introduce VHE-specific kvm_vcpu_run

2018-02-22 Thread Christoffer Dall
On Wed, Feb 21, 2018 at 07:18:32PM +0100, Andrew Jones wrote:
> On Wed, Feb 21, 2018 at 06:43:00PM +0100, Andrew Jones wrote:
> > On Thu, Feb 15, 2018 at 10:03:05PM +0100, Christoffer Dall wrote:
> > > So far this is mostly (see below) a copy of the legacy non-VHE switch
> > > function, but we will start reworking these functions in separate
> > > directions to work on VHE and non-VHE in the most optimal way in later
> > > patches.
> > > 
> > > The only difference after this patch between the VHE and non-VHE run
> > > functions is that we omit the branch-predictor variant-2 hardening for
> > > QC Falkor CPUs, because this workaround is specific to a series of
> > > non-VHE ARMv8.0 CPUs.
> > > 
> > > Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>
> > > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> > > ---
> > > 
> > > Notes:
> > > Changes since v3:
> > >  - Added BUG() to 32-bit ARM VHE run function
> > >  - Omitted QC Falkor BP Hardening functionality from VHE-specific
> > >function
> > > 
> > > Changes since v2:
> > >  - Reworded commit message
> > > 
> > > Changes since v1:
> > >  - Rename kvm_vcpu_run to kvm_vcpu_run_vhe and rename __kvm_vcpu_run 
> > > to
> > >__kvm_vcpu_run_nvhe
> > >  - Removed stray whitespace line
> > > 
> > >  arch/arm/include/asm/kvm_asm.h   |  5 ++-
> > >  arch/arm/kvm/hyp/switch.c|  2 +-
> > >  arch/arm64/include/asm/kvm_asm.h |  4 ++-
> > >  arch/arm64/kvm/hyp/switch.c  | 66 
> > > +++-
> > >  virt/kvm/arm/arm.c   |  5 ++-
> > >  5 files changed, 77 insertions(+), 5 deletions(-)
> > > 
> > 
> > ...
> > 
> > > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> > > index 2062d9357971..5bd879c78951 100644
> > > --- a/virt/kvm/arm/arm.c
> > > +++ b/virt/kvm/arm/arm.c
> > > @@ -736,7 +736,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> > > struct kvm_run *run)
> > >   if (has_vhe())
> > >   kvm_arm_vhe_guest_enter();
> > >  
> > > - ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
> > > + if (has_vhe())
> > > + ret = kvm_vcpu_run_vhe(vcpu);
> > > + else
> > > + ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);
> > >  
> > >   if (has_vhe())
> > >   kvm_arm_vhe_guest_exit();
> > 
> > We can combine these has_vhe()'s
> > 
> >  if (has_vhe()) {
> > kvm_arm_vhe_guest_enter();
> > ret = kvm_vcpu_run_vhe(vcpu);
> > kvm_arm_vhe_guest_exit();
> >  } else
> > ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);
> 
> Maybe even do a cleanup patch that removes
> kvm_arm_vhe_guest_enter/exit by putting the daif
> masking/restoring directly into kvm_vcpu_run_vhe()?
> 
Yes, indeed.  This is a blind rebasing result on my part.
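
Something like this for the respin then, assuming
kvm_arm_vhe_guest_enter/exit really are just the DAIF mask/restore
wrappers (sketch, with __kvm_vcpu_run_vhe() as a hypothetical inner
helper standing in for the current function body):

	int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
	{
		int ret;

		local_daif_mask();		/* was kvm_arm_vhe_guest_enter() */
		ret = __kvm_vcpu_run_vhe(vcpu);	/* current world-switch body */
		local_daif_restore(DAIF_PROCCTX_NOIRQ); /* was kvm_arm_vhe_guest_exit() */

		return ret;
	}

and the call site in kvm_arch_vcpu_ioctl_run() reduces to the combined
if/else you suggest above.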

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

