Re: [PATCH 35/59] KVM: arm/arm64: nv: Support multiple nested stage 2 mmu structures

2019-07-04 Thread Alexandru Elisei
On 6/21/19 10:38 AM, Marc Zyngier wrote:

> From: Christoffer Dall 
>
> Add stage 2 mmu data structures for virtual EL2 and for nested guests.
> We don't yet populate shadow stage 2 page tables, but we now have a
> framework for getting to a shadow stage 2 pgd.
>
> We allocate twice the number of vcpus as stage 2 mmu structures because
> that's sufficient for each vcpu running two VMs without having to flush
> the stage 2 page tables.
>
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm/include/asm/kvm_host.h |   4 +
>  arch/arm/include/asm/kvm_mmu.h  |   3 +
>  arch/arm64/include/asm/kvm_host.h   |  28 +
>  arch/arm64/include/asm/kvm_mmu.h|   8 ++
>  arch/arm64/include/asm/kvm_nested.h |   7 ++
>  arch/arm64/kvm/nested.c | 172 
>  virt/kvm/arm/arm.c  |  16 ++-
>  virt/kvm/arm/mmu.c  |  31 ++---
>  8 files changed, 254 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index e3217c4ad25b..b821eb2383ad 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -424,4 +424,8 @@ static inline bool kvm_arm_vcpu_is_finalized(struct 
> kvm_vcpu *vcpu)
>   return true;
>  }
>  
> +static inline void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu) {}
> +static inline int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) { return 0; }
> +
>  #endif /* __ARM_KVM_HOST_H__ */
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index be23e3f8e08c..e6984b6da2ce 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -420,6 +420,9 @@ static inline int hyp_map_aux_data(void)
>  
>  static inline void kvm_set_ipa_limit(void) {}
>  
> +static inline void kvm_init_s2_mmu(struct kvm_s2_mmu *mmu) {}
> +static inline void kvm_init_nested(struct kvm *kvm) {}
> +
>  static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
>  {
>   struct kvm_vmid *vmid = >vmid;
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 3dee5e17a4ee..cc238de170d2 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -88,11 +88,39 @@ struct kvm_s2_mmu {
>   phys_addr_t pgd_phys;
>  
>   struct kvm *kvm;
> +
> + /*
> +  * For a shadow stage-2 MMU, the virtual vttbr programmed by the guest
> +  * hypervisor.  Unused for kvm_arch->mmu. Set to 1 when the structure
> +  * contains no valid information.
> +  */
> + u64 vttbr;
> +
> + /* true when this represents a nested context where virtual HCR_EL2.VM 
> == 1 */
> + boolnested_stage2_enabled;
> +
> + /*
> +  *  0: Nobody is currently using this, check vttbr for validity
> +  * >0: Somebody is actively using this.
> +  */
> + atomic_t refcnt;
>  };
>  
> +static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
> +{
> + return !(mmu->vttbr & 1);
> +}
> +
>  struct kvm_arch {
>   struct kvm_s2_mmu mmu;
>  
> + /*
> +  * Stage 2 paging stage for VMs with nested virtual using a virtual
> +  * VMID.
> +  */
> + struct kvm_s2_mmu *nested_mmus;
> + size_t nested_mmus_size;
> +
>   /* VTCR_EL2 value for this VM */
>   u64vtcr;
>  
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index 1eb6e0ca61c2..32bcaa1845dc 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -100,6 +100,7 @@ alternative_cb_end
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  void kvm_update_va_mask(struct alt_instr *alt,
>   __le32 *origptr, __le32 *updptr, int nr_inst);
> @@ -164,6 +165,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, 
> size_t size,
>void **haddr);
>  void free_hyp_pgds(void);
>  
> +void kvm_unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 
> size);
>  void stage2_unmap_vm(struct kvm *kvm);
>  int kvm_alloc_stage2_pgd(struct kvm_s2_mmu *mmu);
>  void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
> @@ -635,5 +637,11 @@ static __always_inline void __load_guest_stage2(struct 
> kvm_s2_mmu *mmu)
>   asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
>  }
>  
> +static inline u64 get_vmid(u64 vttbr)
> +{
> + return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
> + VTTBR_VMID_SHIFT;
> +}
> +
>  #endif /* __ASSEMBLY__ */
>  #endif /* __ARM64_KVM_MMU_H__ */
> diff --git a/arch/arm64/include/asm/kvm_nested.h 
> b/arch/arm64/include/asm/kvm_nested.h
> index 61e71d0d2151..d4021d0892bd 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -10,6 +10,13 @@ static inline bool nested_virt_in_use(const struct 
> kvm_vcpu *vcpu)
>   

Re: [PATCH 15/59] KVM: arm64: nv: Refactor vcpu_{read,write}_sys_reg

2019-07-04 Thread Marc Zyngier
On 27/06/2019 10:21, Alexandru Elisei wrote:
> On 6/21/19 10:37 AM, Marc Zyngier wrote:
>> Extract the direct HW accessors for later reuse.
>>
>> Signed-off-by: Marc Zyngier 
>> ---
>>  arch/arm64/kvm/sys_regs.c | 247 +-
>>  1 file changed, 139 insertions(+), 108 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 2b8734f75a09..e181359adadf 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const 
>> struct el2_sysreg_map *map,
>>  return entry;
>>  }
>>  
>> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
>> +{
>> +/*
>> + * System registers listed in the switch are not saved on every
>> + * exit from the guest but are only saved on vcpu_put.
>> + *
>> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> + * should never be listed below, because the guest cannot modify its
>> + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
>> + * thread when emulating cross-VCPU communication.
>> + */
>> +switch (reg) {
>> +case CSSELR_EL1:*val = read_sysreg_s(SYS_CSSELR_EL1);   break;
>> +case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12);   break;
>> +case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1);break;
>> +case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12);   break;
>> +case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12);   break;
>> +case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12);   break;
>> +case TCR_EL1:   *val = read_sysreg_s(SYS_TCR_EL12); break;
>> +case ESR_EL1:   *val = read_sysreg_s(SYS_ESR_EL12); break;
>> +case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12);   break;
>> +case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12);   break;
>> +case FAR_EL1:   *val = read_sysreg_s(SYS_FAR_EL12); break;
>> +case MAIR_EL1:  *val = read_sysreg_s(SYS_MAIR_EL12);break;
>> +case VBAR_EL1:  *val = read_sysreg_s(SYS_VBAR_EL12);break;
>> +case CONTEXTIDR_EL1:*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
>> +case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0);break;
>> +case TPIDRRO_EL0:   *val = read_sysreg_s(SYS_TPIDRRO_EL0);  break;
>> +case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1);break;
>> +case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12);   break;
>> +case CNTKCTL_EL1:   *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
>> +case PAR_EL1:   *val = read_sysreg_s(SYS_PAR_EL1);  break;
>> +case DACR32_EL2:*val = read_sysreg_s(SYS_DACR32_EL2);   break;
>> +case IFSR32_EL2:*val = read_sysreg_s(SYS_IFSR32_EL2);   break;
>> +case DBGVCR32_EL2:  *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
>> +default:return false;
>> +}
>> +
>> +return true;
>> +}
>> +
>> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
>> +{
>> +/*
>> + * System registers listed in the switch are not restored on every
>> + * entry to the guest but are only restored on vcpu_load.
>> + *
>> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> + * should never be listed below, because the the MPIDR should only be
>> + * set once, before running the VCPU, and never changed later.
>> + */
>> +switch (reg) {
>> +case CSSELR_EL1:write_sysreg_s(val, SYS_CSSELR_EL1);break;
>> +case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12);break;
>> +case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break;
>> +case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12);break;
>> +case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12);break;
>> +case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12);break;
>> +case TCR_EL1:   write_sysreg_s(val, SYS_TCR_EL12);  break;
>> +case ESR_EL1:   write_sysreg_s(val, SYS_ESR_EL12);  break;
>> +case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12);break;
>> +case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12);break;
>> +case FAR_EL1:   write_sysreg_s(val, SYS_FAR_EL12);  break;
>> +case MAIR_EL1:  write_sysreg_s(val, SYS_MAIR_EL12); break;
>> +case VBAR_EL1:  write_sysreg_s(val, SYS_VBAR_EL12); break;
>> +case CONTEXTIDR_EL1:write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
>> +case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
>> +case TPIDRRO_EL0:   write_sysreg_s(val, SYS_TPIDRRO_EL0);   break;
>> +case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
>> +case AMAIR_EL1: write_sysreg_s(val, 

Re: [PATCH 13/59] KVM: arm64: nv: Handle virtual EL2 registers in vcpu_read/write_sys_reg()

2019-07-04 Thread Marc Zyngier
On 26/06/2019 16:04, Alexandru Elisei wrote:
> On 6/21/19 10:37 AM, Marc Zyngier wrote:
>> From: Andre Przywara 
>>
>> KVM internally uses accessor functions when reading or writing the
>> guest's system registers. This takes care of accessing either the stored
>> copy or using the "live" EL1 system registers when the host uses VHE.
>>
>> With the introduction of virtual EL2 we add a bunch of EL2 system
>> registers, which now must also be taken care of:
>> - If the guest is running in vEL2, and we access an EL1 sysreg, we must
>>   revert to the stored version of that, and not use the CPU's copy.
>> - If the guest is running in vEL1, and we access an EL2 sysreg, we must
>>   also use the stored version, since the CPU carries the EL1 copy.
>> - Some EL2 system registers are supposed to affect the current execution
>>   of the system, so we need to put them into their respective EL1
>>   counterparts. For this we need to define a mapping between the two.
>>   This is done using the newly introduced struct el2_sysreg_map.
>> - Some EL2 system registers have a different format than their EL1
>>   counterpart, so we need to translate them before writing them to the
>>   CPU. This is done using an (optional) translate function in the map.
>> - There are the three special registers SP_EL2, SPSR_EL2 and ELR_EL2,
>>   which need some separate handling.
>>
>> All of these cases are now wrapped into the existing accessor functions,
>> so KVM users wouldn't need to care whether they access EL2 or EL1
>> registers and also which state the guest is in.
>>
>> This handles what was formerly known as the "shadow state" dynamically,
>> without requiring a separate copy for each vCPU EL.
>>
>> Signed-off-by: Andre Przywara 
>> Signed-off-by: Marc Zyngier 
>> ---
>>  arch/arm64/include/asm/kvm_emulate.h |   6 +
>>  arch/arm64/include/asm/kvm_host.h|   5 +
>>  arch/arm64/kvm/sys_regs.c| 163 +++
>>  3 files changed, 174 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/kvm_emulate.h 
>> b/arch/arm64/include/asm/kvm_emulate.h
>> index c43aac5fed69..f37006b6eec4 100644
>> --- a/arch/arm64/include/asm/kvm_emulate.h
>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>> @@ -70,6 +70,12 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
>>  int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
>>  int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
>>  
>> +u64 translate_tcr(u64 tcr);
>> +u64 translate_cptr(u64 tcr);
>> +u64 translate_sctlr(u64 tcr);
>> +u64 translate_ttbr0(u64 tcr);
>> +u64 translate_cnthctl(u64 tcr);
>> +
>>  static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
>>  {
>>  return !(vcpu->arch.hcr_el2 & HCR_RW);
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index 2d4290d2513a..dae9c42a7219 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -217,6 +217,11 @@ enum vcpu_sysreg {
>>  NR_SYS_REGS /* Nothing after this line! */
>>  };
>>  
>> +static inline bool sysreg_is_el2(int reg)
>> +{
>> +return reg >= FIRST_EL2_SYSREG && reg < NR_SYS_REGS;
>> +}
>> +
>>  /* 32bit mapping */
>>  #define c0_MPIDR(MPIDR_EL1 * 2) /* MultiProcessor ID Register */
>>  #define c0_CSSELR   (CSSELR_EL1 * 2)/* Cache Size Selection Register */
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 693dd063c9c2..d024114da162 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -76,11 +76,142 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
>>  return false;
>>  }
>>  
>> +static u64 tcr_el2_ips_to_tcr_el1_ps(u64 tcr_el2)
>> +{
>> +return ((tcr_el2 & TCR_EL2_PS_MASK) >> TCR_EL2_PS_SHIFT)
>> +<< TCR_IPS_SHIFT;
>> +}
>> +
>> +u64 translate_tcr(u64 tcr)
>> +{
>> +return TCR_EPD1_MASK |  /* disable TTBR1_EL1 */
>> +   ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
>> +   tcr_el2_ips_to_tcr_el1_ps(tcr) |
>> +   (tcr & TCR_EL2_TG0_MASK) |
>> +   (tcr & TCR_EL2_ORGN0_MASK) |
>> +   (tcr & TCR_EL2_IRGN0_MASK) |
>> +   (tcr & TCR_EL2_T0SZ_MASK);
>> +}
>> +
>> +u64 translate_cptr(u64 cptr_el2)
>> +{
>> +u64 cpacr_el1 = 0;
>> +
>> +if (!(cptr_el2 & CPTR_EL2_TFP))
>> +cpacr_el1 |= CPACR_EL1_FPEN;
>> +if (cptr_el2 & CPTR_EL2_TTA)
>> +cpacr_el1 |= CPACR_EL1_TTA;
>> +if (!(cptr_el2 & CPTR_EL2_TZ))
>> +cpacr_el1 |= CPACR_EL1_ZEN;
>> +
>> +return cpacr_el1;
>> +}
>> +
>> +u64 translate_sctlr(u64 sctlr)
>> +{
>> +/* Bit 20 is RES1 in SCTLR_EL1, but RES0 in SCTLR_EL2 */
>> +return sctlr | BIT(20);
>> +}
>> +
>> +u64 translate_ttbr0(u64 ttbr0)
>> +{
>> +/* Force ASID to 0 (ASID 0 or RES0) */
>> +return ttbr0 & ~GENMASK_ULL(63, 48);
>> +}
>> +
>> +u64 translate_cnthctl(u64 cnthctl)
>> +{
>> +return ((cnthctl & 0x3) << 10) | (cnthctl & 0xfc);
>> +}
>> +
>> +#define 

Re: [PATCH 27/59] KVM: arm64: nv: Respect virtual HCR_EL2.TVM and TRVM settings

2019-07-04 Thread Marc Zyngier
On 26/06/2019 07:55, Julien Thierry wrote:
> 
> 
> On 06/21/2019 10:38 AM, Marc Zyngier wrote:
>> From: Jintack Lim 
>>
>> Forward the EL1 virtual memory register traps to the virtual EL2 if they
>> are not coming from the virtual EL2 and the virtual HCR_EL2.TVM or TRVM
>> bit is set.
>>
>> This is for recursive nested virtualization.
>>
>> Signed-off-by: Jintack Lim 
>> Signed-off-by: Marc Zyngier 
>> ---
>>  arch/arm64/kvm/sys_regs.c | 24 
>>  1 file changed, 24 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 582d62aa48b7..0f74b9277a86 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -436,6 +436,27 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
>>  return true;
>>  }
>>  
>> +/* This function is to support the recursive nested virtualization */
>> +static bool forward_vm_traps(struct kvm_vcpu *vcpu, struct sys_reg_params 
>> *p)
>> +{
>> +u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
>> +
>> +/* If a trap comes from the virtual EL2, the host hypervisor handles. */
>> +if (vcpu_mode_el2(vcpu))
>> +return false;
>> +
>> +/*
>> + * If the virtual HCR_EL2.TVM or TRVM bit is set, we need to foward
>> + * this trap to the virtual EL2.
>> + */
>> +if ((hcr_el2 & HCR_TVM) && p->is_write)
>> +return true;
>> +else if ((hcr_el2 & HCR_TRVM) && !p->is_write)
>> +return true;
>> +
>> +return false;
>> +}
>> +
>>  /*
>>   * Generic accessor for VM registers. Only called as long as HCR_TVM
>>   * is set. If the guest enables the MMU, we stop trapping the VM
>> @@ -452,6 +473,9 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
>>  if (el12_reg(p) && forward_nv_traps(vcpu))
>>  return false;
>>  
>> +if (!el12_reg(p) && forward_vm_traps(vcpu, p))
>> +return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> 
> Since we already have forward_traps(), isn't this just:
> 
>   if (!el12_reg(p) && forward_traps(vcpu, p->is_write ? HCR_TVM : 
> HCR_TRVM))
>   return true;
> 
> We could maybe simplify forward_vm_traps() to just call forward_traps()
> similar to forward_nv_traps().

Odd. I remember doing something like that. Where has it gone? Yes, this
looks sensible.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v2 11/14] arm64: Move the ASID allocator code in a separate file

2019-07-04 Thread James Morse
Hi Julien,

On 20/06/2019 14:06, Julien Grall wrote:
> We will want to re-use the ASID allocator in a separate context (e.g
> allocating VMID). So move the code in a new file.
> 
> The function asid_check_context has been moved in the header as a static
> inline function because we want to avoid add a branch when checking if the
> ASID is still valid.

> diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
> index 3df63a28856c..b745cf356fe1 100644
> --- a/arch/arm64/mm/context.c
> +++ b/arch/arm64/mm/context.c
> @@ -23,46 +23,21 @@

> -#define ASID_FIRST_VERSION(info) NUM_ASIDS(info)

> diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
> new file mode 100644
> index ..7252e4fdd5e9
> --- /dev/null
> +++ b/arch/arm64/lib/asid.c
> @@ -0,0 +1,185 @@

> +#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits))

(oops!)


> @@ -344,7 +115,7 @@ static int asids_init(void)
>   if (!asid_allocator_init(_info, bits, ASID_PER_CONTEXT,
>asid_flush_cpu_ctxt))
>   panic("Unable to initialize ASID allocator for %lu ASIDs\n",
> -   1UL << bits);
> +   NUM_ASIDS(_info));

Could this go in the patch that adds NUM_ASIDS()?


Thanks,

James
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 13/59] KVM: arm64: nv: Handle virtual EL2 registers in vcpu_read/write_sys_reg()

2019-07-04 Thread Marc Zyngier
On 03/07/2019 17:32, Alexandru Elisei wrote:

[...]

 +}
 +
 +#define EL2_SYSREG(el2, el1, translate)   \
 +  [el2 - FIRST_EL2_SYSREG] = { el2, el1, translate }
 +#define PURE_EL2_SYSREG(el2) \
 +  [el2 - FIRST_EL2_SYSREG] = { el2,__INVALID_SYSREG__, NULL }
 +/*
 + * Associate vEL2 registers to their EL1 counterparts on the CPU.
 + * The translate function can be NULL, when the register layout is 
 identical.
 + */
 +struct el2_sysreg_map {
 +  int sysreg; /* EL2 register index into the array above */
 +  int mapping;/* associated EL1 register */
 +  u64 (*translate)(u64 value);
 +} nested_sysreg_map[NR_SYS_REGS - FIRST_EL2_SYSREG] = {
 +  PURE_EL2_SYSREG( VPIDR_EL2 ),
 +  PURE_EL2_SYSREG( VMPIDR_EL2 ),
 +  PURE_EL2_SYSREG( ACTLR_EL2 ),
 +  PURE_EL2_SYSREG( HCR_EL2 ),
 +  PURE_EL2_SYSREG( MDCR_EL2 ),
 +  PURE_EL2_SYSREG( HSTR_EL2 ),
 +  PURE_EL2_SYSREG( HACR_EL2 ),
 +  PURE_EL2_SYSREG( VTTBR_EL2 ),
 +  PURE_EL2_SYSREG( VTCR_EL2 ),
 +  PURE_EL2_SYSREG( RVBAR_EL2 ),
 +  PURE_EL2_SYSREG( RMR_EL2 ),
 +  PURE_EL2_SYSREG( TPIDR_EL2 ),
 +  PURE_EL2_SYSREG( CNTVOFF_EL2 ),
 +  PURE_EL2_SYSREG( CNTHCTL_EL2 ),
 +  PURE_EL2_SYSREG( HPFAR_EL2 ),
 +  EL2_SYSREG(  SCTLR_EL2,  SCTLR_EL1,  translate_sctlr ),
 +  EL2_SYSREG(  CPTR_EL2,   CPACR_EL1,  translate_cptr  ),
 +  EL2_SYSREG(  TTBR0_EL2,  TTBR0_EL1,  translate_ttbr0 ),
 +  EL2_SYSREG(  TTBR1_EL2,  TTBR1_EL1,  NULL),
 +  EL2_SYSREG(  TCR_EL2,TCR_EL1,translate_tcr   ),
 +  EL2_SYSREG(  VBAR_EL2,   VBAR_EL1,   NULL),
 +  EL2_SYSREG(  AFSR0_EL2,  AFSR0_EL1,  NULL),
 +  EL2_SYSREG(  AFSR1_EL2,  AFSR1_EL1,  NULL),
 +  EL2_SYSREG(  ESR_EL2,ESR_EL1,NULL),
 +  EL2_SYSREG(  FAR_EL2,FAR_EL1,NULL),
 +  EL2_SYSREG(  MAIR_EL2,   MAIR_EL1,   NULL),
 +  EL2_SYSREG(  AMAIR_EL2,  AMAIR_EL1,  NULL),
 +};
>>> Figuring out which registers are in this map and which aren't and are 
>>> supposed
>>> to be treated differently is really cumbersome because they are split into 
>>> two
>>> types of el2 registers and their order is different from the order in enum
>>> vcpu_sysreg (in kvm_host.h). Perhaps adding a comment about what registers 
>>> will
>>> be treated differently would make the code a bit easier to follow?
>> I'm not sure what this buys us. We have 3 categories of EL2 sysregs:
>> - Purely emulated
>> - Directly mapped onto an EL1 sysreg
>> - Translated from EL2 to EL1
>>
>> I think the wrappers represent that pretty well, although we could split
>> EL2_SYSREG into DIRECT_EL2_SYSREG and TRANSLATE_EL2_SYSREG. As for the
>> order, does it really matter? We also have the trap table order, which
>> is also different from the enum. Do you propose we reorder everything?
> 
> The wrappers and the naming are fine.
> 
> I was trying to figure out which EL2 registers are in the nested_sysreg_map 
> and
> which aren't (that's what I meant by "two types of registers") by looking at 
> the
> vcpu_sysreg enum. Because the order in the map is different than the order in
> the enum, I was having a difficult time figuring out which registers are not 
> in
> the nested_sysreg_map to make sure we haven't somehow forgot to emulate a 
> register.
> 
> So no, I wasn't asking to reorder everything. I was asking if it would be
> appropriate to write a comment stating the intention to treat registers X, Y 
> and
> Z separately from the registers in nested_sysreg_map.

Ah, fair enough. Yes, that's a very reasonable suggestion.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm64/sve: Fix vq_present() macro to yield a bool

2019-07-04 Thread Dave Martin
On Thu, Jul 04, 2019 at 02:24:42PM +0200, Paolo Bonzini wrote:
> On 04/07/19 10:20, Marc Zyngier wrote:
> > +KVM, Paolo and Radim,
> > 
> > Guys, do you mind picking this single patch and sending it to Linus?
> > That's the only fix left for 5.2. Alternatively, I can send you a pull
> > request, but it feels overkill.
> 
> Sure, will do.
> 
> Paolo

Thanks all for the quick turnaround!

[...]

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm64/sve: Fix vq_present() macro to yield a bool

2019-07-04 Thread Paolo Bonzini
On 04/07/19 10:20, Marc Zyngier wrote:
> +KVM, Paolo and Radim,
> 
> Guys, do you mind picking this single patch and sending it to Linus?
> That's the only fix left for 5.2. Alternatively, I can send you a pull
> request, but it feels overkill.

Sure, will do.

Paolo

> Either way, please let me know.
> 
> Thanks,
> 
>   M.
> 
> On Wed, 03 Jul 2019 18:42:50 +0100,
> Dave Martin  wrote:
>>
>> From: Zhang Lei 
>>
>> The original implementation of vq_present() relied on aggressive
>> inlining in order for the compiler to know that the code is
>> correct, due to some const-casting issues.  This was causing sparse
>> and clang to complain, while GCC compiled cleanly.
>>
>> Commit 0c529ff789bc addressed this problem, but since vq_present()
>> is no longer a function, there is now no implicit casting of the
>> returned value to the return type (bool).
>>
>> In set_sve_vls(), this uncast bit value is compared against a bool,
>> and so may spuriously compare as unequal when both are nonzero.  As
>> a result, KVM may reject valid SVE vector length configurations as
>> invalid, and vice versa.
>>
>> Fix it by forcing the returned value to a bool.
>>
>> Signed-off-by: Zhang Lei 
>> Fixes: 0c529ff789bc ("KVM: arm64: Implement vq_present() as a macro")
>> Signed-off-by: Dave Martin  [commit message rewrite]
>> Cc: Viresh Kumar 
>>
>> ---
>>
>> Posting this under Zhang Lei's authorship, due to the need to turn this
>> fix around quickly.  The fix is as per the original suggestion [1].
>>
>> Originally observed with the QEMU KVM SVE support under review:
>> https://lists.gnu.org/archive/html/qemu-devel/2019-06/msg04945.html
>>
>> Bug reproduced and fix tested on the Arm Fast Model, with
>> http://linux-arm.org/git?p=kvmtool-dm.git;a=shortlog;h=refs/heads/sve/v3/head
>> (After rerunning util/update_headers.sh.)
>>
>> (the --sve-vls command line argument was removed in v4 of the
>> kvmtool patches).
>>
>> [1] 
>> http://lists.infradead.org/pipermail/linux-arm-kernel/2019-July/664633.html
>> ---
>>  arch/arm64/kvm/guest.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
>> index c2afa79..dfd6264 100644
>> --- a/arch/arm64/kvm/guest.c
>> +++ b/arch/arm64/kvm/guest.c
>> @@ -208,7 +208,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const 
>> struct kvm_one_reg *reg)
>>  
>>  #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
>>  #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
>> -#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq))
>> +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
>>  
>>  static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>  {
>> -- 
>> 2.1.4
>>
> 

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm64/sve: Fix vq_present() macro to yield a bool

2019-07-04 Thread Dave Martin
On Thu, Jul 04, 2019 at 10:04:08AM +, Zhang, Lei wrote:
> Hi guys,
> 
> I have started up KVM guest os successfully with SVE feature with Dave' patch.
> 
> Tested-by: Zhang Lei 

Thanks for verifying.

It's really your fix, I only wrote a commit message for it :)

[...]

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


RE: [PATCH] KVM: arm64/sve: Fix vq_present() macro to yield a bool

2019-07-04 Thread Zhang, Lei
Hi guys,

I have started up KVM guest os successfully with SVE feature with Dave' patch.

Tested-by: Zhang Lei 

> -Original Message-
> From: Dave Martin 
> Sent: Thursday, July 04, 2019 5:35 PM
> To: Viresh Kumar 
> Cc: Okamoto, Takayuki/岡本 高幸 ; Christoffer
> Dall ; Ard Biesheuvel ; Marc
> Zyngier ; Catalin Marinas
> ; Will Deacon ; Zhang,
> Lei/張 雷 ; Julien Grall ;
> kvmarm@lists.cs.columbia.edu; linux-arm-ker...@lists.infradead.org
> Subject: Re: [PATCH] KVM: arm64/sve: Fix vq_present() macro to yield a bool
> 
> On Thu, Jul 04, 2019 at 08:32:52AM +0530, Viresh Kumar wrote:
> > On 03-07-19, 18:42, Dave Martin wrote:
> > > From: Zhang Lei 
> > >
> > > The original implementation of vq_present() relied on aggressive
> > > inlining in order for the compiler to know that the code is correct,
> > > due to some const-casting issues.  This was causing sparse and clang
> > > to complain, while GCC compiled cleanly.
> > >
> > > Commit 0c529ff789bc addressed this problem, but since vq_present()
> > > is no longer a function, there is now no implicit casting of the
> > > returned value to the return type (bool).
> > >
> > > In set_sve_vls(), this uncast bit value is compared against a bool,
> > > and so may spuriously compare as unequal when both are nonzero.  As
> > > a result, KVM may reject valid SVE vector length configurations as
> > > invalid, and vice versa.
> > >
> > > Fix it by forcing the returned value to a bool.
> > >
> > > Signed-off-by: Zhang Lei 
> > > Fixes: 0c529ff789bc ("KVM: arm64: Implement vq_present() as a
> > > macro")
> > > Signed-off-by: Dave Martin  [commit message
> > > rewrite]
> > > Cc: Viresh Kumar 
> > >
> > > ---
> > >
> > > Posting this under Zhang Lei's authorship, due to the need to turn
> > > this fix around quickly.  The fix is as per the original suggestion [1].
> > >
> > > Originally observed with the QEMU KVM SVE support under review:
> > > https://lists.gnu.org/archive/html/qemu-devel/2019-06/msg04945.html
> > >
> > > Bug reproduced and fix tested on the Arm Fast Model, with
> > > http://linux-arm.org/git?p=kvmtool-dm.git;a=shortlog;h=refs/heads/sv
> > > e/v3/head (After rerunning util/update_headers.sh.)
> > >
> > > (the --sve-vls command line argument was removed in v4 of the
> > > kvmtool patches).
> > >
> > > [1]
> > > http://lists.infradead.org/pipermail/linux-arm-kernel/2019-July/6646
> > > 33.html
> > > ---
> > >  arch/arm64/kvm/guest.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index
> > > c2afa79..dfd6264 100644
> > > --- a/arch/arm64/kvm/guest.c
> > > +++ b/arch/arm64/kvm/guest.c
> > > @@ -208,7 +208,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu,
> > > const struct kvm_one_reg *reg)
> > >
> > >  #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)  #define vq_mask(vq)
> > > ((u64)1 << ((vq) - SVE_VQ_MIN) % 64) -#define vq_present(vqs, vq)
> > > ((vqs)[vq_word(vq)] & vq_mask(vq))
> > > +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
> > >
> > >  static int get_sve_vls(struct kvm_vcpu *vcpu, const struct
> > > kvm_one_reg *reg)  {
> >
> > It was a really nice bug :)
> >
> > Reviewed-by: Viresh Kumar 
> 
> Thanks for the quick review!
> 
> Maybe it makes sense to write equality comparisons on bools as !x == !y to be
> more defensive against this kind of thing.  Anyway, probably best to leave 
> this
> as-is while the dust settles.
> 
> Cheers
> ---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 06/59] KVM: arm64: nv: Allow userspace to set PSR_MODE_EL2x

2019-07-04 Thread Dave Martin
On Wed, Jul 03, 2019 at 10:21:57AM +0100, Marc Zyngier wrote:
> On 24/06/2019 13:48, Dave Martin wrote:
> > On Fri, Jun 21, 2019 at 02:50:08PM +0100, Marc Zyngier wrote:
> >> On 21/06/2019 14:24, Julien Thierry wrote:
> >>>
> >>>
> >>> On 21/06/2019 10:37, Marc Zyngier wrote:
>  From: Christoffer Dall 
> 
>  We were not allowing userspace to set a more privileged mode for the VCPU
>  than EL1, but we should allow this when nested virtualization is enabled
>  for the VCPU.
> 
>  Signed-off-by: Christoffer Dall 
>  Signed-off-by: Marc Zyngier 
>  ---
>   arch/arm64/kvm/guest.c | 6 ++
>   1 file changed, 6 insertions(+)
> 
>  diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
>  index 3ae2f82fca46..4c35b5d51e21 100644
>  --- a/arch/arm64/kvm/guest.c
>  +++ b/arch/arm64/kvm/guest.c
>  @@ -37,6 +37,7 @@
>   #include 
>   #include 
>   #include 
>  +#include 
>   #include 
>   
>   #include "trace.h"
>  @@ -194,6 +195,11 @@ static int set_core_reg(struct kvm_vcpu *vcpu, 
>  const struct kvm_one_reg *reg)
>   if (vcpu_el1_is_32bit(vcpu))
>   return -EINVAL;
>   break;
>  +case PSR_MODE_EL2h:
>  +case PSR_MODE_EL2t:
>  +if (vcpu_el1_is_32bit(vcpu) || 
>  !nested_virt_in_use(vcpu))
> >>>
> >>> This condition reads a bit weirdly. Why do we care about anything else
> >>> than !nested_virt_in_use() ?
> >>>
> >>> If nested virt is not in use then obviously we return the error.
> >>>
> >>> If nested virt is in use then why do we care about EL1? Or should this
> >>> test read as "highest_el_is_32bit" ?
> >>
> >> There are multiple things at play here:
> >>
> >> - MODE_EL2x is not a valid 32bit mode
> >> - The architecture forbids nested virt with 32bit EL2
> >>
> >> The code above is a simplification of these two conditions. But
> >> certainly we can do a bit better, as kvm_reset_cpu() doesn't really
> >> check that we don't create a vcpu with both 32bit+NV. These two bits
> >> should really be exclusive.
> > 
> > This code is safe for now because KVM_VCPU_MAX_FEATURES <=
> > KVM_ARM_VCPU_NESTED_VIRT, right, i.e., nested_virt_in_use() cannot be
> > true?
> > 
> > This makes me a little uneasy, but I think that's paranoia talking: we
> > want bisectably, but no sane person should ship with just half of this
> > series.  So I guess this is fine.
> > 
> > We could stick something like
> > 
> > if (WARN_ON(...))
> > return false;
> > 
> > in nested_virt_in_use() and then remove it in the final patch, but it's
> > probably overkill.
> 
> The only case I can imagine something going wrong is if this series is
> only applied halfway, and another series bumps the maximum feature to
> something that includes NV. I guess your suggestion would solve that.

I won't lose sleep over it either way.

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 52/59] KVM: arm64: nv: vgic: Allow userland to set VGIC maintenance IRQ

2019-07-04 Thread Julien Thierry



On 04/07/2019 10:01, Andre Przywara wrote:
> On Thu, 4 Jul 2019 08:38:20 +0100
> Julien Thierry  wrote:
> 
>> On 21/06/2019 10:38, Marc Zyngier wrote:
>>> From: Andre Przywara 
>>>
>>> The VGIC maintenance IRQ signals various conditions about the LRs, when
>>> the GIC's virtualization extension is used.
>>> So far we didn't need it, but nested virtualization needs to know about
>>> this interrupt, so add a userland interface to setup the IRQ number.
>>> The architecture mandates that it must be a PPI, on top of that this code
>>> only exports a per-device option, so the PPI is the same on all VCPUs.
>>>
>>> Signed-off-by: Andre Przywara 
>>> [added some bits of documentation]
>>> Signed-off-by: Marc Zyngier 
>>> ---
>>>  .../virtual/kvm/devices/arm-vgic-v3.txt   |  9 
>>>  arch/arm/include/uapi/asm/kvm.h   |  1 +
>>>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>>>  include/kvm/arm_vgic.h|  3 +++
>>>  virt/kvm/arm/vgic/vgic-kvm-device.c   | 22 +++
>>>  5 files changed, 36 insertions(+)
>>>
>>> diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt 
>>> b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
>>> index ff290b43c8e5..c70e8f2e0c9c 100644
>>> --- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
>>> +++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
>>> @@ -249,3 +249,12 @@ Groups:
>>>Errors:
>>>  -EINVAL: vINTID is not multiple of 32 or
>>>   info field is not VGIC_LEVEL_INFO_LINE_LEVEL
>>> +
>>> +  KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ
>>> +The attr field of kvm_device_attr encodes the following values:
>>> +bits: | 31   5 | 4    0 |
>>> +values:   |  RES0  |   vINTID   |
>>> +
>>> +The vINTID specifies which interrupt is generated when the vGIC
>>> +must generate a maintenance interrupt. This must be a PPI.
>>> +  
>>
>> Something seems off. The documentation suggests that the value of the
>> attribute will be between 0-15 (and other values will be masked down to
>> a value between 0 and 15).
> 
> Where does that happen? The mask is [4:0], so 5 bits, that should be enough 
> for PPIs as well.
> We could add a line to the documentation to stress that this is an interrupt 
> ID as seen by the virtual GIC, if that helps.
> 

You're right, I misread the length of the vINTID field.

Nevermind then!

Thanks,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 52/59] KVM: arm64: nv: vgic: Allow userland to set VGIC maintenance IRQ

2019-07-04 Thread Andre Przywara
On Thu, 4 Jul 2019 08:38:20 +0100
Julien Thierry  wrote:

> On 21/06/2019 10:38, Marc Zyngier wrote:
> > From: Andre Przywara 
> > 
> > The VGIC maintenance IRQ signals various conditions about the LRs, when
> > the GIC's virtualization extension is used.
> > So far we didn't need it, but nested virtualization needs to know about
> > this interrupt, so add a userland interface to setup the IRQ number.
> > The architecture mandates that it must be a PPI, on top of that this code
> > only exports a per-device option, so the PPI is the same on all VCPUs.
> > 
> > Signed-off-by: Andre Przywara 
> > [added some bits of documentation]
> > Signed-off-by: Marc Zyngier 
> > ---
> >  .../virtual/kvm/devices/arm-vgic-v3.txt   |  9 
> >  arch/arm/include/uapi/asm/kvm.h   |  1 +
> >  arch/arm64/include/uapi/asm/kvm.h |  1 +
> >  include/kvm/arm_vgic.h|  3 +++
> >  virt/kvm/arm/vgic/vgic-kvm-device.c   | 22 +++
> >  5 files changed, 36 insertions(+)
> > 
> > diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt 
> > b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> > index ff290b43c8e5..c70e8f2e0c9c 100644
> > --- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> > +++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> > @@ -249,3 +249,12 @@ Groups:
> >Errors:
> >  -EINVAL: vINTID is not multiple of 32 or
> >   info field is not VGIC_LEVEL_INFO_LINE_LEVEL
> > +
> > +  KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ
> > +The attr field of kvm_device_attr encodes the following values:
> > +bits: | 31   5 | 4    0 |
> > +values:   |  RES0  |   vINTID   |
> > +
> > +The vINTID specifies which interrupt is generated when the vGIC
> > +must generate a maintenance interrupt. This must be a PPI.
> > +  
> 
> Something seems off. The documentation suggests that the value of the
> attribute will be between 0-15 (and other values will be masked down to
> a value between 0 and 15).

Where does that happen? The mask is [4:0], so 5 bits, that should be enough for 
PPIs as well.
We could add a line to the documentation to stress that this is an interrupt ID 
as seen by the virtual GIC, if that helps.

Cheers,
Andre.

> However, in the code the value should be
> between 16 and 32 (PPI INTID) and other values are rejected.
> 
> Am I reading this wrong?
> 
> Cheers,
> 
> Julien
> 
> > diff --git a/arch/arm/include/uapi/asm/kvm.h 
> > b/arch/arm/include/uapi/asm/kvm.h
> > index 4602464ebdfb..a6af45645a6d 100644
> > --- a/arch/arm/include/uapi/asm/kvm.h
> > +++ b/arch/arm/include/uapi/asm/kvm.h
> > @@ -233,6 +233,7 @@ struct kvm_vcpu_events {
> >  #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
> >  #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
> >  #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS  8
> > +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
> >  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
> >  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
> > (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
> > diff --git a/arch/arm64/include/uapi/asm/kvm.h 
> > b/arch/arm64/include/uapi/asm/kvm.h
> > index 563e2a8bae93..8e1c6ffe1b59 100644
> > --- a/arch/arm64/include/uapi/asm/kvm.h
> > +++ b/arch/arm64/include/uapi/asm/kvm.h
> > @@ -295,6 +295,7 @@ struct kvm_vcpu_events {
> >  #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
> >  #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
> >  #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
> > +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ  9
> >  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
> >  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
> > (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
> > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> > index 17d32c2797c0..95c6232663c3 100644
> > --- a/include/kvm/arm_vgic.h
> > +++ b/include/kvm/arm_vgic.h
> > @@ -229,6 +229,9 @@ struct vgic_dist {
> >  
> > int nr_spis;
> >  
> > +   /* The GIC maintenance IRQ for nested hypervisors. */
> > +   u32 maint_irq;
> > +
> > /* base addresses in guest physical address space: */
> > gpa_t   vgic_dist_base; /* distributor */
> > union {
> > diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c 
> > b/virt/kvm/arm/vgic/vgic-kvm-device.c
> > index 44419679f91a..dfb1d7cc66b3 100644
> > --- a/virt/kvm/arm/vgic/vgic-kvm-device.c
> > +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
> > @@ -241,6 +241,12 @@ static int vgic_get_common_attr(struct kvm_device *dev,
> >  VGIC_NR_PRIVATE_IRQS, uaddr);
> > break;
> > }
> > +   case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
> > +   u32 __user *uaddr = (u32 __user *)(long)attr->addr;
> > +
> > +   r = put_user(dev->kvm->arch.vgic.maint_irq, uaddr);
> > +   break;
> > +   }
> > }
> >  
> > return r;
> > @@ -627,6 +633,21 @@ static int vgic_v3_set_attr(struct 

Re: [PATCH] KVM: arm64/sve: Fix vq_present() macro to yield a bool

2019-07-04 Thread Dave Martin
On Thu, Jul 04, 2019 at 08:32:52AM +0530, Viresh Kumar wrote:
> On 03-07-19, 18:42, Dave Martin wrote:
> > From: Zhang Lei 
> > 
> > The original implementation of vq_present() relied on aggressive
> > inlining in order for the compiler to know that the code is
> > correct, due to some const-casting issues.  This was causing sparse
> > and clang to complain, while GCC compiled cleanly.
> > 
> > Commit 0c529ff789bc addressed this problem, but since vq_present()
> > is no longer a function, there is now no implicit casting of the
> > returned value to the return type (bool).
> > 
> > In set_sve_vls(), this uncast bit value is compared against a bool,
> > and so may spuriously compare as unequal when both are nonzero.  As
> > a result, KVM may reject valid SVE vector length configurations as
> > invalid, and vice versa.
> > 
> > Fix it by forcing the returned value to a bool.
> > 
> > Signed-off-by: Zhang Lei 
> > Fixes: 0c529ff789bc ("KVM: arm64: Implement vq_present() as a macro")
> > Signed-off-by: Dave Martin  [commit message rewrite]
> > Cc: Viresh Kumar 
> > 
> > ---
> > 
> > Posting this under Zhang Lei's authorship, due to the need to turn this
> > fix around quickly.  The fix is as per the original suggestion [1].
> > 
> > Originally observed with the QEMU KVM SVE support under review:
> > https://lists.gnu.org/archive/html/qemu-devel/2019-06/msg04945.html
> > 
> > Bug reproduced and fix tested on the Arm Fast Model, with
> > http://linux-arm.org/git?p=kvmtool-dm.git;a=shortlog;h=refs/heads/sve/v3/head
> > (After rerunning util/update_headers.sh.)
> > 
> > (the --sve-vls command line argument was removed in v4 of the
> > kvmtool patches).
> > 
> > [1] 
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2019-July/664633.html
> > ---
> >  arch/arm64/kvm/guest.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> > index c2afa79..dfd6264 100644
> > --- a/arch/arm64/kvm/guest.c
> > +++ b/arch/arm64/kvm/guest.c
> > @@ -208,7 +208,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const 
> > struct kvm_one_reg *reg)
> >  
> >  #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
> >  #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
> > -#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq))
> > +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
> >  
> >  static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg 
> > *reg)
> >  {
> 
> It was a really nice bug :)
> 
> Reviewed-by: Viresh Kumar 

Thanks for the quick review!

Maybe it makes sense to write equality comparisons on bools as !x == !y
to be more defensive against this kind of thing.  Anyway, probably best
to leave this as-is while the dust settles.

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm64/sve: Fix vq_present() macro to yield a bool

2019-07-04 Thread Marc Zyngier
+KVM, Paolo and Radim,

Guys, do you mind picking this single patch and sending it to Linus?
That's the only fix left for 5.2. Alternatively, I can send you a pull
request, but it feels overkill.

Either way, please let me know.

Thanks,

M.

On Wed, 03 Jul 2019 18:42:50 +0100,
Dave Martin  wrote:
> 
> From: Zhang Lei 
> 
> The original implementation of vq_present() relied on aggressive
> inlining in order for the compiler to know that the code is
> correct, due to some const-casting issues.  This was causing sparse
> and clang to complain, while GCC compiled cleanly.
> 
> Commit 0c529ff789bc addressed this problem, but since vq_present()
> is no longer a function, there is now no implicit casting of the
> returned value to the return type (bool).
> 
> In set_sve_vls(), this uncast bit value is compared against a bool,
> and so may spuriously compare as unequal when both are nonzero.  As
> a result, KVM may reject valid SVE vector length configurations as
> invalid, and vice versa.
> 
> Fix it by forcing the returned value to a bool.
> 
> Signed-off-by: Zhang Lei 
> Fixes: 0c529ff789bc ("KVM: arm64: Implement vq_present() as a macro")
> Signed-off-by: Dave Martin  [commit message rewrite]
> Cc: Viresh Kumar 
> 
> ---
> 
> Posting this under Zhang Lei's authorship, due to the need to turn this
> fix around quickly.  The fix is as per the original suggestion [1].
> 
> Originally observed with the QEMU KVM SVE support under review:
> https://lists.gnu.org/archive/html/qemu-devel/2019-06/msg04945.html
> 
> Bug reproduced and fix tested on the Arm Fast Model, with
> http://linux-arm.org/git?p=kvmtool-dm.git;a=shortlog;h=refs/heads/sve/v3/head
> (After rerunning util/update_headers.sh.)
> 
> (the --sve-vls command line argument was removed in v4 of the
> kvmtool patches).
> 
> [1] 
> http://lists.infradead.org/pipermail/linux-arm-kernel/2019-July/664633.html
> ---
>  arch/arm64/kvm/guest.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index c2afa79..dfd6264 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -208,7 +208,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const 
> struct kvm_one_reg *reg)
>  
>  #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
>  #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
> -#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq))
> +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
>  
>  static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  {
> -- 
> 2.1.4
> 

-- 
Jazz is not dead, it just smells funny.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 53/59] KVM: arm64: nv: Implement maintenance interrupt forwarding

2019-07-04 Thread Julien Thierry



On 21/06/2019 10:38, Marc Zyngier wrote:
> When we take a maintenance interrupt, we need to decide whether
> it is generated on an action from the guest, or if it is something
> that needs to be forwarded to the guest hypervisor.
> 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/kvm/nested.c|  2 +-
>  virt/kvm/arm/vgic/vgic-init.c  | 30 ++
>  virt/kvm/arm/vgic/vgic-v3-nested.c | 25 +
>  3 files changed, 52 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> index df2db9ab7cfb..ab61f0f30ee6 100644
> --- a/arch/arm64/kvm/nested.c
> +++ b/arch/arm64/kvm/nested.c
> @@ -545,7 +545,7 @@ bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
>   bool imo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO;
>   bool fmo = __vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FMO;
>  
> - WARN(imo != fmo, "Separate virtual IRQ/FIQ settings not supported\n");
> + WARN_ONCE(imo != fmo, "Separate virtual IRQ/FIQ settings not 
> supported\n");
>  
>   return nested_virt_in_use(vcpu) && imo && fmo && !is_hyp_ctxt(vcpu);
>  }
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 3bdb31eaed64..ec54bc8d5126 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -17,9 +17,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "vgic.h"
>  
>  /*
> @@ -240,6 +242,16 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>   if (!irqchip_in_kernel(vcpu->kvm))
>   return 0;
>  
> + if (nested_virt_in_use(vcpu)) {
> + /* FIXME: remove this hack */
> + if (vcpu->kvm->arch.vgic.maint_irq == 0)
> + vcpu->kvm->arch.vgic.maint_irq = 
> kvm_vgic_global_state.maint_irq;
> + ret = kvm_vgic_set_owner(vcpu, vcpu->kvm->arch.vgic.maint_irq,
> +  vcpu);

Having this here, does it mean that the userland needs to set the
maintenance irq attribute before creating any vCPU for the VM?

If so, should it be clarified in the documentation? Or is it a general
rule that (vGIC?) attributes should be set before creating vCPUs?

Cheers,

Julien

> + if (ret)
> + return ret;
> + }
> +
>   /*
>* If we are creating a VCPU with a GICv3 we must also register the
>* KVM io device for the redistributor that belongs to this VCPU.
> @@ -455,12 +467,23 @@ static int vgic_init_cpu_dying(unsigned int cpu)
>  
>  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
>  {
> + struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
> +
>   /*
>* We cannot rely on the vgic maintenance interrupt to be
>* delivered synchronously. This means we can only use it to
>* exit the VM, and we perform the handling of EOIed
>* interrupts on the exit path (see vgic_fold_lr_state).
>*/
> +
> + /* If not nested, deactivate */
> + if (!vcpu || !vgic_state_is_nested(vcpu)) {
> + irq_set_irqchip_state(irq, IRQCHIP_STATE_ACTIVE, false);
> + return IRQ_HANDLED;
> + }
> +
> + /* Assume nested from now */
> + vgic_v3_handle_nested_maint_irq(vcpu);
>   return IRQ_HANDLED;
>  }
>  
> @@ -531,6 +554,13 @@ int kvm_vgic_hyp_init(void)
>   return ret;
>   }
>  
> + ret = irq_set_vcpu_affinity(kvm_vgic_global_state.maint_irq,
> + kvm_get_running_vcpus());
> + if (ret) {
> + kvm_err("Error setting vcpu affinity\n");
> + goto out_free_irq;
> + }
> +
>   ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
>   "kvm/arm/vgic:starting",
>   vgic_init_cpu_starting, vgic_init_cpu_dying);
> diff --git a/virt/kvm/arm/vgic/vgic-v3-nested.c 
> b/virt/kvm/arm/vgic/vgic-v3-nested.c
> index c917d49e4a14..7c5f82ae68e0 100644
> --- a/virt/kvm/arm/vgic/vgic-v3-nested.c
> +++ b/virt/kvm/arm/vgic/vgic-v3-nested.c
> @@ -172,10 +172,20 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
>  void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
>  {
>   struct vgic_cpu *vgic_cpu = >arch.vgic_cpu;
> + struct vgic_irq *irq;
> + unsigned long flags;
>  
>   vgic_cpu->shadow_vgic_v3 = vgic_cpu->nested_vgic_v3;
>   vgic_v3_create_shadow_lr(vcpu);
>   __vgic_v3_restore_state(vcpu_shadow_if(vcpu));
> +
> + irq = vgic_get_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.maint_irq);
> + raw_spin_lock_irqsave(>irq_lock, flags);
> + if (irq->line_level || irq->active)
> + irq_set_irqchip_state(kvm_vgic_global_state.maint_irq,
> +   IRQCHIP_STATE_ACTIVE, true);
> + raw_spin_unlock_irqrestore(>irq_lock, flags);
> + vgic_put_irq(vcpu->kvm, irq);
>  }
>  
>  void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> @@ -190,11 

Re: [PATCH 52/59] KVM: arm64: nv: vgic: Allow userland to set VGIC maintenance IRQ

2019-07-04 Thread Julien Thierry



On 21/06/2019 10:38, Marc Zyngier wrote:
> From: Andre Przywara 
> 
> The VGIC maintenance IRQ signals various conditions about the LRs, when
> the GIC's virtualization extension is used.
> So far we didn't need it, but nested virtualization needs to know about
> this interrupt, so add a userland interface to setup the IRQ number.
> The architecture mandates that it must be a PPI, on top of that this code
> only exports a per-device option, so the PPI is the same on all VCPUs.
> 
> Signed-off-by: Andre Przywara 
> [added some bits of documentation]
> Signed-off-by: Marc Zyngier 
> ---
>  .../virtual/kvm/devices/arm-vgic-v3.txt   |  9 
>  arch/arm/include/uapi/asm/kvm.h   |  1 +
>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>  include/kvm/arm_vgic.h|  3 +++
>  virt/kvm/arm/vgic/vgic-kvm-device.c   | 22 +++
>  5 files changed, 36 insertions(+)
> 
> diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt 
> b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> index ff290b43c8e5..c70e8f2e0c9c 100644
> --- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> +++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
> @@ -249,3 +249,12 @@ Groups:
>Errors:
>  -EINVAL: vINTID is not multiple of 32 or
>   info field is not VGIC_LEVEL_INFO_LINE_LEVEL
> +
> +  KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ
> +The attr field of kvm_device_attr encodes the following values:
> +bits: | 31   5 | 4    0 |
> +values:   |  RES0  |   vINTID   |
> +
> +The vINTID specifies which interrupt is generated when the vGIC
> +must generate a maintenance interrupt. This must be a PPI.
> +

Something seems off. The documentation suggests that the value of the
attribute will be between 0-15 (and other values will be masked down to
a value between 0 and 15). However, in the code the value should be
between 16 and 32 (PPI INTID) and other values are rejected.

Am I reading this wrong?

Cheers,

Julien

> diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
> index 4602464ebdfb..a6af45645a6d 100644
> --- a/arch/arm/include/uapi/asm/kvm.h
> +++ b/arch/arm/include/uapi/asm/kvm.h
> @@ -233,6 +233,7 @@ struct kvm_vcpu_events {
>  #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
>  #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
>  #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS8
> +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ   9
>  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT   10
>  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
>   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
> diff --git a/arch/arm64/include/uapi/asm/kvm.h 
> b/arch/arm64/include/uapi/asm/kvm.h
> index 563e2a8bae93..8e1c6ffe1b59 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -295,6 +295,7 @@ struct kvm_vcpu_events {
>  #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
>  #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
>  #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
> +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ  9
>  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT   10
>  #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
>   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 17d32c2797c0..95c6232663c3 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -229,6 +229,9 @@ struct vgic_dist {
>  
>   int nr_spis;
>  
> + /* The GIC maintenance IRQ for nested hypervisors. */
> + u32 maint_irq;
> +
>   /* base addresses in guest physical address space: */
>   gpa_t   vgic_dist_base; /* distributor */
>   union {
> diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c 
> b/virt/kvm/arm/vgic/vgic-kvm-device.c
> index 44419679f91a..dfb1d7cc66b3 100644
> --- a/virt/kvm/arm/vgic/vgic-kvm-device.c
> +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
> @@ -241,6 +241,12 @@ static int vgic_get_common_attr(struct kvm_device *dev,
>VGIC_NR_PRIVATE_IRQS, uaddr);
>   break;
>   }
> + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
> + u32 __user *uaddr = (u32 __user *)(long)attr->addr;
> +
> + r = put_user(dev->kvm->arch.vgic.maint_irq, uaddr);
> + break;
> + }
>   }
>  
>   return r;
> @@ -627,6 +633,21 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
>   reg = tmp32;
>   return vgic_v3_attr_regs_access(dev, attr, , true);
>   }
> + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
> + u32 __user *uaddr = (u32 __user *)(long)attr->addr;
> + u32 val;
> +
> + if (get_user(val, uaddr))
> + return -EFAULT;
> +
> + /* Must be a PPI. */
> + if ((val >= VGIC_NR_PRIVATE_IRQS) || (val < VGIC_NR_SGIS))
> +