Re: [PATCH 26/37] KVM: arm64: Prepare to handle traps on deferred AArch32 sysregs
On Thu, Oct 12, 2017 at 12:41:30PM +0200, Christoffer Dall wrote: > Handle accesses to any AArch32 EL1 system registers where we can defer > saving and restoring them to vcpu_load and vcpu_put, and which are > stored in special EL2 registers only used support 32-bit guests. > > Signed-off-by: Christoffer Dall> --- > arch/arm64/kvm/inject_fault.c | 14 +- > 1 file changed, 13 insertions(+), 1 deletion(-) > > diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c > index f4513fc..02990f5 100644 > --- a/arch/arm64/kvm/inject_fault.c > +++ b/arch/arm64/kvm/inject_fault.c > @@ -59,9 +59,18 @@ static void vcpu_set_elr_el1(struct kvm_vcpu *vcpu, u64 > val) > /* Set the SPSR for the current mode */ > static void vcpu_set_spsr(struct kvm_vcpu *vcpu, u64 val) > { > - if (vcpu_mode_is_32bit(vcpu)) > + if (vcpu_mode_is_32bit(vcpu)) { > + if (vcpu->arch.sysregs_loaded_on_cpu) > + __sysreg32_save_state(vcpu); > + > *vcpu_spsr32(vcpu) = val; > > + if (vcpu->arch.sysregs_loaded_on_cpu) > + __sysreg32_restore_state(vcpu); > + > + return; Is this a fix? I don't understand why it's necessary now, but it wasn't before. 
> + } > + > if (vcpu->arch.sysregs_loaded_on_cpu) > write_sysreg_el1(val, spsr); > else > @@ -129,11 +138,13 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool > is_pabt, >* IFAR: mapped to FAR_EL1 >* DFSR: mapped to ESR_EL1 >* TTBCR: mapped to TCR_EL1 > + * IFSR: stored in IFSR32_EL2 >*/ > if (vcpu->arch.sysregs_loaded_on_cpu) { > vcpu->arch.ctxt.sys_regs[FAR_EL1] = read_sysreg_el1(far); > vcpu->arch.ctxt.sys_regs[ESR_EL1] = read_sysreg_el1(esr); > vcpu->arch.ctxt.sys_regs[TCR_EL1] = read_sysreg_el1(tcr); > + vcpu->arch.ctxt.sys_regs[IFSR32_EL2] = read_sysreg(ifsr32_el2); > } > > if (is_pabt) { > @@ -161,6 +172,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool > is_pabt, > if (vcpu->arch.sysregs_loaded_on_cpu) { > write_sysreg_el1(vcpu->arch.ctxt.sys_regs[FAR_EL1], far); > write_sysreg_el1(vcpu->arch.ctxt.sys_regs[ESR_EL1], esr); > + write_sysreg(vcpu->arch.ctxt.sys_regs[IFSR32_EL2], ifsr32_el2); This appears to be a fix. Why not squash it into the patch that save/restore the other registers? > } > } > > -- > 2.9.0 > Thanks, drew ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH 25/37] KVM: arm64: Prepare to handle traps on remaining deferred EL1 sysregs
On Thu, Oct 12, 2017 at 12:41:29PM +0200, Christoffer Dall wrote: > Handle accesses during traps to any remaining EL1 registers which can be > deferred to vcpu_load and vcpu_put, by either accessing them directly on > the physical CPU when the latest version is stored there, or by > synchronizing the memory representation with the CPU state. > > Signed-off-by: Christoffer Dall> --- > arch/arm64/include/asm/kvm_emulate.h | 14 --- > arch/arm64/kvm/inject_fault.c| 79 > > arch/arm64/kvm/sys_regs.c| 6 ++- > 3 files changed, 76 insertions(+), 23 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_emulate.h > b/arch/arm64/include/asm/kvm_emulate.h > index 630dd60..69bb40d 100644 > --- a/arch/arm64/include/asm/kvm_emulate.h > +++ b/arch/arm64/include/asm/kvm_emulate.h > @@ -66,11 +66,6 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu > *vcpu) > return (unsigned long *)_gp_regs(vcpu)->regs.pc; > } > > -static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) > -{ > - return (unsigned long *)_gp_regs(vcpu)->elr_el1; > -} > - > static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) > { > return (unsigned long *)_gp_regs(vcpu)->regs.pstate; > @@ -120,15 +115,6 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, > u8 reg_num, > vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; > } > > -/* Get vcpu SPSR for current mode */ > -static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) > -{ > - if (vcpu_mode_is_32bit(vcpu)) > - return vcpu_spsr32(vcpu); > - > - return (unsigned long *)_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; > -} > - > static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) > { > u32 mode; > diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c > index 45c7026..f4513fc 100644 > --- a/arch/arm64/kvm/inject_fault.c > +++ b/arch/arm64/kvm/inject_fault.c > @@ -23,6 +23,7 @@ > > #include > #include > +#include > #include > > #define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT 
> | \ > @@ -33,13 +34,55 @@ > #define LOWER_EL_AArch64_VECTOR 0x400 > #define LOWER_EL_AArch32_VECTOR 0x600 > > +static u64 vcpu_get_vbar_el1(struct kvm_vcpu *vcpu) > +{ > + unsigned long vbar; > + > + if (vcpu->arch.sysregs_loaded_on_cpu) > + vbar = read_sysreg_el1(vbar); > + else > + vbar = vcpu_sys_reg(vcpu, VBAR_EL1); > + > + if (vcpu_el1_is_32bit(vcpu)) > + return lower_32_bits(vbar); > + return vbar; > +} > + > +static void vcpu_set_elr_el1(struct kvm_vcpu *vcpu, u64 val) > +{ > + if (vcpu->arch.sysregs_loaded_on_cpu) > + write_sysreg_el1(val, elr); > + else > + vcpu_gp_regs(vcpu)->elr_el1 = val; > +} > + > +/* Set the SPSR for the current mode */ > +static void vcpu_set_spsr(struct kvm_vcpu *vcpu, u64 val) > +{ > + if (vcpu_mode_is_32bit(vcpu)) > + *vcpu_spsr32(vcpu) = val; > + > + if (vcpu->arch.sysregs_loaded_on_cpu) > + write_sysreg_el1(val, spsr); > + else > + vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = val; > +} > + > +static u32 vcpu_get_c1_sctlr(struct kvm_vcpu *vcpu) > +{ > + if (vcpu->arch.sysregs_loaded_on_cpu) > + return lower_32_bits(read_sysreg_el1(sctlr)); > + else > + return vcpu_cp15(vcpu, c1_SCTLR); > +} > + > static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) > { > unsigned long cpsr; > unsigned long new_spsr_value = *vcpu_cpsr(vcpu); > bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); > u32 return_offset = (is_thumb) ? 
4 : 0; > - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); > + u32 sctlr = vcpu_get_c1_sctlr(vcpu); > > cpsr = mode | COMPAT_PSR_I_BIT; > > @@ -51,14 +94,14 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 > mode, u32 vect_offset) > *vcpu_cpsr(vcpu) = cpsr; > > /* Note: These now point to the banked copies */ > - *vcpu_spsr(vcpu) = new_spsr_value; > + vcpu_set_spsr(vcpu, new_spsr_value); > *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; > > /* Branch to exception vector */ > if (sctlr & (1 << 13)) > vect_offset += 0x; > else /* always have security exceptions */ > - vect_offset += vcpu_cp15(vcpu, c12_VBAR); > + vect_offset += vcpu_get_vbar_el1(vcpu); > > *vcpu_pc(vcpu) = vect_offset; > } > @@ -79,6 +122,20 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool > is_pabt, > u32 *far, *fsr; > bool is_lpae; > > + /* > + * We are going to need the latest values of the following system > + * regiters: registers > + * DFAR: mapped to FAR_EL1 FAR_EL1[31:0] > + * IFAR: mapped to FAR_EL1 FAR_EL1[63:32]
Re: [PATCH v4 00/21] SError rework + RAS for firmware first support
On 13 November 2017 at 16:14, Andrew Jones wrote: > On Mon, Nov 13, 2017 at 12:29:46PM +0100, Christoffer Dall wrote: >> I'm thinking this is analogous to migrating a VM that uses an irqchip in >> userspace and has set the IRQ or FIQ lines using KVM_IRQ_LINE. My >> feeling is that this is also not supported today. > > Luckily userspace irqchip is mostly a debug feature, or just to support > oddball hardware. Or at least that's the way I see its usecases... True, but I think we should always insist on migration working for new features, because it's just an easier line to define; otherwise we end up with an annoying "feature mostly works, unless you happened to be using [list of random things], in which case it silently doesn't" effect. thanks -- PMM ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH 23/37] KVM: arm64: Prepare to handle traps on deferred VM sysregs
On Thu, Oct 12, 2017 at 12:41:27PM +0200, Christoffer Dall wrote: > When we defer the save/restore of system registers to vcpu_load and > vcpu_put, we need to take care of the emulation code that handles traps > to these registers, since simply reading the memory array will return > stale data. > > Therefore, introduce two functions to directly read/write the registers > from the physical CPU when we're on a VHE system that has loaded the > system registers onto the physical CPU. > > Signed-off-by: Christoffer Dall> --- > arch/arm64/include/asm/kvm_host.h | 4 +++ > arch/arm64/kvm/sys_regs.c | 54 > +-- > 2 files changed, 56 insertions(+), 2 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_host.h > b/arch/arm64/include/asm/kvm_host.h > index 9f5761f..dcded44 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -278,6 +278,10 @@ struct kvm_vcpu_arch { > > /* Detect first run of a vcpu */ > bool has_run_once; > + > + /* True when deferrable sysregs are loaded on the physical CPU, > + * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ > + bool sysregs_loaded_on_cpu; To be sure I understand correctly, this should only be false when we're servicing ioctls that don't do a full vcpu_load first (non-KVM_RUN) or when we're not running on a CPU with VHE. If we didn't need to worry about non-KVM_RUN ioctls, then we could use the static key has_vhe(), right? 
> }; > > #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index dbe35fd..f7887dd 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -34,6 +34,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -110,8 +111,57 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, > return true; > } > > +static u64 read_deferrable_vm_reg(struct kvm_vcpu *vcpu, int reg) > +{ > + if (vcpu->arch.sysregs_loaded_on_cpu) { > + switch (reg) { > + case SCTLR_EL1: return read_sysreg_el1(sctlr); > + case TTBR0_EL1: return read_sysreg_el1(ttbr0); > + case TTBR1_EL1: return read_sysreg_el1(ttbr1); > + case TCR_EL1: return read_sysreg_el1(tcr); > + case ESR_EL1: return read_sysreg_el1(esr); > + case FAR_EL1: return read_sysreg_el1(far); > + case AFSR0_EL1: return read_sysreg_el1(afsr0); > + case AFSR1_EL1: return read_sysreg_el1(afsr1); > + case MAIR_EL1: return read_sysreg_el1(mair); > + case AMAIR_EL1: return read_sysreg_el1(amair); > + case CONTEXTIDR_EL1:return read_sysreg_el1(contextidr); > + case DACR32_EL2:return read_sysreg(dacr32_el2); > + case IFSR32_EL2:return read_sysreg(ifsr32_el2); > + default:BUG(); > + } > + } > + > + return vcpu_sys_reg(vcpu, reg); > +} > + > +static void write_deferrable_vm_reg(struct kvm_vcpu *vcpu, int reg, u64 val) > +{ > + if (vcpu->arch.sysregs_loaded_on_cpu) { > + switch (reg) { > + case SCTLR_EL1: write_sysreg_el1(val, sctlr); return; > + case TTBR0_EL1: write_sysreg_el1(val, ttbr0); return; > + case TTBR1_EL1: write_sysreg_el1(val, ttbr1); return; > + case TCR_EL1: write_sysreg_el1(val, tcr); return; > + case ESR_EL1: write_sysreg_el1(val, esr); return; > + case FAR_EL1: write_sysreg_el1(val, far); return; > + case AFSR0_EL1: write_sysreg_el1(val, afsr0); return; > + case AFSR1_EL1: write_sysreg_el1(val, afsr1); return; > + case MAIR_EL1: write_sysreg_el1(val, mair);return; > + case AMAIR_EL1: write_sysreg_el1(val, amair); 
return; > + case CONTEXTIDR_EL1:write_sysreg_el1(val, contextidr); > return; > + case DACR32_EL2:write_sysreg(val, dacr32_el2); return; > + case IFSR32_EL2:write_sysreg(val, ifsr32_el2); return; > + default:BUG(); > + } > + } > + > + vcpu_sys_reg(vcpu, reg) = val; > +} > + > /* > * Generic accessor for VM registers. Only called as long as HCR_TVM > + * > * is set. If the guest enables the MMU, we stop trapping the VM > * sys_regs and leave it in complete control of the caches. > */ > @@ -132,14 +182,14 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, > if (!p->is_aarch32 || !p->is_32bit) { > val = p->regval; > } else { > - val = vcpu_sys_reg(vcpu, reg); > + val =
Re: [PATCH 22/37] KVM: arm64: Change 32-bit handling of VM system registers
On Thu, Oct 12, 2017 at 12:41:26PM +0200, Christoffer Dall wrote: > We currently handle 32-bit accesses to trapped VM system registers using > the 32-bit index into the coproc array on the vcpu structure, which is a > union of the coprog array and the sysreg array. coproc > > Since all the 32-bit coproc indicies are created to correspond to the > architectural mapping between 64-bit system registers and 32-bit > coprocessor registers, and because the AArch64 system registers are the > double in size of the AArch32 coprocessor registers, we can always find > the system register entry that we must update by dividing the 32-bit > coproc index by 2. > > This is going to make our lives much easier when we have to start > accessing system registers that uses deferred save/restore and might use > have to be read directly from the physical CPU. > > Signed-off-by: Christoffer Dall> --- > arch/arm64/include/asm/kvm_host.h | 8 > arch/arm64/kvm/sys_regs.c | 20 +++- > 2 files changed, 15 insertions(+), 13 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_host.h > b/arch/arm64/include/asm/kvm_host.h > index 5e09eb9..9f5761f 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -289,14 +289,6 @@ struct kvm_vcpu_arch { > #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) > #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) > > -#ifdef CONFIG_CPU_BIG_ENDIAN > -#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) > -#define vcpu_cp15_64_low(v,r)vcpu_cp15((v),(r) + 1) > -#else > -#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) > -#define vcpu_cp15_64_low(v,r)vcpu_cp15((v),(r)) > -#endif > - > struct kvm_vm_stat { > ulong remote_tlb_flush; > }; > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index bb0e41b..dbe35fd 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -120,16 +120,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, > const struct sys_reg_desc *r) > { > bool 
was_enabled = vcpu_has_cache_enabled(vcpu); > + u64 val; > + int reg = r->reg; > > BUG_ON(!p->is_write); > > - if (!p->is_aarch32) { > - vcpu_sys_reg(vcpu, r->reg) = p->regval; > + /* See the 32bit mapping in kvm_host.h */ > + if (p->is_aarch32) > + reg = r->reg / 2; > + > + if (!p->is_aarch32 || !p->is_32bit) { > + val = p->regval; > } else { > - if (!p->is_32bit) > - vcpu_cp15_64_high(vcpu, r->reg) = > upper_32_bits(p->regval); > - vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval); > + val = vcpu_sys_reg(vcpu, reg); > + if (r->reg % 2) > + val = (p->regval << 32) | (u64)lower_32_bits(val); > + else > + val = ((u64)upper_32_bits(val) << 32) | > + (u64)lower_32_bits(p->regval); > } > + vcpu_sys_reg(vcpu, reg) = val; > > kvm_toggle_cache(vcpu, was_enabled); > return true; > -- > 2.9.0 > Reviewed-by: Andrew Jones ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v4 00/21] SError rework + RAS for firmware first support
On Mon, Nov 13, 2017 at 12:29:46PM +0100, Christoffer Dall wrote: > On Thu, Nov 09, 2017 at 06:14:56PM +, James Morse wrote: > > Hi guys, > > > > On 19/10/17 15:57, James Morse wrote: > > > Known issues: > > [...] > > > * KVM-Migration: VDISR_EL2 is exposed to userspace as DISR_EL1, but how > > > should > > >HCR_EL2.VSE or VSESR_EL2 be migrated when the guest has an SError > > > pending but > > >hasn't taken it yet...? > > > > I've been trying to work out how this pending-SError-migration could work. > > > > If HCR_EL2.VSE is set then the guest will take a virtual SError when it next > > unmasks SError. Today this doesn't get migrated, but only KVM sets this bit > > as > > an attempt to kill the guest. > > > > This will be more of a problem with GengDongjiu's SError CAP for triggering > > guest SError from user-space, which will also allow the VSESR_EL2 to be > > specified. (this register becomes the guest ESR_EL1 when the virtual SError > > is > > taken and is used to emulate firmware-first's NOTIFY_SEI and eventually > > kernel-first RAS). These errors are likely to be handled by the guest. > > > > > > We don't want to expose VSESR_EL2 to user-space, and for migration it isn't > > enough as a value of '0' doesn't tell us if HCR_EL2.VSE is set. > > > > To get out of this corner: why not declare pending-SError-migration an > > invalid > > thing to do? > > To answer that question we'd have to know if that is generally a valid > thing to require. How will higher level tools in the stack deal with > this (e.g. libvirt, and OpenStack). Is it really valid to tell them > "nope, can't migrate right now". I'm thinking if you have a failing > host and want to signal some error to the guest, that's probably a > really good time to migrate your mission-critical VM away to a different > host, and being told, "sorry, cannot do this" would be painful. I'm > cc'ing Drew for his insight into libvirt and how this is done on x86, > but I'm not really crazy about this idea. 
Without actually confirming, I'm pretty sure it's handled with a best effort to cancel the migration, continuing/restoring execution on the source host (or there may be other policies that could be set as well). Naturally, if the source host is going down and the migration is cancelled, then the VM goes down too... Anyway, I don't think we would generally want to introduce guest controlled migration blockers. IIUC, this migration blocker would remain until the guest handled the SError, which it may never unmask. > > > > > We can give Qemu a way to query if a virtual SError is (still) pending. Qemu > > would need to check this on each vcpu after migration, just before it > > throws the > > switch and the guest runs on the new host. This way the VSESR_EL2 value > > doesn't > > need migrating at all. > > > > In the ideal world, Qemu could re-inject the last SError it triggered if > > there > > is still one pending when it migrates... but because KVM injects errors > > too, it > > would need to block migration until this flag is cleared. > > I don't understand your conclusion here. > > If QEMU can query the virtual SError pending state, it can also inject > that before running the VM after a restore, and we should have preserved > the same state. > > > KVM can promise this doesn't change unless you run the vcpu, so provided the > > vcpu actually takes the SError at some point this thing can still be > > migrated. > > > > This does make the VSE machinery hidden unmigratable state in KVM, which is > > nasty. > > Yes, nasty. > > > > > Can anyone suggest a better way? > > > > I'm thinking this is analogous to migrating a VM that uses an irqchip in > userspace and has set the IRQ or FIQ lines using KVM_IRQ_LINE. My > feeling is that this is also not supported today. Luckily userspace irqchip is mostly a debug feature, or just to support oddball hardware. Or at least that's the way I see its usecases... 
> > My suggestion would be to add some set of VCPU exception state, > potentially as flags, which can be migrated along with the VM, or at > least used by userspace to query the state of the VM, if there exists a > reliable mechanism to restore the state again without any side effects. > > I think we have to comb through Documentation/virtual/kvm/api.txt to see > if we can reuse anything, and if not, add something. We could also Maybe KVM_GET/SET_VCPU_EVENTS? Looks like the doc mistakenly states it's a VM ioctl, but it's a VCPU ioctl. > consider adding something to Documentation/virtual/kvm/devices/vcpu.txt, > where I think we have a large number space to use from. > > Hope this helps? > > -Christoffer Thanks, drew ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 3/3] kvm: arm64: handle single-step of userspace mmio instructions
Julien Thierrywrites: > Hi Alex, > > On 09/11/17 17:00, Alex Bennée wrote: >> The system state of KVM when using userspace emulation is not complete >> until we return into KVM_RUN. To handle mmio related updates we wait >> until they have been committed and then schedule our KVM_EXIT_DEBUG. >> >> The kvm_arm_handle_step_debug() helper tells us if we need to return >> and sets up the exit_reason for us. >> >> Signed-off-by: Alex Bennée >> >> --- >> v2 >>- call helper directly from kvm_arch_vcpu_ioctl_run >> --- >> virt/kvm/arm/arm.c | 3 +++ >> 1 file changed, 3 insertions(+) >> >> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c >> index 95cba0799828..2991adfaca9d 100644 >> --- a/virt/kvm/arm/arm.c >> +++ b/virt/kvm/arm/arm.c >> @@ -625,6 +625,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, >> struct kvm_run *run) >> ret = kvm_handle_mmio_return(vcpu, vcpu->run); >> if (ret) >> return ret; >> +if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) >> +return 1; >> + > > In the previous patch, kvm_arch_vcpu_ioctl_run return 0 when telling > userspace about a debug exception. Shouldn't this branch return 0 > instead of 1? Probably - although in practice it makes no difference. In QEMU for example the test is if (run_ret < 0) to handle errors. Otherwise success is assumed. > Returning on non-zero for kvm_handle_mmio_return is done because it > means there was an error. This is not the case for > kvm_arm_handle_step_debug. > > The description in the comment of kvm_arch_vcpu_ioctl_run is not very > clear whether non-zero result should be used for errors or if only the > negative values are treated as such, and positive values seems to be > generally used to keep the vcpu going. So, I thought it might make > sense to always return the same value upon debug exceptions. There is a subtle mis-match between what gets passed back to the ioctl and what terminates the while() loop later on. As far as the ioctl is concerned it's 0 success and - error. 
Once you get to the while loop you'll only ever exit once ret is no longer > 0. Anyway for consistency we should certainly return 0, I'll fix it on the next iteration. > > Cheers, -- Alex Bennée ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v4 00/21] SError rework + RAS for firmware first support
On 13 November 2017 at 11:29, Christoffer Dallwrote: > I'm thinking this is analogous to migrating a VM that uses an irqchip in > userspace and has set the IRQ or FIQ lines using KVM_IRQ_LINE. My > feeling is that this is also not supported today. Oops, yes, we completely forgot about migration when we added that feature... I think you're right that we won't get that right. thanks -- PMM ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v4 00/21] SError rework + RAS for firmware first support
On Thu, Nov 09, 2017 at 06:14:56PM +, James Morse wrote: > Hi guys, > > On 19/10/17 15:57, James Morse wrote: > > Known issues: > [...] > > * KVM-Migration: VDISR_EL2 is exposed to userspace as DISR_EL1, but how > > should > >HCR_EL2.VSE or VSESR_EL2 be migrated when the guest has an SError > > pending but > >hasn't taken it yet...? > > I've been trying to work out how this pending-SError-migration could work. > > If HCR_EL2.VSE is set then the guest will take a virtual SError when it next > unmasks SError. Today this doesn't get migrated, but only KVM sets this bit as > an attempt to kill the guest. > > This will be more of a problem with GengDongjiu's SError CAP for triggering > guest SError from user-space, which will also allow the VSESR_EL2 to be > specified. (this register becomes the guest ESR_EL1 when the virtual SError is > taken and is used to emulate firmware-first's NOTIFY_SEI and eventually > kernel-first RAS). These errors are likely to be handled by the guest. > > > We don't want to expose VSESR_EL2 to user-space, and for migration it isn't > enough as a value of '0' doesn't tell us if HCR_EL2.VSE is set. > > To get out of this corner: why not declare pending-SError-migration an invalid > thing to do? To answer that question we'd have to know if that is generally a valid thing to require. How will higher level tools in the stack deal with this (e.g. libvirt, and OpenStack). Is it really valid to tell them "nope, can't migrate right now". I'm thinking if you have a failing host and want to signal some error to the guest, that's probably a really good time to migrate your mission-critical VM away to a different host, and being told, "sorry, cannot do this" would be painful. I'm cc'ing Drew for his insight into libvirt and how this is done on x86, but I'm not really crazy about this idea. > > We can give Qemu a way to query if a virtual SError is (still) pending. 
Qemu > would need to check this on each vcpu after migration, just before it throws > the > switch and the guest runs on the new host. This way the VSESR_EL2 value > doesn't > need migrating at all. > > In the ideal world, Qemu could re-inject the last SError it triggered if there > is still one pending when it migrates... but because KVM injects errors too, > it > would need to block migration until this flag is cleared. I don't understand your conclusion here. If QEMU can query the virtual SError pending state, it can also inject that before running the VM after a restore, and we should have preserved the same state. > KVM can promise this doesn't change unless you run the vcpu, so provided the > vcpu actually takes the SError at some point this thing can still be migrated. > > This does make the VSE machinery hidden unmigratable state in KVM, which is > nasty. Yes, nasty. > > Can anyone suggest a better way? > I'm thinking this is analogous to migrating a VM that uses an irqchip in userspace and has set the IRQ or FIQ lines using KVM_IRQ_LINE. My feeling is that this is also not supported today. My suggestion would be to add some set of VCPU exception state, potentially as flags, which can be migrated along with the VM, or at least used by userspace to query the state of the VM, if there exists a reliable mechanism to restore the state again without any side effects. I think we have to comb through Documentation/virtual/kvm/api.txt to see if we can reuse anything, and if not, add something. We could also consider adding something to Documentation/virtual/kvm/devices/vcpu.txt, where I think we have a large number space to use from. Hope this helps? -Christoffer ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 3/3] kvm: arm64: handle single-step of userspace mmio instructions
Hi Alex, On 09/11/17 17:00, Alex Bennée wrote: The system state of KVM when using userspace emulation is not complete until we return into KVM_RUN. To handle mmio related updates we wait until they have been committed and then schedule our KVM_EXIT_DEBUG. The kvm_arm_handle_step_debug() helper tells us if we need to return and sets up the exit_reason for us. Signed-off-by: Alex Bennée--- v2 - call helper directly from kvm_arch_vcpu_ioctl_run --- virt/kvm/arm/arm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 95cba0799828..2991adfaca9d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -625,6 +625,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_handle_mmio_return(vcpu, vcpu->run); if (ret) return ret; + if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) + return 1; + In the previous patch, kvm_arch_vcpu_ioctl_run return 0 when telling userspace about a debug exception. Shouldn't this branch return 0 instead of 1? Returning on non-zero for kvm_handle_mmio_return is done because it means there was an error. This is not the case for kvm_arm_handle_step_debug. The description in the comment of kvm_arch_vcpu_ioctl_run is not very clear whether non-zero result should be used for errors or if only the negative values are treated as such, and positive values seems to be generally used to keep the vcpu going. So, I thought it might make sense to always return the same value upon debug exceptions. Cheers, -- Julien Thierry ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 2/3] kvm: arm64: handle single-stepping trapped instructions
On 09/11/17 17:00, Alex Bennée wrote: If we are using guest debug to single-step the guest we need to ensure we exit after emulating the instruction. This only affects instructions completely emulated by the kernel. For userspace emulated instructions we need to exit and return to complete the emulation. The kvm_arm_handle_step_debug() helper sets up the necessary exit state if needed. Signed-off-by: Alex BennéeReviewed-by: Julien Thierry --- v2 - use helper from patch 1 - if (handled > 0) instead of if (handled) so errors propagate --- arch/arm64/kvm/handle_exit.c | 47 +++- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 7debb74843a0..af1c804742f6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -178,6 +178,38 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) return arm_exit_handlers[hsr_ec]; } +/* + * We may be single-stepping an emulated instruction. If the emulation + * has been completed in-kernel we can return to userspace with a + * KVM_EXIT_DEBUG, otherwise the userspace needs to complete its + * emulation first. + */ + +static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int handled; + + /* +* See ARM ARM B1.14.1: "Hyp traps on instructions +* that fail their condition code check" +*/ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + handled = 1; + } else { + exit_handle_fn exit_handler; + + exit_handler = kvm_get_exit_handler(vcpu); + handled = exit_handler(vcpu, run); + } + + /* helper sets exit_reason if we need to return to userspace */ + if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run)) + handled = 0; + + return handled; +} + /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. 
@@ -185,8 +217,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) { - exit_handle_fn exit_handler; - if (ARM_SERROR_PENDING(exception_index)) { u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); @@ -214,18 +244,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, kvm_inject_vabt(vcpu); return 1; case ARM_EXCEPTION_TRAP: - /* -* See ARM ARM B1.14.1: "Hyp traps on instructions -* that fail their condition code check" -*/ - if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return 1; - } - - exit_handler = kvm_get_exit_handler(vcpu); - - return exit_handler(vcpu, run); + return handle_trap_exceptions(vcpu, run); case ARM_EXCEPTION_HYP_GONE: /* * EL2 has been reset to the hyp-stub. This happens when a guest -- Julien Thierry ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v4 15/13] firmware: arm_sdei: be more robust against cpu-hotplug
On Wed, Nov 08, 2017 at 04:06:24PM +, James Morse wrote: > dpm_suspend() calls the freeze/thaw callbacks for hibernate before > disable_non_bootcpus() takes down secondaries. > > This leads to a fun race where the freeze/thaw callbacks reset the > SDEI interface (as we may be restoring a kernel with a different > layout due to KASLR), then the cpu-hotplug callbacks come in to > save the current state, which has already been reset. > > I tried to solve this with a 'frozen' flag that stops the hotplug > callback from overwriting the saved values. Instead this just > moves the race around and makes it even harder to think about. > > Instead, make it look like the secondaries have gone offline. > Call cpuhp_remove_state() in the freeze callback, this will call the > teardown hook on all online CPUs, then remove the state. This saves > all private events and makes future CPU up/down events invisible. > > Change sdei_event_unregister_all()/sdei_reregister_events() to > only save/restore shared events, which are all that is left. With > this we can remove the frozen flag. We can remove the device > suspend/resume calls too as cpuhotplug's teardown call has masked > the CPUs. > > All that is left is the reboot notifier, (which was abusing the > frozen flag). Call cpuhp_remove_state() to make it look like > secondary CPUs have gone offline. > > Suggested-by: Will Deacon> Signed-off-by: James Morse > --- > drivers/firmware/arm_sdei.c | 60 > +++-- > 1 file changed, 31 insertions(+), 29 deletions(-) Thanks, this appears to address my concerns. It's too late for 4.15 now, but please resend for 4.16 and Catalin can pick this series up. Will ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 1/3] kvm: arm debug: introduce helper for single-step
On 09/11/17 17:00, Alex Bennée wrote: After emulating instructions we may want return to user-space to handle a single-step. If single-step is enabled the helper set the run structure for return and returns true. Signed-off-by: Alex BennéeWith the fixup: Reviewed-by: Julien Thierry --- v2 - kvm_arm_maybe_return_debug -> kvm_arm_handle_step_debug - return bool, true if return to userspace is required --- arch/arm/include/asm/kvm_host.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/debug.c| 22 ++ 3 files changed, 25 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 4a879f6ff13b..a2e881d6108e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -285,6 +285,8 @@ static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, +struct kvm_run *run) {} int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e923b58606e2..bbfd6a2adb2b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -369,6 +369,7 @@ void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index dbadfaf850a7..95afd22a4634 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -221,3 +221,25 @@ void 
kvm_arm_clear_debug(struct kvm_vcpu *vcpu) } } } + + +/* + * When KVM has successfully emulated the instruction we might want to + * return to user space with a KVM_EXIT_DEBUG. We can only do this + * once the emulation is complete though so for userspace emulations + * we have to wait until we have re-entered KVM before calling this + * helper. + * + * Return true (and set exit_reason) to return to userspace or false + * if no further action required. + */ + +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT; + return true; + } + return false; +} -- Julien Thierry ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 25/27] KVM: arm/arm64: GICv4: Theory of operations
From: Marc ZyngierYet another braindump so I can free some cells... Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v4.c | 67 + 1 file changed, 67 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index bf874dd01fc0..915d09dc2638 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -23,6 +23,73 @@ #include "vgic.h" +/* + * How KVM uses GICv4 (insert rude comments here): + * + * The vgic-v4 layer acts as a bridge between several entities: + * - The GICv4 ITS representation offered by the ITS driver + * - VFIO, which is in charge of the PCI endpoint + * - The virtual ITS, which is the only thing the guest sees + * + * The configuration of VLPIs is triggered by a callback from VFIO, + * instructing KVM that a PCI device has been configured to deliver + * MSIs to a vITS. + * + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry, + * and this is used to find the corresponding vITS data structures + * (ITS instance, device, event and irq) using a process that is + * extremely similar to the injection of an MSI. + * + * At this stage, we can link the guest's view of an LPI (uniquely + * identified by the routing entry) and the host irq, using the GICv4 + * driver mapping operation. Should the mapping succeed, we've then + * successfully upgraded the guest's LPI to a VLPI. We can then start + * with updating GICv4's view of the property table and generating an + * INValidation in order to kickstart the delivery of this VLPI to the + * guest directly, without software intervention. Well, almost. + * + * When the PCI endpoint is deconfigured, this operation is reversed + * with VFIO calling kvm_vgic_v4_unset_forwarding(). + * + * Once the VLPI has been mapped, it needs to follow any change the + * guest performs on its LPI through the vITS. 
For that, a number of + * command handlers have hooks to communicate these changes to the HW: + * - Any invalidation triggers a call to its_prop_update_vlpi() + * - The INT command results in an irq_set_irqchip_state(), which + * generates an INT on the corresponding VLPI. + * - The CLEAR command results in an irq_set_irqchip_state(), which + * generates a CLEAR on the corresponding VLPI. + * - DISCARD translates into an unmap, similar to a call to + * kvm_vgic_v4_unset_forwarding(). + * - MOVI is translated by an update of the existing mapping, changing + * the target vcpu, resulting in a VMOVI being generated. + * - MOVALL is translated by a string of mapping updates (similar to + * the handling of MOVI). MOVALL is horrible. + * + * Note that a DISCARD/MAPTI sequence emitted from the guest without + * reprogramming the PCI endpoint after MAPTI does not result in a + * VLPI being mapped, as there is no callback from VFIO (the guest + * will get the interrupt via the normal SW injection). Fixing this is + * not trivial, and requires some horrible messing with the VFIO + * internals. Not fun. Don't do that. + * + * Then there is the scheduling. Each time a vcpu is about to run on a + * physical CPU, KVM must tell the corresponding redistributor about + * it. And if we've migrated our vcpu from one CPU to another, we must + * tell the ITS (so that the messages reach the right redistributor). + * This is done in two steps: first issue an irq_set_affinity() on the + * irq corresponding to the vcpu, then call its_schedule_vpe(). You + * must be in a non-preemptible context. On exit, another call to + * its_schedule_vpe() tells the redistributor that we're done with the + * vcpu. + * + * Finally, the doorbell handling: Each vcpu is allocated an interrupt + * which will fire each time a VLPI is made pending whilst the vcpu is + * not running. Each time the vcpu gets blocked, the doorbell + * interrupt gets enabled. 
When the vcpu is unblocked (for whatever + * reason), the doorbell interrupt is disabled. + */ + #define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 16/27] KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
From: Marc ZyngierThere is no need to perform an INV for each interrupt when updating multiple interrupts. Instead, we can rely on the final VINVALL that gets sent to the ITS to do the work for all of them. Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 4768a0e21cc5..bf571f6d2b32 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its); static int vgic_its_restore_tables_v0(struct vgic_its *its); static int vgic_its_commit_v0(struct vgic_its *its); static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, -struct kvm_vcpu *filter_vcpu); +struct kvm_vcpu *filter_vcpu, bool needs_inv); /* * Creates a new (reference to a) struct vgic_irq for a given LPI. @@ -106,7 +106,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, * However we only have those structs for mapped IRQs, so we read in * the respective config data from memory here upon mapping the LPI. */ - ret = update_lpi_config(kvm, irq, NULL); + ret = update_lpi_config(kvm, irq, NULL, false); if (ret) return ERR_PTR(ret); @@ -273,7 +273,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id) * VCPU. Unconditionally applies if filter_vcpu is NULL. 
*/ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, -struct kvm_vcpu *filter_vcpu) +struct kvm_vcpu *filter_vcpu, bool needs_inv) { u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); u8 prop; @@ -298,7 +298,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, } if (irq->hw) - return its_prop_update_vlpi(irq->host_irq, prop, true); + return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); return 0; } @@ -1117,7 +1117,7 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, if (!ite) return E_ITS_INV_UNMAPPED_INTERRUPT; - return update_lpi_config(kvm, ite->irq, NULL); + return update_lpi_config(kvm, ite->irq, NULL, true); } /* @@ -1152,12 +1152,15 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, irq = vgic_get_irq(kvm, NULL, intids[i]); if (!irq) continue; - update_lpi_config(kvm, irq, vcpu); + update_lpi_config(kvm, irq, vcpu, false); vgic_put_irq(kvm, irq); } kfree(intids); + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) + its_invall_vpe(>arch.vgic_cpu.vgic_v3.its_vpe); + return 0; } -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 02/27] KVM: arm/arm64: vgic: restructure kvm_vgic_(un)map_phys_irq
From: Eric AugerWe want to reuse the core of the map/unmap functions for IRQ forwarding. Let's move the computation of the hwirq in kvm_vgic_map_phys_irq and pass the linux IRQ as parameter. the host_irq is added to struct vgic_irq. We introduce kvm_vgic_map/unmap_irq which take a struct vgic_irq handle as a parameter. Acked-by: Christoffer Dall Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h| 8 --- virt/kvm/arm/arch_timer.c | 24 +-- virt/kvm/arm/vgic/vgic.c | 60 +++ 3 files changed, 51 insertions(+), 41 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 34dba516ef24..53f631bdec75 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -116,6 +116,7 @@ struct vgic_irq { bool hw;/* Tied to HW IRQ */ struct kref refcount; /* Used for LPIs */ u32 hwintid;/* HW INTID number */ + unsigned int host_irq; /* linux irq corresponding to hwintid */ union { u8 targets; /* GICv2 target VCPUs mask */ u32 mpidr; /* GICv3 target VCPU */ @@ -307,9 +308,10 @@ void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, bool level, void *owner); -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, + u32 vintid); +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4db54ff08d9e..4151250ce8da 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -817,9 +817,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = 
>arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct irq_desc *desc; - struct irq_data *data; - int phys_irq; int ret; if (timer->enabled) @@ -837,26 +834,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - /* -* Find the physical IRQ number corresponding to the host_vtimer_irq -*/ - desc = irq_to_desc(host_vtimer_irq); - if (!desc) { - kvm_err("%s: no interrupt descriptor\n", __func__); - return -EINVAL; - } - - data = irq_desc_get_irq_data(desc); - while (data->parent_data) - data = data->parent_data; - - phys_irq = data->hwirq; - - /* -* Tell the VGIC that the virtual interrupt is tied to a -* physical interrupt. We do that once per VCPU. -*/ - ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); + ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq); if (ret) return ret; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e54ef2fdf73d..a05a5700910b 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "vgic.h" @@ -409,25 +411,56 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, return 0; } -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) +/* @irq->irq_lock must be held */ +static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + unsigned int host_irq) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + struct irq_desc *desc; + struct irq_data *data; + + /* +* Find the physical IRQ number corresponding to @host_irq +*/ + desc = irq_to_desc(host_irq); + if (!desc) { + kvm_err("%s: no interrupt descriptor\n", __func__); + return -EINVAL; + } + data = irq_desc_get_irq_data(desc); + while (data->parent_data) + data = data->parent_data; + + irq->hw = true; + irq->host_irq = host_irq; + irq->hwintid = data->hwirq; + return 0; +} + +/* @irq->irq_lock must be held */ +static inline void 
kvm_vgic_unmap_irq(struct vgic_irq *irq) +{ + irq->hw = false; + irq->hwintid = 0; +} + +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
[PULL 20/27] KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
From: Marc ZyngierThe redistributor needs to be told which vPE is about to be run, and tells us whether there is any pending VLPI on exit. Let's add the scheduling calls to the vgic flush/sync functions, allowing the VLPIs to be delivered to the guest. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v4.c | 39 +++ virt/kvm/arm/vgic/vgic.c| 4 virt/kvm/arm/vgic/vgic.h| 2 ++ 3 files changed, 45 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 1375a53054b9..c9cec01008c2 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -131,6 +131,45 @@ void vgic_v4_teardown(struct kvm *kvm) its_vm->vpes = NULL; } +int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu) +{ + if (!vgic_supports_direct_msis(vcpu->kvm)) + return 0; + + return its_schedule_vpe(>arch.vgic_cpu.vgic_v3.its_vpe, false); +} + +int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu) +{ + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; + int err; + + if (!vgic_supports_direct_msis(vcpu->kvm)) + return 0; + + /* +* Before making the VPE resident, make sure the redistributor +* corresponding to our current CPU expects us here. See the +* doc in drivers/irqchip/irq-gic-v4.c to understand how this +* turns into a VMOVP command at the ITS level. +*/ + err = irq_set_affinity(irq, cpumask_of(smp_processor_id())); + if (err) + return err; + + err = its_schedule_vpe(>arch.vgic_cpu.vgic_v3.its_vpe, true); + if (err) + return err; + + /* +* Now that the VPE is resident, let's get rid of a potential +* doorbell interrupt that would still be pending. 
+*/ + err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false); + + return err; +} + static struct vgic_its *vgic_get_its(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *irq_entry) { diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index d29e91215366..b168a328a9e0 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -718,6 +718,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = >arch.vgic_cpu; + WARN_ON(vgic_v4_sync_hwstate(vcpu)); + /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(>arch.vgic_cpu.ap_list_head)) return; @@ -730,6 +732,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { + WARN_ON(vgic_v4_flush_hwstate(vcpu)); + /* * If there are no virtual interrupts active or pending for this * VCPU, then there is no work to do and we can bail out without diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index bf03a9648fbb..efbcf8f96f9c 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -244,5 +244,7 @@ struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); bool vgic_supports_direct_msis(struct kvm *kvm); int vgic_v4_init(struct kvm *kvm); void vgic_v4_teardown(struct kvm *kvm); +int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu); +int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu); #endif -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 13/27] KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
From: Marc ZyngierHandling CLEAR is pretty easy. Just ask the ITS driver to clear the corresponding pending bit (which will turn into a CLEAR command on the physical side). Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 4 1 file changed, 4 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 80bea7021e2e..15e79285380d 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1091,6 +1091,10 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, ite->irq->pending_latch = false; + if (ite->irq->hw) + return irq_set_irqchip_state(ite->irq->host_irq, +IRQCHIP_STATE_PENDING, false); + return 0; } -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 05/27] KVM: arm/arm64: vITS: Add MSI translation helpers
From: Marc ZyngierThe whole MSI injection process is fairly monolithic. An MSI write gets turned into an injected LPI in one swift go. But this is actually a more fine-grained process: - First, a virtual ITS gets selected using the doorbell address - Then the DevID/EventID pair gets translated into an LPI - Finally the LPI is injected Since the GICv4 code needs the first two steps in order to match an IRQ routing entry to an LPI, let's expose them as helpers, and refactor the existing code to use them Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 95 +--- virt/kvm/arm/vgic/vgic.h | 4 ++ 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 40791c121710..1de4e68ef1b6 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -505,19 +505,11 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, return 0; } -/* - * Find the target VCPU and the LPI number for a given devid/eventid pair - * and make this IRQ pending, possibly injecting it. - * Must be called with the its_lock mutex held. - * Returns 0 on success, a positive error value for any ITS mapping - * related errors and negative error values for generic errors. 
- */ -static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, - u32 devid, u32 eventid) +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, +u32 devid, u32 eventid, struct vgic_irq **irq) { struct kvm_vcpu *vcpu; struct its_ite *ite; - unsigned long flags; if (!its->enabled) return -EBUSY; @@ -533,26 +525,61 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, if (!vcpu->arch.vgic_cpu.lpis_enabled) return -EBUSY; - spin_lock_irqsave(>irq->irq_lock, flags); - ite->irq->pending_latch = true; - vgic_queue_irq_unlock(kvm, ite->irq, flags); - + *irq = ite->irq; return 0; } -static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) { + u64 address; + struct kvm_io_device *kvm_io_dev; struct vgic_io_device *iodev; - if (dev->ops != _io_gic_ops) - return NULL; + if (!vgic_has_its(kvm)) + return ERR_PTR(-ENODEV); - iodev = container_of(dev, struct vgic_io_device, dev); + if (!(msi->flags & KVM_MSI_VALID_DEVID)) + return ERR_PTR(-EINVAL); + + address = (u64)msi->address_hi << 32 | msi->address_lo; + + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); + if (!kvm_io_dev) + return ERR_PTR(-EINVAL); + if (kvm_io_dev->ops != _io_gic_ops) + return ERR_PTR(-EINVAL); + + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); if (iodev->iodev_type != IODEV_ITS) - return NULL; + return ERR_PTR(-EINVAL); + + return iodev->its; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. 
+ */ +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) +{ + struct vgic_irq *irq = NULL; + unsigned long flags; + int err; + + err = vgic_its_resolve_lpi(kvm, its, devid, eventid, ); + if (err) + return err; - return iodev; + spin_lock_irqsave(>irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + + return 0; } /* @@ -563,30 +590,16 @@ static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) */ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) { - u64 address; - struct kvm_io_device *kvm_io_dev; - struct vgic_io_device *iodev; + struct vgic_its *its; int ret; - if (!vgic_has_its(kvm)) - return -ENODEV; - - if (!(msi->flags & KVM_MSI_VALID_DEVID)) - return -EINVAL; - - address = (u64)msi->address_hi << 32 | msi->address_lo; + its = vgic_msi_to_its(kvm, msi); + if (IS_ERR(its)) + return PTR_ERR(its); - kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); - if (!kvm_io_dev) - return -EINVAL; - - iodev =
[PULL 26/27] KVM: arm/arm64: Fix GICv4 ITS initialization issues
We should only try to initialize GICv4 data structures on a GICv4 capable system. Move the vgic_supports_direct_msis() check into vgic_v4_init() so that any KVM VGIC initialization path does not fail on non-GICv4 systems. Also be slightly more strict in the checking of the return value in vgic_its_create, and only error out on negative return values from the vgic_v4_init() function. This is important because the kvm device code only treats negative values as errors and only cleans up in this case. Erroneously treating a positive return value as an error from the vgic_v4_init() function can lead to NULL pointer dereferences, as has recently been observed. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-init.c | 8 +++- virt/kvm/arm/vgic/vgic-its.c | 2 +- virt/kvm/arm/vgic/vgic-v4.c | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 40be908da238..62310122ee78 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -285,11 +285,9 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; - if (vgic_supports_direct_msis(kvm)) { - ret = vgic_v4_init(kvm); - if (ret) - goto out; - } + ret = vgic_v4_init(kvm); + if (ret) + goto out; kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_enable(vcpu); diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index b8c1b724ba3e..c93ecd4a903b 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1673,7 +1673,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (vgic_initialized(dev->kvm)) { int ret = vgic_v4_init(dev->kvm); - if (ret) { + if (ret < 0) { kfree(its); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 915d09dc2638..53c324aa44ef 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -118,6 +118,9 @@ int vgic_v4_init(struct kvm *kvm) struct kvm_vcpu *vcpu; int i, 
nr_vcpus, ret; + if (!vgic_supports_direct_msis(kvm)) + return 0; /* Nothing to see here... move along. */ + if (dist->its_vm.vpes) return 0; -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 04/27] KVM: arm/arm64: vgic: Move kvm_vgic_destroy call around
From: Marc ZyngierThe way we call kvm_vgic_destroy is a bit bizarre. We call it *after* having freed the vcpus, which sort of defeats the point of cleaning up things before that point. Let's move kvm_vgic_destroy towards the beginning of kvm_arch_destroy_vm, which seems more sensible. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 79f8ddd604c1..e1e947fe9bc2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -177,6 +177,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; + kvm_vgic_destroy(kvm); + free_percpu(kvm->arch.last_vcpu_ran); kvm->arch.last_vcpu_ran = NULL; @@ -186,8 +188,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } - - kvm_vgic_destroy(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 18/27] KVM: arm/arm64: GICv4: Add doorbell interrupt handling
From: Marc ZyngierWhen a vPE is not running, a VLPI being made pending results in a doorbell interrupt being delivered. Let's handle this interrupt and update the pending_last flag that indicates that VLPIs are pending. The corresponding vcpu is also kicked into action. Special care is taken to prevent the doorbell from being enabled at request time (this is controlled separately), and to make the disabling on the interrupt non-lazy. Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v4.c | 48 + 1 file changed, 48 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 2edfe23c8238..796e00c77903 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -16,12 +16,24 @@ */ #include +#include #include #include #include #include "vgic.h" +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) +{ + struct kvm_vcpu *vcpu = info; + + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return IRQ_HANDLED; +} + /** * vgic_v4_init - Initialize the GICv4 data structures * @kvm: Pointer to the VM being initialized @@ -61,6 +73,33 @@ int vgic_v4_init(struct kvm *kvm) return ret; } + kvm_for_each_vcpu(i, vcpu, kvm) { + int irq = dist->its_vm.vpes[i]->irq; + + /* +* Don't automatically enable the doorbell, as we're +* flipping it back and forth when the vcpu gets +* blocked. Also disable the lazy disabling, as the +* doorbell could kick us out of the guest too +* early... +*/ + irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY); + ret = request_irq(irq, vgic_v4_doorbell_handler, + 0, "vcpu", vcpu); + if (ret) { + kvm_err("failed to allocate vcpu IRQ%d\n", irq); + /* +* Trick: adjust the number of vpes so we know +* how many to nuke on teardown... 
+*/ + dist->its_vm.nr_vpes = i; + break; + } + } + + if (ret) + vgic_v4_teardown(kvm); + return ret; } @@ -73,10 +112,19 @@ int vgic_v4_init(struct kvm *kvm) void vgic_v4_teardown(struct kvm *kvm) { struct its_vm *its_vm = >arch.vgic.its_vm; + int i; if (!its_vm->vpes) return; + for (i = 0; i < its_vm->nr_vpes; i++) { + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); + int irq = its_vm->vpes[i]->irq; + + irq_clear_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY); + free_irq(irq, vcpu); + } + its_free_vcpu_irqs(its_vm); kfree(its_vm->vpes); its_vm->nr_vpes = 0; -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 15/27] KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
From: Marc ZyngierUpon updating a property, we propagate it all the way to the physical ITS, and ask for an INV command to be executed there. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 78d11aed1e17..4768a0e21cc5 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -297,6 +297,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, spin_unlock_irqrestore(>irq_lock, flags); } + if (irq->hw) + return its_prop_update_vlpi(irq->host_irq, prop, true); + return 0; } -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 01/27] KVM: arm/arm64: register irq bypass consumer on ARM/ARM64
From: Eric AugerThis patch selects IRQ_BYPASS_MANAGER and HAVE_KVM_IRQ_BYPASS configs for ARM/ARM64. kvm_arch_has_irq_bypass() now is implemented and returns true. As a consequence the irq bypass consumer will be registered for ARM/ARM64 with the forwarding callbacks: - stop/start: halt/resume guest execution - add/del_producer: set/unset forwarding at vgic/irqchip level We don't have any actual support yet, so nothing gets actually forwarded. Acked-by: Christoffer Dall Signed-off-by: Eric Auger [maz: dropped the DEOI stuff for the time being in order to reduce the dependency chain, amended commit message] Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 3 +++ arch/arm64/kvm/Kconfig | 3 +++ virt/kvm/arm/arm.c | 40 3 files changed, 46 insertions(+) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 90d0176fb30d..4e2b192a030a 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -3,6 +3,7 @@ # source "virt/kvm/Kconfig" +source "virt/lib/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -35,6 +36,8 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 52cb7ad9b2fd..7e0d6e63cc71 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -3,6 +3,7 @@ # source "virt/kvm/Kconfig" +source "virt/lib/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -35,6 +36,8 @@ config KVM select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS ---help--- Support hosting virtualized guest machines. 
We don't support KVM with 16K page tables yet, due to the multiple diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index bc126fb99a3d..79f8ddd604c1 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -1458,6 +1460,44 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) return NULL; } +bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + return 0; +} +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + return; +} + +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_halt_guest(irqfd->kvm); +} + +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_resume_guest(irqfd->kvm); +} + /** * Initialize Hyp-mode and memory mappings on all CPUs. */ -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 06/27] KVM: arm/arm64: vITS: Add a helper to update the affinity of an LPI
From: Marc ZyngierIn order to help integrating the vITS code with GICv4, let's add a new helper that deals with updating the affinity of an LPI, which will later be augmented with super duper extra GICv4 goodness. Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1de4e68ef1b6..09accdfed189 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -336,6 +336,15 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) return i; } +static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) +{ + spin_lock(>irq_lock); + irq->target_vcpu = vcpu; + spin_unlock(>irq_lock); + + return 0; +} + /* * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI * is targeting) to the VGIC's view, which deals with target VCPUs. @@ -350,10 +359,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) return; vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); - - spin_lock(>irq->irq_lock); - ite->irq->target_vcpu = vcpu; - spin_unlock(>irq->irq_lock); + update_affinity(ite->irq, vcpu); } /* @@ -696,11 +702,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, ite->collection = collection; vcpu = kvm_get_vcpu(kvm, collection->target_addr); - spin_lock(>irq->irq_lock); - ite->irq->target_vcpu = vcpu; - spin_unlock(>irq->irq_lock); - - return 0; + return update_affinity(ite->irq, vcpu); } /* -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 11/27] KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
From: Marc Zyngier When freeing an LPI (on a DISCARD command, for example), we need to unmap the VLPI down to the physical ITS level. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 590f794b7330..ad14af8a4412 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -631,8 +631,12 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite) list_del(&ite->ite_list); /* This put matches the get in vgic_add_lpi. */ - if (ite->irq) + if (ite->irq) { + if (ite->irq->hw) + WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + vgic_put_irq(kvm, ite->irq); + } kfree(ite); } -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 19/27] KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
From: Marc ZyngierThe doorbell interrupt is only useful if the vcpu is blocked on WFI. In all other cases, recieving a doorbell interrupt is just a waste of cycles. So let's only enable the doorbell if a vcpu is getting blocked, and disable it when it is unblocked. This is very similar to what we're doing for the background timer. Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 +++ virt/kvm/arm/arm.c | 2 ++ virt/kvm/arm/vgic/vgic-v4.c | 18 ++ 3 files changed, 23 insertions(+) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2f750c770bf2..8c896540a72c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -381,4 +381,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); +void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu); +void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 34a15c0c65ab..01e575b9f78b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -315,11 +315,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { kvm_timer_schedule(vcpu); + kvm_vgic_v4_enable_doorbell(vcpu); } void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { kvm_timer_unschedule(vcpu); + kvm_vgic_v4_disable_doorbell(vcpu); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 796e00c77903..1375a53054b9 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -233,3 +233,21 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, mutex_unlock(>its_lock); return ret; } + +void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu) +{ + if (vgic_supports_direct_msis(vcpu->kvm)) { + int irq = 
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; + if (irq) + enable_irq(irq); + } +} + +void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu) +{ + if (vgic_supports_direct_msis(vcpu->kvm)) { + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; + if (irq) + disable_irq(irq); + } +} -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 17/27] KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
From: Marc ZyngierWhen a vPE exits, the pending_last flag is set when there are pending VLPIs stored in the pending table. Similarily, this flag will be set when a doorbell interrupt fires, as it indicates the same condition. Let's update kvm_vgic_vcpu_pending_irq() to account for that flag as well, making a vcpu runnable when set. Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index a05a5700910b..d29e91215366 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -781,6 +781,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) if (!vcpu->kvm->arch.vgic.enabled) return false; + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last) + return true; + spin_lock_irqsave(_cpu->ap_list_lock, flags); list_for_each_entry(irq, _cpu->ap_list_head, ap_list) { -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 22/27] KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
From: Marc ZyngierThe GICv4 architecture doesn't make it easy for save/restore to work, as it doesn't give any guarantee that the pending state is written into the pending table. So let's not take any chance, and let's return an error if we encounter any LPI that has the HW bit set. In order for userspace to distinguish this error from other failure modes, use -EACCES as an error code. Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/devices/arm-vgic-its.txt | 2 ++ virt/kvm/arm/vgic/vgic-its.c | 9 + 2 files changed, 11 insertions(+) diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt index 8d5830eab26a..4f0c9fc40365 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt @@ -64,6 +64,8 @@ Groups: -EINVAL: Inconsistent restored data -EFAULT: Invalid guest ram access -EBUSY: One or more VCPUS are running +-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the +state is not available KVM_DEV_ARM_VGIC_GRP_ITS_REGS Attributes: diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index bf571f6d2b32..b8c1b724ba3e 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1995,6 +1995,15 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) list_for_each_entry(ite, >itt_head, ite_list) { gpa_t gpa = base + ite->event_id * ite_esz; + /* +* If an LPI carries the HW bit, this means that this +* interrupt is controlled by GICv4, and we do not +* have direct access to that state. Let's simply fail +* the save operation... +*/ + if (ite->irq->hw) + return -EACCES; + ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); if (ret) return ret; -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 00/27] KVM/ARM GICv4 Support for v4.15
Hi Paolo and Radim, Here is the second pull request for KVM/ARM for v4.15. These changes introduce GICv4 support in KVM/ARM, allowing direct injection of LPIs from devices generating MSIs into VMs. The changes rely on irqchip infrastructure for GICv4 already merged in tip:irq/core. The pull request is based on kvm-arm-for-v4.15 plus 722c908f84c6 merged from tip:irq/core. The diffstat has been trimmed to only show the changes between the merge base mentioned above and the KVM/ARM side of GICv4 support, and does not include the additional changes in KVM not present in tip:irq/core. I expect that pulling this into the KVM tree after the irq/core changes land in Linus' tree should result in a clean subsequent pull request upstream, but let me know if you want me to work this out in some other way. The following changes since commit 722c908f84c67bf120105ca870675cadc1bb7b20: Merge tag 'irqchip-4.15-2' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms into irq/core (2017-11-02 19:18:08 +0100) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git tags/kvm-arm-gicv4-for-v4.15 for you to fetch changes up to 95b110ab9a093b5baf3a72e51f71279368d618e2: KVM: arm/arm64: Don't queue VLPIs on INV/INVALL (2017-11-10 09:59:20 +0100) Thanks, -Christoffer Christoffer Dall (2): KVM: arm/arm64: Fix GICv4 ITS initialization issues KVM: arm/arm64: Don't queue VLPIs on INV/INVALL Eric Auger (2): KVM: arm/arm64: register irq bypass consumer on ARM/ARM64 KVM: arm/arm64: vgic: restructure kvm_vgic_(un)map_phys_irq Marc Zyngier (23): KVM: arm: Select ARM_GIC_V3 and ARM_GIC_V3_ITS KVM: arm/arm64: vgic: Move kvm_vgic_destroy call around KVM: arm/arm64: vITS: Add MSI translation helpers KVM: arm/arm64: vITS: Add a helper to update the affinity of an LPI KVM: arm/arm64: GICv4: Add property field and per-VM predicate KVM: arm/arm64: GICv4: Add init/teardown of the per-VM vPE irq domain KVM: arm/arm64: GICv4: Wire mapping/unmapping of 
VLPIs in VFIO irq bypass KVM: arm/arm64: GICv4: Handle INT command applied to a VLPI KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE KVM: arm/arm64: GICv4: Propagate property updates to VLPIs KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint KVM: arm/arm64: GICv4: Add doorbell interrupt handling KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity KVM: arm/arm64: GICv4: Enable VLPI support KVM: arm/arm64: GICv4: Theory of operations Documentation/admin-guide/kernel-parameters.txt| 4 + Documentation/virtual/kvm/devices/arm-vgic-its.txt | 2 + arch/arm/kvm/Kconfig | 5 + arch/arm/kvm/Makefile | 1 + arch/arm64/kvm/Kconfig | 3 + arch/arm64/kvm/Makefile| 1 + include/kvm/arm_vgic.h | 41 ++- virt/kvm/arm/arch_timer.c | 24 +- virt/kvm/arm/arm.c | 48 ++- virt/kvm/arm/hyp/vgic-v3-sr.c | 9 +- virt/kvm/arm/vgic/vgic-init.c | 7 + virt/kvm/arm/vgic/vgic-its.c | 204 virt/kvm/arm/vgic/vgic-mmio-v3.c | 5 + virt/kvm/arm/vgic/vgic-v3.c| 14 + virt/kvm/arm/vgic/vgic-v4.c| 364 + virt/kvm/arm/vgic/vgic.c | 67 +++- virt/kvm/arm/vgic/vgic.h | 10 + 17 files changed, 695 insertions(+), 114 deletions(-) create mode 100644 virt/kvm/arm/vgic/vgic-v4.c -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 03/27] KVM: arm: Select ARM_GIC_V3 and ARM_GIC_V3_ITS
From: Marc Zyngier The GICv4 support introduces a hard dependency between the KVM core and the ITS infrastructure. arm64 already selects it at the architecture level, but 32bit doesn't. In order to avoid littering the kernel with #ifdefs, let's just select the whole of the GICv3 support code. You know you want it. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 4e2b192a030a..52b50af04e3b 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,6 +23,8 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select ARM_GIC + select ARM_GIC_V3 + select ARM_GIC_V3_ITS select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 12/27] KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
From: Marc ZyngierWhen the guest issues an affinity change, we need to tell the physical ITS that we're now targetting a new vcpu. This is done by extracting the current mapping, updating the target, and reapplying the mapping. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index ad14af8a4412..80bea7021e2e 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -338,11 +338,25 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) { + int ret = 0; + spin_lock(>irq_lock); irq->target_vcpu = vcpu; spin_unlock(>irq_lock); - return 0; + if (irq->hw) { + struct its_vlpi_map map; + + ret = its_get_vlpi(irq->host_irq, ); + if (ret) + return ret; + + map.vpe = >arch.vgic_cpu.vgic_v3.its_vpe; + + ret = its_map_vlpi(irq->host_irq, ); + } + + return ret; } /* -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 10/27] KVM: arm/arm64: GICv4: Handle INT command applied to a VLPI
From: Marc ZyngierIf the guest issues an INT command targetting a VLPI, let's call into the irq_set_irqchip_state() helper to make it pending on the physical side. This works just as well if userspace decides to inject an interrupt using the normal userspace API... Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 4 1 file changed, 4 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 54f81eb24a07..590f794b7330 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -581,6 +581,10 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, if (err) return err; + if (irq->hw) + return irq_set_irqchip_state(irq->host_irq, +IRQCHIP_STATE_PENDING, true); + spin_lock_irqsave(>irq_lock, flags); irq->pending_latch = true; vgic_queue_irq_unlock(kvm, irq, flags); -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 08/27] KVM: arm/arm64: GICv4: Add init/teardown of the per-VM vPE irq domain
From: Marc ZyngierIn order to control the GICv4 view of virtual CPUs, we rely on an irqdomain allocated for that purpose. Let's add a couple of helpers to that effect. At the same time, the vgic data structures gain new fields to track all this... erm... wonderful stuff. The way we hook into the vgic init is slightly convoluted. We need the vgic to be initialized (in order to guarantee that the number of vcpus is now fixed), and we must have a vITS (otherwise this is all very pointless). So we end-up calling the init from both vgic_init and vgic_its_create. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/Makefile | 1 + arch/arm64/kvm/Makefile | 1 + include/kvm/arm_vgic.h| 19 ++ virt/kvm/arm/vgic/vgic-init.c | 9 + virt/kvm/arm/vgic/vgic-its.c | 8 + virt/kvm/arm/vgic/vgic-v4.c | 83 +++ virt/kvm/arm/vgic/vgic.h | 2 ++ 7 files changed, 123 insertions(+) create mode 100644 virt/kvm/arm/vgic/vgic-v4.c diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index d9beee652d36..0a1dd2cdb928 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -31,6 +31,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o obj-y += $(KVM)/arm/vgic/vgic-irqfd.o obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-v3.o +obj-y += $(KVM)/arm/vgic/vgic-v4.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 5d9810086c25..c30fd388ef80 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -26,6 +26,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += 
$(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ba9fb450aa1b..7eeb6c2a2f9c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -26,6 +26,8 @@ #include #include +#include + #define VGIC_V3_MAX_CPUS 255 #define VGIC_V2_MAX_CPUS 8 #define VGIC_NR_IRQS_LEGACY 256 @@ -236,6 +238,15 @@ struct vgic_dist { /* used by vgic-debug */ struct vgic_state_iter *iter; + + /* +* GICv4 ITS per-VM data, containing the IRQ domain, the VPE +* array, the property table pointer as well as allocation +* data. This essentially ties the Linux IRQ core and ITS +* together, and avoids leaking KVM's data structures anywhere +* else. +*/ + struct its_vm its_vm; }; struct vgic_v2_cpu_if { @@ -254,6 +265,14 @@ struct vgic_v3_cpu_if { u32 vgic_ap0r[4]; u32 vgic_ap1r[4]; u64 vgic_lr[VGIC_V3_MAX_LRS]; + + /* +* GICv4 ITS per-VPE data, containing the doorbell IRQ, the +* pending table pointer, the its_vm pointer and a few other +* HW specific things. As for the its_vm structure, this is +* linking the Linux IRQ subsystem and the ITS together. 
+*/ + struct its_vpe its_vpe; }; struct vgic_cpu { diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 5801261f3add..40be908da238 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -285,6 +285,12 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; + if (vgic_supports_direct_msis(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + goto out; + } + kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_enable(vcpu); @@ -320,6 +326,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) kfree(dist->spis); dist->nr_spis = 0; + + if (vgic_supports_direct_msis(kvm)) + vgic_v4_teardown(kvm); } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 09accdfed189..54f81eb24a07 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1638,6 +1638,14 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (!its) return -ENOMEM; + if (vgic_initialized(dev->kvm)) { + int ret = vgic_v4_init(dev->kvm); + if (ret) { + kfree(its); + return ret; + } + } +
[PULL 09/27] KVM: arm/arm64: GICv4: Wire mapping/unmapping of VLPIs in VFIO irq bypass
From: Marc ZyngierLet's use the irq bypass mechanism also used for x86 posted interrupts to intercept the virtual PCIe endpoint configuration and establish our LPI->VLPI mapping. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 8 virt/kvm/arm/arm.c | 6 ++- virt/kvm/arm/vgic/vgic-v4.c | 104 3 files changed, 116 insertions(+), 2 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7eeb6c2a2f9c..2f750c770bf2 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -373,4 +373,12 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); +struct kvm_kernel_irq_routing_entry; + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, + struct kvm_kernel_irq_routing_entry *irq_entry); + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, +struct kvm_kernel_irq_routing_entry *irq_entry); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index e1e947fe9bc2..34a15c0c65ab 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1471,7 +1471,8 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); - return 0; + return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, + >irq_entry); } void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) @@ -1479,7 +1480,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); - return; + kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, +>irq_entry); } void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index c794f0cef09e..2edfe23c8238 100644 --- 
a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "vgic.h" @@ -81,3 +82,106 @@ void vgic_v4_teardown(struct kvm *kvm) its_vm->nr_vpes = 0; its_vm->vpes = NULL; } + +static struct vgic_its *vgic_get_its(struct kvm *kvm, +struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct kvm_msi msi = (struct kvm_msi) { + .address_lo = irq_entry->msi.address_lo, + .address_hi = irq_entry->msi.address_hi, + .data = irq_entry->msi.data, + .flags = irq_entry->msi.flags, + .devid = irq_entry->msi.devid, + }; + + return vgic_msi_to_its(kvm, ); +} + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + struct its_vlpi_map map; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* +* Get the ITS, and escape early on error (not a valid +* doorbell for any of our vITSs). +*/ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(>its_lock); + + /* Perform then actual DevID/EventID -> LPI translation. */ + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, ); + if (ret) + goto out; + + /* +* Emit the mapping request. If it fails, the ITS probably +* isn't v4 compatible, so let's silently bail out. Holding +* the ITS lock should ensure that nothing can modify the +* target vcpu. +*/ + map = (struct its_vlpi_map) { + .vm = >arch.vgic.its_vm, + .vpe= >target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe, + .vintid = irq->intid, + .properties = ((irq->priority & 0xfc) | + (irq->enabled ? LPI_PROP_ENABLED : 0) | + LPI_PROP_GROUP1), + .db_enabled = true, + }; + + ret = its_map_vlpi(virq, ); + if (ret) + goto out; + + irq->hw = true; + irq->host_irq = virq; + +out: + mutex_unlock(>its_lock); + return ret; +} + +int
[PULL 07/27] KVM: arm/arm64: GICv4: Add property field and per-VM predicate
From: Marc ZyngierAdd a new has_gicv4 field in the global VGIC state that indicates whether the HW is GICv4 capable, as a per-VM predicate indicating if there is a possibility for a VM to support direct injection (the above being true and the VM having an ITS). Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 3 +++ virt/kvm/arm/vgic/vgic-mmio-v3.c | 5 + virt/kvm/arm/vgic/vgic.h | 2 ++ 3 files changed, 10 insertions(+) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 53f631bdec75..ba9fb450aa1b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -73,6 +73,9 @@ struct vgic_global { /* Only needed for the legacy KVM_CREATE_IRQCHIP */ boolcan_emulate_gicv2; + /* Hardware has GICv4? */ + boolhas_gicv4; + /* GIC system register CPU interface */ struct static_key_false gicv3_cpuif; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 83786108829e..671fe81f8e1d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -54,6 +54,11 @@ bool vgic_has_its(struct kvm *kvm) return dist->has_its; } +bool vgic_supports_direct_msis(struct kvm *kvm) +{ + return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); +} + static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 0ac85045c0c7..67509b203cf5 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -241,4 +241,6 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq); struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); +bool vgic_supports_direct_msis(struct kvm *kvm); + #endif -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 21/27] KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
From: Marc ZyngierIn order for VLPIs to be delivered to the guest, we must make sure that the virtual cpuif is always enabled, irrespective of the presence of virtual interrupt in the LRs. Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 91728faa13fd..f5c3d6d7019e 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -258,7 +258,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } } else { - if (static_branch_unlikely(_v3_cpuif_trap)) + if (static_branch_unlikely(_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) write_gicreg(0, ICH_HCR_EL2); cpu_if->vgic_elrsr = 0x; @@ -337,9 +338,11 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) /* * If we need to trap system registers, we must write * ICH_HCR_EL2 anyway, even if no interrupts are being -* injected, +* injected. Same thing if GICv4 is used, as VLPI +* delivery is gated by ICH_HCR_EL2.En. */ - if (static_branch_unlikely(_v3_cpuif_trap)) + if (static_branch_unlikely(_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 23/27] KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
From: Marc ZyngierWe so far allocate the doorbell interrupts without taking any special measure regarding the affinity of these interrupts. We simply move them around as required when the vcpu gets scheduled on a different CPU. But that's counting without userspace (and the evil irqbalance) that can try and move the VPE interrupt around, causing the ITS code to emit VMOVP commands and remap the doorbell to another redistributor. Worse, this can happen while the vcpu is running, causing all kind of trouble if the VPE is already resident, and we end-up in UNPRED territory. So let's take a definitive action and prevent userspace from messing with us. This is just a matter of adding IRQ_NO_BALANCING to the set of flags we already have, letting the kernel in sole control of the affinity. Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v4.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index c9cec01008c2..bf874dd01fc0 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -23,6 +23,8 @@ #include "vgic.h" +#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) + static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) { struct kvm_vcpu *vcpu = info; @@ -83,7 +85,7 @@ int vgic_v4_init(struct kvm *kvm) * doorbell could kick us out of the guest too * early... 
*/ - irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY); + irq_set_status_flags(irq, DB_IRQ_FLAGS); ret = request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu); if (ret) { @@ -121,7 +123,7 @@ void vgic_v4_teardown(struct kvm *kvm) struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); int irq = its_vm->vpes[i]->irq; - irq_clear_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY); + irq_clear_status_flags(irq, DB_IRQ_FLAGS); free_irq(irq, vcpu); } -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PULL 14/27] KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
From: Marc Zyngier

The current implementation of MOVALL doesn't allow us to call into the core ITS code as we hold a number of spinlocks. Let's try a method used in other parts of the code, where we copy the intids of the candidate interrupts, and then do whatever we need to do with them outside of the critical section. This allows us to move the interrupts one by one, at the expense of a bit of CPU time. Who cares? MOVALL is such a stupid command anyway... Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 15e79285380d..78d11aed1e17 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1169,11 +1169,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { - struct vgic_dist *dist = &kvm->arch.vgic; u32 target1_addr = its_cmd_get_target_addr(its_cmd); u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); struct kvm_vcpu *vcpu1, *vcpu2; struct vgic_irq *irq; + u32 *intids; + int irq_count, i; if (target1_addr >= atomic_read(&kvm->online_vcpus) || target2_addr >= atomic_read(&kvm->online_vcpus)) @@ -1185,19 +1186,19 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, vcpu1 = kvm_get_vcpu(kvm, target1_addr); vcpu2 = kvm_get_vcpu(kvm, target2_addr); - spin_lock(&dist->lpi_list_lock); + irq_count = vgic_copy_lpi_list(vcpu1, &intids); + if (irq_count < 0) + return irq_count; - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { - spin_lock(&irq->irq_lock); + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); - if (irq->target_vcpu == vcpu1) - irq->target_vcpu = vcpu2; + update_affinity(irq, vcpu2); - spin_unlock(&irq->irq_lock); + vgic_put_irq(kvm, irq); } - spin_unlock(&dist->lpi_list_lock); - + 
kfree(intids); return 0; } -- 2.14.2 ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm