On Wed, Jun 18, 2014 at 08:12:06PM -0300, mtosa...@redhat.com wrote: > Request KVM_REQ_MMU_RELOAD when deleting sptes from MMU notifiers. > > Keep pinned sptes intact if page aging. > > Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com> > > --- > arch/x86/kvm/mmu.c | 71 > ++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 62 insertions(+), 9 deletions(-) > > Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c > =================================================================== > --- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c 2014-06-18 17:28:24.339435654 > -0300 > +++ kvm.pinned-sptes/arch/x86/kvm/mmu.c 2014-06-18 17:29:32.510225755 > -0300 > @@ -1184,6 +1184,42 @@ > kvm_flush_remote_tlbs(vcpu->kvm); > } > > +static void ack_flush(void *_completed) > +{ > +} > + > +static void mmu_reload_pinned_vcpus(struct kvm *kvm) > +{ > + int i, cpu, me; > + cpumask_var_t cpus; > + struct kvm_vcpu *vcpu; > + unsigned int req = KVM_REQ_MMU_RELOAD; > + > + zalloc_cpumask_var(&cpus, GFP_ATOMIC); > + > + me = get_cpu(); > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (list_empty(&vcpu->arch.pinned_mmu_pages)) > + continue; > + kvm_make_request(req, vcpu); > + cpu = vcpu->cpu; > + > + /* Set ->requests bit before we read ->mode */ > + smp_mb(); > + > + if (cpus != NULL && cpu != -1 && cpu != me && > + kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE) > + cpumask_set_cpu(cpu, cpus); > + } > + if (unlikely(cpus == NULL)) > + smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); > + else if (!cpumask_empty(cpus)) > + smp_call_function_many(cpus, ack_flush, NULL, 1); > + put_cpu(); > + free_cpumask_var(cpus); > + return; > +} This is a copy-and-paste of make_all_cpus_request(); the only difference is the check of vcpu->arch.pinned_mmu_pages. You can add make_some_cpus_request(..., bool (*predicate)(struct kvm_vcpu *)) to kvm_main.c and rewrite make_all_cpus_request() to use it instead.
> + > /* > * Write-protect on the specified @sptep, @pt_protect indicates whether > * spte write-protection is caused by protecting shadow page table. > @@ -1276,7 +1312,8 @@ > } > > static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, > - struct kvm_memory_slot *slot, unsigned long data) > + struct kvm_memory_slot *slot, unsigned long data, > + bool age) > { > u64 *sptep; > struct rmap_iterator iter; > @@ -1286,6 +1323,14 @@ > BUG_ON(!(*sptep & PT_PRESENT_MASK)); > rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, > *sptep); > > + if (is_pinned_spte(*sptep)) { > + /* don't nuke pinned sptes if page aging: return > + * young=yes instead. > + */ > + if (age) > + return 1; > + mmu_reload_pinned_vcpus(kvm); > + } > drop_spte(kvm, sptep); > need_tlb_flush = 1; > } > @@ -1294,7 +1339,8 @@ > } > > static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, > - struct kvm_memory_slot *slot, unsigned long data) > + struct kvm_memory_slot *slot, unsigned long data, > + bool age) > { > u64 *sptep; > struct rmap_iterator iter; > @@ -1312,6 +1358,9 @@ > > need_flush = 1; > > + if (is_pinned_spte(*sptep)) > + mmu_reload_pinned_vcpus(kvm); > + > if (pte_write(*ptep)) { > drop_spte(kvm, sptep); > sptep = rmap_get_first(*rmapp, &iter); > @@ -1342,7 +1391,8 @@ > int (*handler)(struct kvm *kvm, > unsigned long *rmapp, > struct kvm_memory_slot *slot, > - unsigned long data)) > + unsigned long data, > + bool age)) > { > int j; > int ret = 0; > @@ -1382,7 +1432,7 @@ > rmapp = __gfn_to_rmap(gfn_start, j, memslot); > > for (; idx <= idx_end; ++idx) > - ret |= handler(kvm, rmapp++, memslot, data); > + ret |= handler(kvm, rmapp++, memslot, data, > false); > } > } > > @@ -1393,7 +1443,8 @@ > unsigned long data, > int (*handler)(struct kvm *kvm, unsigned long *rmapp, > struct kvm_memory_slot *slot, > - unsigned long data)) > + unsigned long data, > + bool age)) > { > return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); > } > @@ -1414,7 +1465,8 @@ > } > > 
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, > - struct kvm_memory_slot *slot, unsigned long data) > + struct kvm_memory_slot *slot, unsigned long data, > + bool age) > { > u64 *sptep; > struct rmap_iterator uninitialized_var(iter); > @@ -1429,7 +1481,7 @@ > * out actively used pages or breaking up actively used hugepages. > */ > if (!shadow_accessed_mask) { > - young = kvm_unmap_rmapp(kvm, rmapp, slot, data); > + young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true); > goto out; > } > > @@ -1450,7 +1502,8 @@ > } > > static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, > - struct kvm_memory_slot *slot, unsigned long data) > + struct kvm_memory_slot *slot, unsigned long data, > + bool age) > { > u64 *sptep; > struct rmap_iterator iter; > @@ -1488,7 +1541,7 @@ > > rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); > > - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); > + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false); > kvm_flush_remote_tlbs(vcpu->kvm); > } > > > -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html