Hi Ben,

Do you have any similar ideas that you could share with us?


Thanks
Keqian

On 2021/4/7 7:42, Sean Christopherson wrote:
> +Ben
> 
> On Tue, Apr 06, 2021, Keqian Zhu wrote:
>> Hi Paolo,
>>
>> I plan to rework this patch and do full testing. What do you think about
>> this idea (enabling dirty logging for huge pages lazily)?
> 
> Ben, don't you also have something similar (or maybe the exact opposite?)
> in the hopper?  This sounds very familiar, but I can't quite connect the
> dots that are floating around my head...
>  
>> PS: Since dirty logging for the TDP MMU is now supported, I will need to add more code.
>>
>> On 2020/8/28 16:11, Keqian Zhu wrote:
>>> Currently, when dirty logging is enabled with init-all-set, we only
>>> write protect huge pages and leave normal pages untouched, since dirty
>>> logging can be enabled for normal pages lazily.
>>>
>>> It turns out that enabling dirty logging lazily for huge pages is
>>> feasible too, which not only reduces the time it takes to start dirty
>>> logging, but also greatly reduces the side effects on the guest when
>>> there is a high dirty rate.
>>>
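>>> A sketch of the resulting KVM_CLEAR_DIRTY_LOG flow (this just mirrors
>>> the code added below, so it is as untested as the rest of the RFC):
>>>
>>>     kvm_arch_mmu_enable_log_dirty_pt_masked()
>>>         /*
>>>          * Huge pages were left writable when dirty logging started,
>>>          * so write protect the ones covering this gfn range now.
>>>          */
>>>         if (kvm_dirty_log_manual_protect_and_init_set(kvm) &&
>>>             kvm_mmu_write_protect_largepage_masked(kvm, slot,
>>>                                                    gfn_offset, mask))
>>>                 return;
>>>
>>>         /* Otherwise fall through to the 4K level handling. */
>>>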
>>> (This code is not tested; it is for RFC purposes :-) ).
>>>
>>> Signed-off-by: Keqian Zhu <zhukeqi...@huawei.com>
>>> ---
>>>  arch/x86/include/asm/kvm_host.h |  3 +-
>>>  arch/x86/kvm/mmu/mmu.c          | 65 ++++++++++++++++++++++++++-------
>>>  arch/x86/kvm/vmx/vmx.c          |  3 +-
>>>  arch/x86/kvm/x86.c              | 22 +++++------
>>>  4 files changed, 62 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 5303dbc5c9bc..201a068cf43d 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -1296,8 +1296,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
>>>  
>>>  void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
>>>  void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
>>> -                                 struct kvm_memory_slot *memslot,
>>> -                                 int start_level);
>>> +                                 struct kvm_memory_slot *memslot);
>>>  void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
>>>                                const struct kvm_memory_slot *memslot);
>>>  void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
>>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>>> index 43fdb0c12a5d..4b7d577de6cd 100644
>>> --- a/arch/x86/kvm/mmu/mmu.c
>>> +++ b/arch/x86/kvm/mmu/mmu.c
>>> @@ -1625,14 +1625,45 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
>>>  }
>>>  
>>>  /**
>>> - * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
>>> + * kvm_mmu_write_protect_largepage_masked - write protect selected largepages
>>>   * @kvm: kvm instance
>>>   * @slot: slot to protect
>>>   * @gfn_offset: start of the BITS_PER_LONG pages we care about
>>>   * @mask: indicates which pages we should protect
>>>   *
>>> - * Used when we do not need to care about huge page mappings: e.g. during dirty
>>> - * logging we do not have any such mappings.
>>> + * Return: true if all pages are write protected
>>> + */
>>> +static bool kvm_mmu_write_protect_largepage_masked(struct kvm *kvm,
>>> +                               struct kvm_memory_slot *slot,
>>> +                               gfn_t gfn_offset, unsigned long mask)
>>> +{
>>> +   struct kvm_rmap_head *rmap_head;
>>> +   bool protected, all_protected;
>>> +   gfn_t start_gfn = slot->base_gfn + gfn_offset;
>>> +   int i;
>>> +
>>> +   all_protected = true;
>>> +   while (mask) {
>>> +           protected = false;
>>> +           for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
>>> +                   rmap_head = __gfn_to_rmap(start_gfn + __ffs(mask), i, slot);
>>> +                   protected |= __rmap_write_protect(kvm, rmap_head, false);
>>> +           }
>>> +
>>> +           all_protected &= protected;
>>> +           /* clear the first set bit */
>>> +           mask &= mask - 1;
>>> +   }
>>> +
>>> +   return all_protected;
>>> +}
>>> +
>>> +/**
>>> + * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
>>> + * @kvm: kvm instance
>>> + * @slot: slot to protect
>>> + * @gfn_offset: start of the BITS_PER_LONG pages we care about
>>> + * @mask: indicates which pages we should protect
>>>   */
>>>  static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>>>                                  struct kvm_memory_slot *slot,
>>> @@ -1679,18 +1710,25 @@ EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
>>>  
>>>  /**
>>>   * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
>>> - * PT level pages.
>>> - *
>>> - * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
>>> - * enable dirty logging for them.
>>> - *
>>> - * Used when we do not need to care about huge page mappings: e.g. during dirty
>>> - * logging we do not have any such mappings.
>>> + * dirty pages.
>>>   */
>>>  void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
>>>                             struct kvm_memory_slot *slot,
>>>                             gfn_t gfn_offset, unsigned long mask)
>>>  {
>>> +   /*
>>> +    * With initial-all-set, huge pages are NOT write
>>> +    * protected when dirty logging starts, so they must
>>> +    * be write protected here.
>>> +    */
>>> +   if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
>>> +           if (kvm_mmu_write_protect_largepage_masked(kvm, slot,
>>> +                                   gfn_offset, mask))
>>> +                   return;
>>> +   }
>>> +
>>> +   /* Then we can handle the 4K level pages */
>>> +
>>>     if (kvm_x86_ops.enable_log_dirty_pt_masked)
>>>             kvm_x86_ops.enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
>>>                             mask);
>>> @@ -5906,14 +5944,13 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
>>>  }
>>>  
>>>  void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
>>> -                                 struct kvm_memory_slot *memslot,
>>> -                                 int start_level)
>>> +                                 struct kvm_memory_slot *memslot)
>>>  {
>>>     bool flush;
>>>  
>>>     spin_lock(&kvm->mmu_lock);
>>> -   flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
>>> -                           start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
>>> +   flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
>>> +                                 false);
>>>     spin_unlock(&kvm->mmu_lock);
>>>  
>>>     /*
>>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>>> index 819c185adf09..ba871c52ef8b 100644
>>> --- a/arch/x86/kvm/vmx/vmx.c
>>> +++ b/arch/x86/kvm/vmx/vmx.c
>>> @@ -7538,8 +7538,7 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
>>>  static void vmx_slot_enable_log_dirty(struct kvm *kvm,
>>>                                  struct kvm_memory_slot *slot)
>>>  {
>>> -   if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
>>> -           kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
>>> +   kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
>>>     kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
>>>  }
>>>  
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index d39d6cf1d473..c31c32f1424b 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -10225,22 +10225,18 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
>>>      * is enabled the D-bit or the W-bit will be cleared.
>>>      */
>>>     if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
>>> +           /*
>>> +            * With initial-all-set, we don't need to write
>>> +            * protect any pages because they're all reported
>>> +            * as dirty already.
>>> +            */
>>> +           if (kvm_dirty_log_manual_protect_and_init_set(kvm))
>>> +                   return;
>>> +
>>>             if (kvm_x86_ops.slot_enable_log_dirty) {
>>>                     kvm_x86_ops.slot_enable_log_dirty(kvm, new);
>>>             } else {
>>> -                   int level =
>>> -                           kvm_dirty_log_manual_protect_and_init_set(kvm) ?
>>> -                           PG_LEVEL_2M : PG_LEVEL_4K;
>>> -
>>> -                   /*
>>> -                    * If we're with initial-all-set, we don't need
>>> -                    * to write protect any small page because
>>> -                    * they're reported as dirty already.  However
>>> -                    * we still need to write-protect huge pages
>>> -                    * so that the page split can happen lazily on
>>> -                    * the first write to the huge page.
>>> -                    */
>>> -                   kvm_mmu_slot_remove_write_access(kvm, new, level);
>>> +                   kvm_mmu_slot_remove_write_access(kvm, new);
>>>             }
>>>     } else {
>>>             if (kvm_x86_ops.slot_disable_log_dirty)
>>>
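>>> A side note on the bitmap walk in kvm_mmu_write_protect_largepage_masked():
>>> it is the standard clear-lowest-set-bit idiom. A tiny standalone userspace
>>> demo, with __builtin_ctzl() standing in for the kernel's __ffs():
>>>
>>>     #include <stdio.h>
>>>
>>>     int main(void)
>>>     {
>>>             /* Pretend dirty-bitmap word: bits 0, 2, 3 and 5 are set. */
>>>             unsigned long mask = 0x2d;
>>>
>>>             while (mask) {
>>>                     /* Lowest set bit == offset of the next dirty gfn. */
>>>                     printf("visit gfn_offset + %d\n", __builtin_ctzl(mask));
>>>                     mask &= mask - 1;       /* clear the lowest set bit */
>>>             }
>>>             return 0;
>>>     }
>>>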
