Re: [PATCH 3/3] KVM: Add helpers to wrap vcpu->srcu_idx and yell if it's abused

2022-04-19 Thread Maxim Levitsky
On Tue, 2022-04-19 at 15:45 +, Sean Christopherson wrote:
> On Tue, Apr 19, 2022, Maxim Levitsky wrote:
> > On Fri, 2022-04-15 at 00:43 +, Sean Christopherson wrote:
> > > Add wrappers to acquire/release KVM's SRCU lock when stashing the index
> > > in vcpu->src_idx, along with rudimentary detection of illegal usage,
> > > e.g. re-acquiring SRCU and thus overwriting vcpu->src_idx.  Because the
> > > SRCU index is (currently) either 0 or 1, illegal nesting bugs can go
> > > unnoticed for quite some time and only cause problems when the nested
> > > lock happens to get a different index.
> > > 
> > > Wrap the WARNs in PROVE_RCU=y, and make them ONCE, otherwise KVM will
> > > likely yell so loudly that it will bring the kernel to its knees.
> > > 
> > > Signed-off-by: Sean Christopherson 
> > > ---
> 
> ...
> 
> > Looks good to me overall.
> > 
> > Note that there are still places that acquire the lock and store the idx into
> > a local variable, for example kvm_xen_vcpu_set_attr and such.
> > I didn't check yet if these should be converted as well.
> 
> Using a local variable is ok, even desirable.  Nested/multiple readers is not an
> issue, the bug fixed by patch 1 is purely that kvm_vcpu.srcu_idx gets corrupted.

Makes sense. I still recall *that* bug in AVIC inhibition where that srcu lock was
a major PITA, but now I remember that it was not due to nesting of the lock,
but rather the fact that we attempted to call synchronize_srcu or something like
that with it held.


> 
> In an ideal world, KVM would _only_ track the SRCU index in local variables, but
> that would require plumbing the local variable down into vcpu_enter_guest() and
> kvm_vcpu_block() so that SRCU can be unlocked prior to entering the guest or
> scheduling out the vCPU.
> 
It all makes sense now - thanks.

Best regards,
Maxim Levitsky

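For reference, the "local variable" pattern discussed above (used by callers such as
kvm_xen_vcpu_set_attr) looks roughly like the sketch below. This is a simplified
illustration assuming the usual linux/kvm_host.h context, and the function name
example_read_under_srcu is made up for illustration; the point is that nothing is
stashed in vcpu->srcu_idx, so another SRCU reader nesting around it is harmless:

	/*
	 * Sketch, not the exact upstream code: take KVM's SRCU read lock and
	 * keep the returned index in a local variable.
	 */
	static int example_read_under_srcu(struct kvm_vcpu *vcpu)
	{
		int idx;

		idx = srcu_read_lock(&vcpu->kvm->srcu);
		/* ... access memslots or other SRCU-protected state ... */
		srcu_read_unlock(&vcpu->kvm->srcu, idx);

		return 0;
	}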


Re: [PATCH 3/3] KVM: Add helpers to wrap vcpu->srcu_idx and yell if it's abused

2022-04-19 Thread Fabiano Rosas
Sean Christopherson  writes:

> Add wrappers to acquire/release KVM's SRCU lock when stashing the index
> in vcpu->src_idx, along with rudimentary detection of illegal usage,
> e.g. re-acquiring SRCU and thus overwriting vcpu->src_idx.  Because the
> SRCU index is (currently) either 0 or 1, illegal nesting bugs can go
> unnoticed for quite some time and only cause problems when the nested
> lock happens to get a different index.
>
> Wrap the WARNs in PROVE_RCU=y, and make them ONCE, otherwise KVM will
> likely yell so loudly that it will bring the kernel to its knees.
>
> Signed-off-by: Sean Christopherson 

For the powerpc part:

Tested-by: Fabiano Rosas 

> ---
>  arch/powerpc/kvm/book3s_64_mmu_radix.c |  9 +
>  arch/powerpc/kvm/book3s_hv_nested.c| 16 +++
>  arch/powerpc/kvm/book3s_rtas.c |  4 ++--
>  arch/powerpc/kvm/powerpc.c |  4 ++--
>  arch/riscv/kvm/vcpu.c  | 16 +++
>  arch/riscv/kvm/vcpu_exit.c |  4 ++--
>  arch/s390/kvm/interrupt.c  |  4 ++--
>  arch/s390/kvm/kvm-s390.c   |  8 
>  arch/s390/kvm/vsie.c   |  4 ++--
>  arch/x86/kvm/x86.c | 28 --
>  include/linux/kvm_host.h   | 24 +-
>  11 files changed, 71 insertions(+), 50 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index e4ce2a35483f..42851c32ff3b 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -168,9 +168,10 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
>   return -EINVAL;
>   /* Read the entry from guest memory */
>   addr = base + (index * sizeof(rpte));
> - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +
> + kvm_vcpu_srcu_read_lock(vcpu);
>   ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
> - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (ret) {
>   if (pte_ret_p)
>   *pte_ret_p = addr;
> @@ -246,9 +247,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
>  
>   /* Read the table to find the root of the radix tree */
>   ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
> - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
> - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (ret)
>   return ret;
>  
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 9d373f8963ee..c943a051c6e7 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -306,10 +306,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>   /* copy parameters in */
>   hv_ptr = kvmppc_get_gpr(vcpu, 4);
>   regs_ptr = kvmppc_get_gpr(vcpu, 5);
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
> hv_ptr, regs_ptr);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (err)
>   return H_PARAMETER;
>  
> @@ -410,10 +410,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>   byteswap_hv_regs(&l2_hv);
>   byteswap_pt_regs(&l2_regs);
>   }
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
>  hv_ptr, regs_ptr);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (err)
>   return H_AUTHORITY;
>  
> @@ -600,16 +600,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
>   goto not_found;
>  
>   /* Write what was loaded into our buffer back to the L1 guest */
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (rc)
>   goto not_found;
>   } else {
>   /* Load the data to be stored from the L1 guest into our buf */
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> +

Re: [PATCH 3/3] KVM: Add helpers to wrap vcpu->srcu_idx and yell if it's abused

2022-04-19 Thread Maxim Levitsky
On Fri, 2022-04-15 at 00:43 +, Sean Christopherson wrote:
> Add wrappers to acquire/release KVM's SRCU lock when stashing the index
> in vcpu->src_idx, along with rudimentary detection of illegal usage,
> e.g. re-acquiring SRCU and thus overwriting vcpu->src_idx.  Because the
> SRCU index is (currently) either 0 or 1, illegal nesting bugs can go
> unnoticed for quite some time and only cause problems when the nested
> lock happens to get a different index.
> 
> Wrap the WARNs in PROVE_RCU=y, and make them ONCE, otherwise KVM will
> likely yell so loudly that it will bring the kernel to its knees.
> 
> Signed-off-by: Sean Christopherson 
> ---
>  arch/powerpc/kvm/book3s_64_mmu_radix.c |  9 +
>  arch/powerpc/kvm/book3s_hv_nested.c| 16 +++
>  arch/powerpc/kvm/book3s_rtas.c |  4 ++--
>  arch/powerpc/kvm/powerpc.c |  4 ++--
>  arch/riscv/kvm/vcpu.c  | 16 +++
>  arch/riscv/kvm/vcpu_exit.c |  4 ++--
>  arch/s390/kvm/interrupt.c  |  4 ++--
>  arch/s390/kvm/kvm-s390.c   |  8 
>  arch/s390/kvm/vsie.c   |  4 ++--
>  arch/x86/kvm/x86.c | 28 --
>  include/linux/kvm_host.h   | 24 +-
>  11 files changed, 71 insertions(+), 50 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index e4ce2a35483f..42851c32ff3b 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -168,9 +168,10 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
>   return -EINVAL;
>   /* Read the entry from guest memory */
>   addr = base + (index * sizeof(rpte));
> - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +
> + kvm_vcpu_srcu_read_lock(vcpu);
>   ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
> - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (ret) {
>   if (pte_ret_p)
>   *pte_ret_p = addr;
> @@ -246,9 +247,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
>  
>   /* Read the table to find the root of the radix tree */
>   ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
> - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
> - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (ret)
>   return ret;
>  
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 9d373f8963ee..c943a051c6e7 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -306,10 +306,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>   /* copy parameters in */
>   hv_ptr = kvmppc_get_gpr(vcpu, 4);
>   regs_ptr = kvmppc_get_gpr(vcpu, 5);
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
> hv_ptr, regs_ptr);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (err)
>   return H_PARAMETER;
>  
> @@ -410,10 +410,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>   byteswap_hv_regs(&l2_hv);
>   byteswap_pt_regs(&l2_regs);
>   }
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
>  hv_ptr, regs_ptr);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (err)
>   return H_AUTHORITY;
>  
> @@ -600,16 +600,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
>   goto not_found;
>  
>   /* Write what was loaded into our buffer back to the L1 guest */
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_srcu_read_unlock(vcpu);
>   if (rc)
>   goto not_found;
>   } else {
>   /* Load the data to be stored from the L1 guest into our buf */
> - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> + kvm_vcpu_srcu_read_lock(vcpu);
>   rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
> - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> +
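The quoted diff is cut off above before it reaches the include/linux/kvm_host.h hunk
that actually introduces the wrappers. Going by the changelog, they behave roughly
like the sketch below: stash the SRCU index in the vCPU and, under PROVE_RCU, fire a
WARN_ONCE() on illegal re-acquire or unbalanced release. This is a reconstruction
from the description, not the exact upstream code; in particular the srcu_depth
counter field name is an assumption here:

	/* Sketch of the wrappers described in the changelog (field names
	 * illustrative): lock, stash the index, and under PROVE_RCU warn
	 * once if the lock is re-acquired or released while not held.
	 */
	static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
	{
	#ifdef CONFIG_PROVE_RCU
		WARN_ONCE(vcpu->srcu_depth++,
			  "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1);
	#endif
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	}

	static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
	{
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

	#ifdef CONFIG_PROVE_RCU
		WARN_ONCE(--vcpu->srcu_depth,
			  "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth);
	#endif
	}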