Excerpts from Fabiano Rosas's message of April 16, 2021 9:09 am:
> As one of the arguments of the H_ENTER_NESTED hypercall, the nested
> hypervisor (L1) prepares a structure containing the values of various
> hypervisor-privileged registers with which it wants the nested guest
> (L2) to run. Since the nested HV runs in supervisor mode it needs the
> host to write to these registers.
> 
> To stop a nested HV manipulating this mechanism and using a nested
> guest as a proxy to access a facility that has been made unavailable
> to it, we have a routine that sanitises the values of the HV registers
> before copying them into the nested guest's vcpu struct.
> 
> However, when coming out of the guest, the current values are copied
> back into L1 memory as-is, which means that any sanitisation we did
> during guest entry will be exposed to L1 after H_ENTER_NESTED returns.
> 
> This patch changes the sanitisation to act directly on the vcpu->arch
> registers before entering and after exiting the guest, leaving the
> structure that is copied back into L1 unchanged (except where we
> really want L1 to see the value, e.g. the Cause bits of HFSCR).
> 
> Signed-off-by: Fabiano Rosas <faro...@linux.ibm.com>
> ---
>  arch/powerpc/kvm/book3s_hv_nested.c | 55 ++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 21 deletions(-)
> 
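
For anyone skimming the changelog, the idea can be boiled down to the
standalone sketch below. The structs, mask and values are made up for
illustration and are not the kernel's definitions; the point is that the
entry-time restriction lands in the copy of the registers that L2
actually runs with, while the hv_guest_state handed back to L1 only has
the interrupt cause field refreshed:

/*
 * Standalone illustration only: the structs, mask and values below are
 * invented for the example, not taken from the kernel.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define HFSCR_INTR_CAUSE (UINT64_C(0xff) << 56)	/* illustrative mask */

struct hv_state  { uint64_t hfscr; };	/* what L1 passes in and reads back */
struct run_state { uint64_t hfscr; };	/* what L2 actually runs with       */

int main(void)
{
	struct hv_state  l1 = { .hfscr = UINT64_C(0x00ff) }; /* L1's own facilities */
	struct hv_state  l2 = { .hfscr = UINT64_C(0xffff) }; /* L1's request for L2 */
	struct run_state arch;

	/* Entry: L2 must not get facilities that L1 itself does not have. */
	arch.hfscr = l2.hfscr & (HFSCR_INTR_CAUSE | l1.hfscr);

	/* ... L2 runs; suppose a facility interrupt sets a cause value ... */
	arch.hfscr |= UINT64_C(0x12) << 56;

	/* Exit: only the cause field flows back into what L1 will read. */
	l2.hfscr = (~HFSCR_INTR_CAUSE & l2.hfscr) | (HFSCR_INTR_CAUSE & arch.hfscr);

	printf("L2 ran with hfscr   = 0x%016" PRIx64 "\n", arch.hfscr); /* ...00ff + cause */
	printf("L1 reads back hfscr = 0x%016" PRIx64 "\n", l2.hfscr);   /* ...ffff + cause */
	return 0;
}

With these made-up inputs L2 runs with the restricted 0x00ff (plus the
cause byte), while L1 reads back its own 0xffff request with only the
cause field updated, which is the behaviour the changelog describes.
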
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 0cd0e7aad588..270552dd42c5 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -102,8 +102,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
>  {
>       struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
> +     /*
> +      * When loading the hypervisor-privileged registers to run L2,
> +      * we might have used bits from L1 state to restrict what the
> +      * L2 state is allowed to be. Since L1 is not allowed to read
> +      * the HV registers, do not include these modifications in the
> +      * return state.
> +      */
> +     hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
> +                  (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
> +
>       hr->dpdes = vc->dpdes;
> -     hr->hfscr = vcpu->arch.hfscr;
>       hr->purr = vcpu->arch.purr;
>       hr->spurr = vcpu->arch.spurr;
>       hr->ic = vcpu->arch.ic;

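One note on the new HFSCR line, since the bit-merge is easy to mis-read:
(~M & a) | (M & b) takes the bits selected by M from b and everything
else from a, so only the cause field from vcpu->arch.hfscr reaches the
state handed back to L1. A tiny self-contained check, with values made
up purely for illustration:

#include <assert.h>
#include <stdint.h>

/* Take the bits selected by mask from b, and everything else from a. */
static uint64_t bitmerge(uint64_t a, uint64_t b, uint64_t mask)
{
	return (~mask & a) | (mask & b);
}

int main(void)
{
	/* Made-up values: the top byte comes from b, the rest stays from a. */
	assert(bitmerge(UINT64_C(0x00000000000000aa),
			UINT64_C(0x5500000000000000),
			UINT64_C(0xff00000000000000)) ==
	       UINT64_C(0x55000000000000aa));
	return 0;
}
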
I have no problem with the parts of the patch below; I think it's good
to be able to restore the hv_guest_state for return. For cases where
the L0 might emulate some HV behaviour transparently, for example, it
will be useful, at least.

Thanks,
Nick

> @@ -132,24 +141,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
>       }
>  }
>  
> -static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
> -{
> -     /*
> -      * Don't let L1 enable features for L2 which we've disabled for L1,
> -      * but preserve the interrupt cause field.
> -      */
> -     hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
> -
> -     /* Don't let data address watchpoint match in hypervisor state */
> -     hr->dawrx0 &= ~DAWRX_HYP;
> -     hr->dawrx1 &= ~DAWRX_HYP;
> -
> -     /* Don't let completed instruction address breakpt match in HV state */
> -     if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
> -             hr->ciabr &= ~CIABR_PRIV;
> -}
> -
> -static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
> +static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
>  {
>       struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
> @@ -261,6 +253,27 @@ static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
>                                    sizeof(struct pt_regs));
>  }
>  
> +static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
> +                         const struct hv_guest_state *l2_hv,
> +                         const struct hv_guest_state *l1_hv)
> +{
> +     restore_hv_regs(vcpu, l2_hv);
> +
> +     /*
> +      * Don't let L1 enable features for L2 which we've disabled for L1,
> +      * but preserve the interrupt cause field.
> +      */
> +     vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | l1_hv->hfscr);
> +
> +     /* Don't let data address watchpoint match in hypervisor state */
> +     vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
> +     vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
> +
> +     /* Don't let completed instruction address breakpt match in HV state */
> +     if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
> +             vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
> +}
> +
>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>  {
>       long int err, r;
> @@ -324,8 +337,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
>       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
>               LPCR_LPES | LPCR_MER;
>       lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
> -     sanitise_hv_regs(vcpu, &l2_hv);
> -     restore_hv_regs(vcpu, &l2_hv);
> +
> +     load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv);
>  
>       vcpu->arch.ret = RESUME_GUEST;
>       vcpu->arch.trap = 0;
> -- 
> 2.29.2
> 
> 
