On 6/21/19 10:37 AM, Marc Zyngier wrote:
> Extract the direct HW accessors for later reuse.
>
> Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>
> ---
>  arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
>  1 file changed, 139 insertions(+), 108 deletions(-)
>
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2b8734f75a09..e181359adadf 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const 
> struct el2_sysreg_map *map,
>       return entry;
>  }
>  
> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
> +{
> +     /*
> +      * System registers listed in the switch are not saved on every
> +      * exit from the guest but are only saved on vcpu_put.
> +      *
> +      * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> +      * should never be listed below, because the guest cannot modify its
> +      * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
> +      * thread when emulating cross-VCPU communication.
> +      */
> +     switch (reg) {
> +     case CSSELR_EL1:        *val = read_sysreg_s(SYS_CSSELR_EL1);   break;
> +     case SCTLR_EL1:         *val = read_sysreg_s(SYS_SCTLR_EL12);   break;
> +     case ACTLR_EL1:         *val = read_sysreg_s(SYS_ACTLR_EL1);    break;
> +     case CPACR_EL1:         *val = read_sysreg_s(SYS_CPACR_EL12);   break;
> +     case TTBR0_EL1:         *val = read_sysreg_s(SYS_TTBR0_EL12);   break;
> +     case TTBR1_EL1:         *val = read_sysreg_s(SYS_TTBR1_EL12);   break;
> +     case TCR_EL1:           *val = read_sysreg_s(SYS_TCR_EL12);     break;
> +     case ESR_EL1:           *val = read_sysreg_s(SYS_ESR_EL12);     break;
> +     case AFSR0_EL1:         *val = read_sysreg_s(SYS_AFSR0_EL12);   break;
> +     case AFSR1_EL1:         *val = read_sysreg_s(SYS_AFSR1_EL12);   break;
> +     case FAR_EL1:           *val = read_sysreg_s(SYS_FAR_EL12);     break;
> +     case MAIR_EL1:          *val = read_sysreg_s(SYS_MAIR_EL12);    break;
> +     case VBAR_EL1:          *val = read_sysreg_s(SYS_VBAR_EL12);    break;
> +     case CONTEXTIDR_EL1:    *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
> +     case TPIDR_EL0:         *val = read_sysreg_s(SYS_TPIDR_EL0);    break;
> +     case TPIDRRO_EL0:       *val = read_sysreg_s(SYS_TPIDRRO_EL0);  break;
> +     case TPIDR_EL1:         *val = read_sysreg_s(SYS_TPIDR_EL1);    break;
> +     case AMAIR_EL1:         *val = read_sysreg_s(SYS_AMAIR_EL12);   break;
> +     case CNTKCTL_EL1:       *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
> +     case PAR_EL1:           *val = read_sysreg_s(SYS_PAR_EL1);      break;
> +     case DACR32_EL2:        *val = read_sysreg_s(SYS_DACR32_EL2);   break;
> +     case IFSR32_EL2:        *val = read_sysreg_s(SYS_IFSR32_EL2);   break;
> +     case DBGVCR32_EL2:      *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
> +     default:                return false;
> +     }
> +
> +     return true;
> +}
> +
> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
> +{
> +     /*
> +      * System registers listed in the switch are not restored on every
> +      * entry to the guest but are only restored on vcpu_load.
> +      *
> +      * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> +      * should never be listed below, because the the MPIDR should only be
> +      * set once, before running the VCPU, and never changed later.
> +      */
> +     switch (reg) {
> +     case CSSELR_EL1:        write_sysreg_s(val, SYS_CSSELR_EL1);    break;
> +     case SCTLR_EL1:         write_sysreg_s(val, SYS_SCTLR_EL12);    break;
> +     case ACTLR_EL1:         write_sysreg_s(val, SYS_ACTLR_EL1);     break;
> +     case CPACR_EL1:         write_sysreg_s(val, SYS_CPACR_EL12);    break;
> +     case TTBR0_EL1:         write_sysreg_s(val, SYS_TTBR0_EL12);    break;
> +     case TTBR1_EL1:         write_sysreg_s(val, SYS_TTBR1_EL12);    break;
> +     case TCR_EL1:           write_sysreg_s(val, SYS_TCR_EL12);      break;
> +     case ESR_EL1:           write_sysreg_s(val, SYS_ESR_EL12);      break;
> +     case AFSR0_EL1:         write_sysreg_s(val, SYS_AFSR0_EL12);    break;
> +     case AFSR1_EL1:         write_sysreg_s(val, SYS_AFSR1_EL12);    break;
> +     case FAR_EL1:           write_sysreg_s(val, SYS_FAR_EL12);      break;
> +     case MAIR_EL1:          write_sysreg_s(val, SYS_MAIR_EL12);     break;
> +     case VBAR_EL1:          write_sysreg_s(val, SYS_VBAR_EL12);     break;
> +     case CONTEXTIDR_EL1:    write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
> +     case TPIDR_EL0:         write_sysreg_s(val, SYS_TPIDR_EL0);     break;
> +     case TPIDRRO_EL0:       write_sysreg_s(val, SYS_TPIDRRO_EL0);   break;
> +     case TPIDR_EL1:         write_sysreg_s(val, SYS_TPIDR_EL1);     break;
> +     case AMAIR_EL1:         write_sysreg_s(val, SYS_AMAIR_EL12);    break;
> +     case CNTKCTL_EL1:       write_sysreg_s(val, SYS_CNTKCTL_EL12);  break;
> +     case PAR_EL1:           write_sysreg_s(val, SYS_PAR_EL1);       break;
> +     case DACR32_EL2:        write_sysreg_s(val, SYS_DACR32_EL2);    break;
> +     case IFSR32_EL2:        write_sysreg_s(val, SYS_IFSR32_EL2);    break;
> +     case DBGVCR32_EL2:      write_sysreg_s(val, SYS_DBGVCR32_EL2);  break;
> +     default:                return false;
> +     }
> +
> +     return true;
> +}
> +
>  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>  {
> -     u64 val;
> +     u64 val = 0x8badf00d8badf00d;
>  
>       if (!vcpu->arch.sysregs_loaded_on_cpu)
> -             goto immediate_read;
> +             goto memory_read;
>  
>       if (unlikely(sysreg_is_el2(reg))) {
>               const struct el2_sysreg_map *el2_reg;
>  
>               if (!is_hyp_ctxt(vcpu))
> -                     goto immediate_read;
> +                     goto memory_read;
>  
>               switch (reg) {
> +             case ELR_EL2:
> +                     return read_sysreg_el1(SYS_ELR);
>               case SPSR_EL2:
>                       val = read_sysreg_el1(SYS_SPSR);
>                       return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val);
>               }
>  
>               el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
> -             if (el2_reg) {
> -                     /*
> -                      * If this register does not have an EL1 counterpart,
> -                      * then read the stored EL2 version.
> -                      */
> -                     if (el2_reg->mapping == __INVALID_SYSREG__)
> -                             goto immediate_read;
> -
> -                     /* Get the current version of the EL1 counterpart. */
> -                     reg = el2_reg->mapping;
> -             }
> -     } else {
> -             /* EL1 register can't be on the CPU if the guest is in vEL2. */
> -             if (unlikely(is_hyp_ctxt(vcpu)))
> -                     goto immediate_read;
> +             BUG_ON(!el2_reg);
> +
> +             /*
> +              * If this register does not have an EL1 counterpart,
> +              * then read the stored EL2 version.
> +              */
> +             if (el2_reg->mapping == __INVALID_SYSREG__)
> +                     goto memory_read;
> +
> +             if (!vcpu_el2_e2h_is_set(vcpu) &&
> +                 el2_reg->translate)
> +                     goto memory_read;

Nit: the condition can be written on one line.

This condition wasn't present in patch 13 which introduced EL2 register
handling, and I'm struggling to understand what it does. As I understand the
code, this condition basically translates into:

- if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read
it from memory.

- if the register is an EL2 register whose value is written unmodified to the
corresponding EL1 register, then read the corresponding EL1 register and return
that value.

Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved
in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be
reflected in the corresponding EL2 register. I think it's safe to always return
the value from memory.

I tried testing this with the following patch:

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1235a88ec575..27d39bb9564d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
                el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
                BUG_ON(!el2_reg);
 
+               if (!vcpu_el2_e2h_is_set(vcpu))
+                       goto memory_read;
+
                /*
                 * If this register does not have an EL1 counterpart,
                 * then read the stored EL2 version.
@@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
                if (el2_reg->mapping == __INVALID_SYSREG__)
                        goto memory_read;
 
-               if (!vcpu_el2_e2h_is_set(vcpu) &&
-                   el2_reg->translate)
-                       goto memory_read;
-
                /* Get the current version of the EL1 counterpart. */
                reg = el2_reg->mapping;
                WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));

I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe
hypervisor.

> +
> +             /* Get the current version of the EL1 counterpart. */
> +             reg = el2_reg->mapping;
> +             WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));
> +             return val;
>       }
>  
> -     /*
> -      * System registers listed in the switch are not saved on every
> -      * exit from the guest but are only saved on vcpu_put.
> -      *
> -      * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> -      * should never be listed below, because the guest cannot modify its
> -      * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
> -      * thread when emulating cross-VCPU communication.
> -      */
> -     switch (reg) {
> -     case CSSELR_EL1:        return read_sysreg_s(SYS_CSSELR_EL1);
> -     case SCTLR_EL1:         return read_sysreg_s(SYS_SCTLR_EL12);
> -     case ACTLR_EL1:         return read_sysreg_s(SYS_ACTLR_EL1);
> -     case CPACR_EL1:         return read_sysreg_s(SYS_CPACR_EL12);
> -     case TTBR0_EL1:         return read_sysreg_s(SYS_TTBR0_EL12);
> -     case TTBR1_EL1:         return read_sysreg_s(SYS_TTBR1_EL12);
> -     case TCR_EL1:           return read_sysreg_s(SYS_TCR_EL12);
> -     case ESR_EL1:           return read_sysreg_s(SYS_ESR_EL12);
> -     case AFSR0_EL1:         return read_sysreg_s(SYS_AFSR0_EL12);
> -     case AFSR1_EL1:         return read_sysreg_s(SYS_AFSR1_EL12);
> -     case FAR_EL1:           return read_sysreg_s(SYS_FAR_EL12);
> -     case MAIR_EL1:          return read_sysreg_s(SYS_MAIR_EL12);
> -     case VBAR_EL1:          return read_sysreg_s(SYS_VBAR_EL12);
> -     case CONTEXTIDR_EL1:    return read_sysreg_s(SYS_CONTEXTIDR_EL12);
> -     case TPIDR_EL0:         return read_sysreg_s(SYS_TPIDR_EL0);
> -     case TPIDRRO_EL0:       return read_sysreg_s(SYS_TPIDRRO_EL0);
> -     case TPIDR_EL1:         return read_sysreg_s(SYS_TPIDR_EL1);
> -     case AMAIR_EL1:         return read_sysreg_s(SYS_AMAIR_EL12);
> -     case CNTKCTL_EL1:       return read_sysreg_s(SYS_CNTKCTL_EL12);
> -     case PAR_EL1:           return read_sysreg_s(SYS_PAR_EL1);
> -     case DACR32_EL2:        return read_sysreg_s(SYS_DACR32_EL2);
> -     case IFSR32_EL2:        return read_sysreg_s(SYS_IFSR32_EL2);
> -     case DBGVCR32_EL2:      return read_sysreg_s(SYS_DBGVCR32_EL2);
> -     case SP_EL2:            return read_sysreg(sp_el1);
> -     case ELR_EL2:           return read_sysreg_el1(SYS_ELR);
> -     }
> +     /* EL1 register can't be on the CPU if the guest is in vEL2. */
> +     if (unlikely(is_hyp_ctxt(vcpu)))
> +             goto memory_read;
> +
> +     if (__vcpu_read_sys_reg_from_cpu(reg, &val))
> +             return val;
>  
> -immediate_read:
> +memory_read:
>       return __vcpu_sys_reg(vcpu, reg);
>  }
>  
>  void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
>  {
>       if (!vcpu->arch.sysregs_loaded_on_cpu)
> -             goto immediate_write;
> +             goto memory_write;
>  
>       if (unlikely(sysreg_is_el2(reg))) {
>               const struct el2_sysreg_map *el2_reg;
>  
>               if (!is_hyp_ctxt(vcpu))
> -                     goto immediate_write;
> +                     goto memory_write;
>  
> -             /* Store the EL2 version in the sysregs array. */
> +             /*
> +              * Always store a copy of the write to memory to avoid having
> +              * to reverse-translate virtual EL2 system registers for a
> +              * non-VHE guest hypervisor.
> +              */
>               __vcpu_sys_reg(vcpu, reg) = val;
>  
>               switch (reg) {
> +             case ELR_EL2:
> +                     write_sysreg_el1(val, SYS_ELR);
> +                     return;
>               case SPSR_EL2:
>                       val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val);
>                       write_sysreg_el1(val, SYS_SPSR);
> @@ -282,61 +344,30 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, 
> int reg)
>               }
>  
>               el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
> -             if (el2_reg) {
> -                     /* Does this register have an EL1 counterpart? */
> -                     if (el2_reg->mapping == __INVALID_SYSREG__)
> -                             return;
> +             WARN(!el2_reg, "reg: %d\n", reg);
>  
> -                     if (!vcpu_el2_e2h_is_set(vcpu) &&
> -                         el2_reg->translate)
> -                             val = el2_reg->translate(val);
> +             /* Does this register have an EL1 counterpart? */
> +             if (el2_reg->mapping == __INVALID_SYSREG__)
> +                     goto memory_write;
>  
> -                     /* Redirect this to the EL1 version of the register. */
> -                     reg = el2_reg->mapping;
> -             }
> -     } else {
> -             /* EL1 register can't be on the CPU if the guest is in vEL2. */
> -             if (unlikely(is_hyp_ctxt(vcpu)))
> -                     goto immediate_write;
> -     }
> +             if (!vcpu_el2_e2h_is_set(vcpu) &&
> +                 el2_reg->translate)
> +                     val = el2_reg->translate(val);
>  
> -     /*
> -      * System registers listed in the switch are not restored on every
> -      * entry to the guest but are only restored on vcpu_load.
> -      *
> -      * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> -      * should never be listed below, because the the MPIDR should only be
> -      * set once, before running the VCPU, and never changed later.
> -      */
> -     switch (reg) {
> -     case CSSELR_EL1:        write_sysreg_s(val, SYS_CSSELR_EL1);    return;
> -     case SCTLR_EL1:         write_sysreg_s(val, SYS_SCTLR_EL12);    return;
> -     case ACTLR_EL1:         write_sysreg_s(val, SYS_ACTLR_EL1);     return;
> -     case CPACR_EL1:         write_sysreg_s(val, SYS_CPACR_EL12);    return;
> -     case TTBR0_EL1:         write_sysreg_s(val, SYS_TTBR0_EL12);    return;
> -     case TTBR1_EL1:         write_sysreg_s(val, SYS_TTBR1_EL12);    return;
> -     case TCR_EL1:           write_sysreg_s(val, SYS_TCR_EL12);      return;
> -     case ESR_EL1:           write_sysreg_s(val, SYS_ESR_EL12);      return;
> -     case AFSR0_EL1:         write_sysreg_s(val, SYS_AFSR0_EL12);    return;
> -     case AFSR1_EL1:         write_sysreg_s(val, SYS_AFSR1_EL12);    return;
> -     case FAR_EL1:           write_sysreg_s(val, SYS_FAR_EL12);      return;
> -     case MAIR_EL1:          write_sysreg_s(val, SYS_MAIR_EL12);     return;
> -     case VBAR_EL1:          write_sysreg_s(val, SYS_VBAR_EL12);     return;
> -     case CONTEXTIDR_EL1:    write_sysreg_s(val, SYS_CONTEXTIDR_EL12); 
> return;
> -     case TPIDR_EL0:         write_sysreg_s(val, SYS_TPIDR_EL0);     return;
> -     case TPIDRRO_EL0:       write_sysreg_s(val, SYS_TPIDRRO_EL0);   return;
> -     case TPIDR_EL1:         write_sysreg_s(val, SYS_TPIDR_EL1);     return;
> -     case AMAIR_EL1:         write_sysreg_s(val, SYS_AMAIR_EL12);    return;
> -     case CNTKCTL_EL1:       write_sysreg_s(val, SYS_CNTKCTL_EL12);  return;
> -     case PAR_EL1:           write_sysreg_s(val, SYS_PAR_EL1);       return;
> -     case DACR32_EL2:        write_sysreg_s(val, SYS_DACR32_EL2);    return;
> -     case IFSR32_EL2:        write_sysreg_s(val, SYS_IFSR32_EL2);    return;
> -     case DBGVCR32_EL2:      write_sysreg_s(val, SYS_DBGVCR32_EL2);  return;
> -     case SP_EL2:            write_sysreg(val, sp_el1);              return;
> -     case ELR_EL2:           write_sysreg_el1(val, SYS_ELR);         return;
> +             /* Redirect this to the EL1 version of the register. */
> +             reg = el2_reg->mapping;
> +             WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, reg));
> +             return;
>       }
>  
> -immediate_write:
> +     /* EL1 register can't be on the CPU if the guest is in vEL2. */
> +     if (unlikely(is_hyp_ctxt(vcpu)))
> +             goto memory_write;
> +
> +     if (__vcpu_write_sys_reg_to_cpu(val, reg))
> +             return;
> +
> +memory_write:
>        __vcpu_sys_reg(vcpu, reg) = val;
>  }
>  
_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to