On Wed, 25 Mar 2026 00:36:18 +0000,
Wei-Lin Chang <[email protected]> wrote:
> 
> The API is designed for userspace to first call prepare_{l2_stack,
> hyp_state, eret_destination, nested_sync_handler}, with a function
> supplied to prepare_eret_destination() to be run in L2. Then run_l2()
> can be called in L1 to run the given function in L2.
> 
> Signed-off-by: Wei-Lin Chang <[email protected]>
> ---
>  tools/testing/selftests/kvm/Makefile.kvm      |  2 +
>  .../selftests/kvm/include/arm64/nested.h      | 18 ++++++
>  .../testing/selftests/kvm/lib/arm64/nested.c  | 61 +++++++++++++++++++
>  .../selftests/kvm/lib/arm64/nested_asm.S      | 35 +++++++++++
>  4 files changed, 116 insertions(+)
>  create mode 100644 tools/testing/selftests/kvm/include/arm64/nested.h
>  create mode 100644 tools/testing/selftests/kvm/lib/arm64/nested.c
>  create mode 100644 tools/testing/selftests/kvm/lib/arm64/nested_asm.S
> 
> diff --git a/tools/testing/selftests/kvm/Makefile.kvm 
> b/tools/testing/selftests/kvm/Makefile.kvm
> index 98da9fa4b8b7..5e681e8e0cd7 100644
> --- a/tools/testing/selftests/kvm/Makefile.kvm
> +++ b/tools/testing/selftests/kvm/Makefile.kvm
> @@ -34,6 +34,8 @@ LIBKVM_arm64 += lib/arm64/gic.c
>  LIBKVM_arm64 += lib/arm64/gic_v3.c
>  LIBKVM_arm64 += lib/arm64/gic_v3_its.c
>  LIBKVM_arm64 += lib/arm64/handlers.S
> +LIBKVM_arm64 += lib/arm64/nested.c
> +LIBKVM_arm64 += lib/arm64/nested_asm.S
>  LIBKVM_arm64 += lib/arm64/processor.c
>  LIBKVM_arm64 += lib/arm64/spinlock.c
>  LIBKVM_arm64 += lib/arm64/ucall.c
> diff --git a/tools/testing/selftests/kvm/include/arm64/nested.h 
> b/tools/testing/selftests/kvm/include/arm64/nested.h
> new file mode 100644
> index 000000000000..739ff2ee0161
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/include/arm64/nested.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * ARM64 Nested virtualization defines
> + */
> +
> +#ifndef SELFTEST_KVM_NESTED_H
> +#define SELFTEST_KVM_NESTED_H
> +
> +void prepare_l2_stack(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> +void prepare_hyp_state(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> +void prepare_eret_destination(struct kvm_vm *vm, struct kvm_vcpu *vcpu, void 
> *l2_pc);
> +void prepare_nested_sync_handler(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> +
> +void run_l2(void);
> +void after_hvc(void);
> +void do_hvc(void);
> +
> +#endif /* SELFTEST_KVM_NESTED_H */
> diff --git a/tools/testing/selftests/kvm/lib/arm64/nested.c 
> b/tools/testing/selftests/kvm/lib/arm64/nested.c
> new file mode 100644
> index 000000000000..111d02f44cfe
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/lib/arm64/nested.c
> @@ -0,0 +1,61 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * ARM64 Nested virtualization helpers
> + */
> +
> +#include "kvm_util.h"
> +#include "nested.h"
> +#include "processor.h"
> +#include "test_util.h"
> +
> +#include <asm/sysreg.h>
> +
> +static void hvc_handler(struct ex_regs *regs)
> +{
> +     GUEST_ASSERT_EQ(get_current_el(), 2);
> +     GUEST_PRINTF("hvc handler\n");
> +     regs->pstate = PSR_MODE_EL2h | PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | 
> PSR_F_BIT;
> +     regs->pc = (u64)after_hvc;
> +}
> +
> +void prepare_l2_stack(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> +     size_t l2_stack_size;
> +     uint64_t l2_stack_paddr;
> +
> +     l2_stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * 
> vm->page_size :
> +                                      vm->page_size;

Please use symbolic constants instead of the literal 4096. Also, this
looks wrong if the default stack size is 32k and the page size is 16k.
You probably want to express the stack size directly, rather than as a
number of pages.

> +     l2_stack_paddr = __vm_phy_pages_alloc(vm, l2_stack_size / vm->page_size,
> +                                           0, 0, false);
> +     vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), l2_stack_paddr + 
> l2_stack_size);
> +}
> +
> +void prepare_hyp_state(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> +     vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), HCR_EL2_RW);

Surely the E2H value matters. Or are you planning to run this only on
configurations that hardcode E2H==0? That'd be pretty limiting.

> +}
> +
> +void prepare_eret_destination(struct kvm_vm *vm, struct kvm_vcpu *vcpu, void 
> *l2_pc)
> +{
> +     vm_paddr_t do_hvc_paddr = addr_gva2gpa(vm, (vm_vaddr_t)do_hvc);
> +     vm_paddr_t l2_pc_paddr = addr_gva2gpa(vm, (vm_vaddr_t)l2_pc);
> +
> +     vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SPSR_EL2), PSR_MODE_EL1h |
> +                                                         PSR_D_BIT     |
> +                                                         PSR_A_BIT     |
> +                                                         PSR_I_BIT     |
> +                                                         PSR_F_BIT);
> +     vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ELR_EL2), l2_pc_paddr);
> +     /* HACK: use TPIDR_EL2 to pass address, see run_l2() in nested_asm.S */
> +     vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL2), do_hvc_paddr);
> +}
> +
> +void prepare_nested_sync_handler(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> +{
> +     if (!vm->handlers) {
> +             vm_init_descriptor_tables(vm);
> +             vcpu_init_descriptor_tables(vcpu);
> +     }
> +     vm_install_sync_handler(vm, VECTOR_SYNC_LOWER_64,
> +                             ESR_ELx_EC_HVC64, hvc_handler);
> +}
> diff --git a/tools/testing/selftests/kvm/lib/arm64/nested_asm.S 
> b/tools/testing/selftests/kvm/lib/arm64/nested_asm.S
> new file mode 100644
> index 000000000000..4ecf2d510a6f
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/lib/arm64/nested_asm.S
> @@ -0,0 +1,35 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * ARM64 Nested virtualization assembly helpers
> + */
> +
> +.globl run_l2
> +.globl after_hvc
> +.globl do_hvc
> +run_l2:
> +     /*
> +      * At this point TPIDR_EL2 will contain the gpa of do_hvc from
> +      * prepare_eret_destination(). The gpa of do_hvc has to be passed in
> +      * because we want L2 to issue an hvc after it returns from the user
> +      * passed function. In order for that to happen the lr must be
> +      * controlled, which at this point holds the value of the address of
> +      * the next instruction after this run_l2() call, which is not useful
> +      * for L2. Additionally, L1 can't translate gva into gpa, so we can't
> +      * calculate it here.
> +      *
> +      * So first save lr, then move TPIDR_EL2 to lr so when the user supplied
> +      * L2 function returns, L2 jumps to do_hvc and lets the L1 hvc handler
> +      * take control. This implies we expect the L2 code to preserve lr and
> +      * call a regular ret in the end, which is true for normal C functions.
> +      * The hvc handler will jump back to after_hvc when finished, and lr
> +      * will be restored and we can return from run_l2().
> +      */
> +     stp     x29, lr, [sp, #-16]!
> +     mrs     x0, tpidr_el2
> +     mov     lr, x0
> +     eret
> +after_hvc:
> +     ldp     x29, lr, [sp], #16
> +     ret
> +do_hvc:
> +     hvc #0

This probably works for a single-instruction L2 guest, but not having
any save/restore of the L2 context makes it hard to build anything on
top of this.

Thanks,

        M.

-- 
Without deviation from the norm, progress is not possible.

Reply via email to