Use the live update infrastructure to make kexec safe when Microsoft Hypervisor is active.
The kernel cannot access hypervisor-deposited pages; any access triggers a GPF. Until the deposited-page state can be handed over to the next kernel, kexec must be blocked if there is any shared state between the kernel and the hypervisor. During the freeze stage: - Refuse the transition while VMs are running - Withdraw all pages from L1VH host (guest pages are withdrawn upon guest shutdown) - Verify no deposited pages remain Abort kexec if any of the above checks fail. Signed-off-by: Stanislav Kinsburskii <[email protected]> --- MAINTAINERS | 1 drivers/hv/Kconfig | 1 drivers/hv/Makefile | 1 drivers/hv/mshv_luo.c | 113 ++++++++++++++++++++++++++++++++++++++++++ drivers/hv/mshv_root.h | 13 +++++ drivers/hv/mshv_root_main.c | 7 +++ include/linux/kho/abi/mshv.h | 14 +++++ 7 files changed, 150 insertions(+) create mode 100644 drivers/hv/mshv_luo.c create mode 100644 include/linux/kho/abi/mshv.h diff --git a/MAINTAINERS b/MAINTAINERS index 5b11839cba9d..d625a1c111e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11756,6 +11756,7 @@ F: include/hyperv/hvgdk_mini.h F: include/hyperv/hvhdk.h F: include/hyperv/hvhdk_mini.h F: include/linux/hyperv.h +F: include/linux/kho/abi/mshv.h F: include/net/mana F: include/uapi/linux/hyperv.h F: include/uapi/rdma/mana-abi.h diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 7937ac0cbd0f..94887b8b92b5 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -78,6 +78,7 @@ config MSHV_ROOT select VIRT_XFER_TO_GUEST_WORK select HMM_MIRROR select MMU_NOTIFIER + select LIVEUPDATE default n help Select this option to enable support for booting and running as root diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index a49f93c2d245..73258fb811eb 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \ mshv_root_hv_call.o 
mshv_portid_table.o mshv_regions.o +mshv_root-$(CONFIG_LIVEUPDATE) += mshv_luo.o mshv_vtl-y := mshv_vtl_main.o # Code that must be built-in diff --git a/drivers/hv/mshv_luo.c b/drivers/hv/mshv_luo.c new file mode 100644 index 000000000000..eed7755fc27e --- /dev/null +++ b/drivers/hv/mshv_luo.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2026, Microsoft Corporation. + * + * Live update orchestration management for mshv_root module. + * + * Author: Stanislav Kinsburskii <[email protected]> + */ + +#include <linux/errno.h> +#include <linux/liveupdate.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/kho/abi/mshv.h> +#include <asm/mshyperv.h> +#include "mshv_root.h" + +static struct file *mshv_luo_file; + +static void mshv_luo_finish(struct liveupdate_file_op_args *args) +{ +} + +static int mshv_luo_retrieve(struct liveupdate_file_op_args *args) +{ + return 0; +} + +static int mshv_luo_freeze(struct liveupdate_file_op_args *args) +{ + if (!hash_empty(mshv_root.pt_htable)) { + pr_warn("mshv: Cannot perform live update while VMs are active\n"); + return -EBUSY; + } + + if (hv_l1vh_partition()) { + int err; + + /* Attempt to withdraw all the deposited pages */ + err = hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, + hv_current_partition_id); + if (err) { + pr_err("mshv: Failed to withdraw memory from L1 virtualization: %d\n", err); + return err; + } + } + + if (atomic_read(&hv_pages_deposited)) { + pr_warn("mshv: Cannot perform live update while pages are deposited\n"); + return -EBUSY; + } + return 0; +} + +static void mshv_luo_unpreserve(struct liveupdate_file_op_args *args) +{ +} + +static int mshv_luo_preserve(struct liveupdate_file_op_args *args) +{ + return 0; +} + +static bool mshv_luo_can_preserve(struct liveupdate_file_handler *handler, + struct file *file) +{ + return file == mshv_luo_file; +} + +static const struct liveupdate_file_ops mshv_luo_file_ops = { + .can_preserve = mshv_luo_can_preserve, + 
.preserve = mshv_luo_preserve, + .unpreserve = mshv_luo_unpreserve, + .retrieve = mshv_luo_retrieve, + .freeze = mshv_luo_freeze, + .finish = mshv_luo_finish, + .owner = THIS_MODULE, +}; + +static struct liveupdate_file_handler mshv_luo_handler = { + .ops = &mshv_luo_file_ops, + .compatible = MSHV_LUO_FH_COMPATIBLE, +}; + +int __init mshv_luo_init(void) +{ + int err; + + err = liveupdate_register_file_handler(&mshv_luo_handler); + if (err && err != -EOPNOTSUPP) { + pr_err("Could not register luo filesystem handler: %pe\n", + ERR_PTR(err)); + return err; + } + + err = liveupdate_session_create("mshv_root", &mshv_luo_file); + if (err) + goto err_session; + + pr_info("mshv_root live update handler registered\n"); + return 0; + +err_session: + liveupdate_unregister_file_handler(&mshv_luo_handler); + return err; +} + +void __exit mshv_luo_exit(void) +{ + if (mshv_luo_file) + fput(mshv_luo_file); + liveupdate_unregister_file_handler(&mshv_luo_handler); +} diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h index c792afce0839..89d5ece0b538 100644 --- a/drivers/hv/mshv_root.h +++ b/drivers/hv/mshv_root.h @@ -17,6 +17,7 @@ #include <linux/build_bug.h> #include <linux/mmu_notifier.h> #include <uapi/linux/mshv.h> +#include <hyperv/hvhdk.h> /* * Hypervisor must be between these version numbers (inclusive) @@ -334,4 +335,16 @@ bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn); void mshv_region_movable_fini(struct mshv_mem_region *region); bool mshv_region_movable_init(struct mshv_mem_region *region); +#if IS_ENABLED(CONFIG_LIVEUPDATE) +int __init mshv_luo_init(void); +void __exit mshv_luo_exit(void); +#else +static inline int mshv_luo_init(void) +{ + return 0; +} + +static inline void mshv_luo_exit(void) { } +#endif + #endif /* _MSHV_ROOT_H_ */ diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 5fc572e31cd7..c0274bbc65ac 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -2330,6 +2330,10 @@ 
static int __init mshv_parent_partition_init(void) if (ret) goto deinit_root_scheduler; + ret = mshv_luo_init(); + if (ret) + goto deinit_irqfd_wq; + spin_lock_init(&mshv_root.pt_ht_lock); hash_init(mshv_root.pt_htable); @@ -2337,6 +2341,8 @@ static int __init mshv_parent_partition_init(void) return 0; +deinit_irqfd_wq: + mshv_irqfd_wq_cleanup(); deinit_root_scheduler: root_scheduler_deinit(); exit_partition: @@ -2356,6 +2362,7 @@ static void __exit mshv_parent_partition_exit(void) hv_setup_mshv_handler(NULL); mshv_port_table_fini(); misc_deregister(&mshv_dev); + mshv_luo_exit(); mshv_irqfd_wq_cleanup(); root_scheduler_deinit(); if (hv_root_partition()) diff --git a/include/linux/kho/abi/mshv.h b/include/linux/kho/abi/mshv.h new file mode 100644 index 000000000000..e6ae5a731802 --- /dev/null +++ b/include/linux/kho/abi/mshv.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2026, Microsoft Corporation. + * + * Author: Stanislav Kinsburskii <[email protected]> + */ + +#ifndef _LINUX_KHO_ABI_MSHV_H +#define _LINUX_KHO_ABI_MSHV_H + +/* The compatibility string for mshv file handler */ +#define MSHV_LUO_FH_COMPATIBLE "mshv-v1" + +#endif /* _LINUX_KHO_ABI_MSHV_H */
