On Fri, Feb 23, 2024 at 11:39:07AM -0800, Dave Hansen wrote: > On 2/12/24 02:44, Kirill A. Shutemov wrote: > > +static void tdx_kexec_stop_conversion(bool crash) > > +{ > > + /* Stop new private<->shared conversions */ > > + conversion_allowed = false; > > + > > + /* > > + * Make sure conversion_allowed is cleared before checking > > + * conversions_in_progress. > > + */ > > + barrier(); > > + > > + /* > > + * Crash kernel reaches here with interrupts disabled: can't wait for > > + * conversions to finish. > > + * > > + * If race happened, just report and proceed. > > + */ > > + if (!crash) { > > + unsigned long timeout; > > + > > + /* > > + * Wait for in-flight conversions to complete. > > + * > > + * Do not wait more than 30 seconds. > > + */ > > + timeout = 30 * USEC_PER_SEC; > > + while (atomic_read(&conversions_in_progress) && timeout--) > > + udelay(1); > > + } > > + > > + if (atomic_read(&conversions_in_progress)) > > + pr_warn("Failed to finish shared<->private conversions\n"); > > +} > > I'd really prefer we find a way to do this with actual locks, especially > 'conversion_allowed'. > > This is _awfully_ close to being able to be handled by a rwsem where the > readers are the converters and tdx_kexec_stop_conversion() takes a write.
Okay, here's what I come up with. It needs more testing. Any comments? diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index fd212c9bad89..5eb0dac33f37 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -6,8 +6,10 @@ #include <linux/cpufeature.h> #include <linux/debugfs.h> +#include <linux/delay.h> #include <linux/export.h> #include <linux/io.h> +#include <linux/kexec.h> #include <asm/coco.h> #include <asm/tdx.h> #include <asm/vmx.h> @@ -15,6 +17,7 @@ #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/pgtable.h> +#include <asm/set_memory.h> /* MMIO direction */ #define EPT_READ 0 @@ -837,6 +840,65 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, return 0; } +static void tdx_kexec_stop_conversion(bool crash) +{ + /* Stop new private<->shared conversions */ + if (!stop_memory_enc_conversion(!crash)) + pr_warn("Failed to finish shared<->private conversions\n"); +} + +static void tdx_kexec_unshare_mem(void) +{ + unsigned long addr, end; + long found = 0, shared; + + /* + * Walk direct mapping and convert all shared memory back to private, + */ + + addr = PAGE_OFFSET; + end = PAGE_OFFSET + get_max_mapped(); + + while (addr < end) { + unsigned long size; + unsigned int level; + pte_t *pte; + + pte = lookup_address(addr, &level); + size = page_level_size(level); + + if (pte && pte_decrypted(*pte)) { + int pages = size / PAGE_SIZE; + + /* + * Touching memory with shared bit set triggers implicit + * conversion to shared. + * + * Make sure nobody touches the shared range from + * now on. + */ + set_pte(pte, __pte(0)); + + if (!tdx_enc_status_changed(addr, pages, true)) { + pr_err("Failed to unshare range %#lx-%#lx\n", + addr, addr + size); + } + + found += pages; + } + + addr += size; + } + + __flush_tlb_all(); + + shared = atomic_long_read(&nr_shared); + if (shared != found) { + pr_err("shared page accounting is off\n"); + pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found); + } +} + void __init tdx_early_init(void) { struct tdx_module_args args = { @@ -896,6 +958,9 @@ void __init tdx_early_init(void) x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + x86_platform.guest.enc_kexec_stop_conversion = tdx_kexec_stop_conversion; + x86_platform.guest.enc_kexec_unshare_mem = tdx_kexec_unshare_mem; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index a5e89641bd2d..9d4a8e548820 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -48,8 +48,11 @@ int set_memory_wc(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); + +bool stop_memory_enc_conversion(bool wait); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); + int set_memory_np_noalias(unsigned long addr, int numpages); int set_memory_nonglobal(unsigned long addr, int numpages); int set_memory_global(unsigned long addr, int numpages); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 0d2267ad4e0e..e074b2aca970 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2176,12 +2176,32 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) return ret; } +static DECLARE_RWSEM(mem_enc_lock); + +bool stop_memory_enc_conversion(bool wait) +{ + if (!wait) + return down_write_trylock(&mem_enc_lock); + + down_write(&mem_enc_lock); + + return true; +} + static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) { - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return __set_memory_enc_pgtable(addr, numpages, enc); + int ret = 0; - return 0; + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { + if (!down_read_trylock(&mem_enc_lock)) + return -EBUSY; + + ret =__set_memory_enc_pgtable(addr, numpages, enc); + + up_read(&mem_enc_lock); + } + + return ret; } int set_memory_encrypted(unsigned long addr, int numpages) -- Kiryl Shutsemau / Kirill A. Shutemov _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec