During patching, there is a very slim risk that an NMI or MCE hits in the middle of altering the code in the NMI/MCE paths, in which case bad things will happen.
The NMI risk can be eliminated by running the patching loop in NMI context, at
which point the CPU will defer further NMIs until patching is complete.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
---
CC: Jan Beulich <jbeul...@suse.com>

v2:
 * Poll for alt_done in case self_nmi() happens to be asynchronous.

(A minimal userspace sketch of this self-NMI-and-poll pattern is appended
below the patch.)
---
 xen/arch/x86/alternative.c | 72 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 18 deletions(-)

diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
index ee18e6c..74d1206 100644
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -15,7 +15,9 @@
  * along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <xen/delay.h>
 #include <xen/types.h>
+#include <asm/apic.h>
 #include <asm/processor.h>
 #include <asm/alternative.h>
 #include <xen/init.h>
@@ -82,11 +84,6 @@ static const unsigned char * const p6_nops[ASM_NOP_MAX+1] init_or_livepatch_cons
 
 static const unsigned char * const *ideal_nops init_or_livepatch_data = p6_nops;
 
-static int __init mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
-{
-    return 1;
-}
-
 static void __init arch_init_ideal_nops(void)
 {
     switch ( boot_cpu_data.x86_vendor )
@@ -202,25 +199,52 @@ void init_or_livepatch apply_alternatives(const struct alt_instr *start,
     }
 }
 
+static bool __initdata alt_done;
+
+/*
+ * At boot time, we patch alternatives in NMI context.  This means that the
+ * active NMI-shadow will defer any further NMIs, removing the slim race
+ * condition where an NMI hits while we are midway through patching some
+ * instructions in the NMI path.
+ */
+static int __init nmi_apply_alternatives(const struct cpu_user_regs *regs,
+                                         int cpu)
+{
+    /*
+     * More than one NMI may occur between the two set_nmi_callback() below.
+     * We only need to apply alternatives once.
+     */
+    if ( !alt_done )
+    {
+        unsigned long cr0;
+
+        cr0 = read_cr0();
+
+        /* Disable WP to allow patching read-only pages. */
+        write_cr0(cr0 & ~X86_CR0_WP);
+
+        apply_alternatives(__alt_instructions, __alt_instructions_end);
+
+        write_cr0(cr0);
+
+        alt_done = true;
+    }
+
+    return 1;
+}
+
 /*
  * This routine is called with local interrupt disabled and used during
  * bootup.
  */
 void __init alternative_instructions(void)
 {
+    unsigned int i;
     nmi_callback_t *saved_nmi_callback;
-    unsigned long cr0 = read_cr0();
 
     arch_init_ideal_nops();
 
     /*
-     * The patching is not fully atomic, so try to avoid local interruptions
-     * that might execute the to be patched code.
-     * Other CPUs are not running.
-     */
-    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
-
-    /*
      * Don't stop machine check exceptions while patching.
      * MCEs only happen when something got corrupted and in this
      * case we must do something about the corruption.
@@ -232,13 +256,25 @@ void __init alternative_instructions(void)
      */
     ASSERT(!local_irq_is_enabled());
 
-    /* Disable WP to allow application of alternatives to read-only pages. */
-    write_cr0(cr0 & ~X86_CR0_WP);
+    /*
+     * As soon as the callback is set up, the next NMI will trigger patching,
+     * even an NMI ahead of our explicit self-NMI.
+     */
+    saved_nmi_callback = set_nmi_callback(nmi_apply_alternatives);
 
-    apply_alternatives(__alt_instructions, __alt_instructions_end);
+    /* Send ourselves an NMI to trigger the callback. */
+    self_nmi();
+
+    /*
+     * Sending ourselves an NMI isn't architecturally guaranteed to result in
+     * synchronous delivery (although in practice, it appears to be).
+     * Poll alt_done for up to 1 second.
+     */
+    for ( i = 0; !ACCESS_ONCE(alt_done) && i < 1000; ++i )
+        mdelay(1);
 
-    /* Reinstate WP. */
-    write_cr0(cr0);
+    if ( i >= 1000 )
+        panic("Timed out waiting for alternatives self-NMI to hit");
 
     set_nmi_callback(saved_nmi_callback);
 }
-- 
2.1.4
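
For illustration, here is the minimal userspace sketch referenced above.  It
mirrors the shape of the patch (install a one-shot handler, poke ourselves,
then poll a completion flag with a bounded timeout), using SIGUSR1 in place
of the self-NMI and usleep() in place of mdelay().  All names in it are
hypothetical and none of it is Xen code.  Note that raise() of an unblocked
signal in a single-threaded process is delivered synchronously, so the
timeout loop below only exists to mirror the patch's structure.

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t work_done;

static void one_shot_handler(int sig)
{
    (void)sig;

    /* More than one signal may arrive; only do the work once. */
    if ( !work_done )
        work_done = 1;
}

int main(void)
{
    unsigned int i;
    struct sigaction sa;

    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = one_shot_handler;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGUSR1, &sa, NULL);

    raise(SIGUSR1); /* Analogue of self_nmi(). */

    /* Bounded poll, mirroring the patch's 1000 * mdelay(1) loop. */
    for ( i = 0; !work_done && i < 1000; ++i )
        usleep(1000);

    if ( i >= 1000 )
    {
        fprintf(stderr, "Timed out waiting for self-signal\n");
        exit(EXIT_FAILURE);
    }

    printf("Work ran exactly once, in handler context\n");
    return 0;
}

The real patch differs in one important way: in NMI context it is the
CPU's NMI-shadow, not the software flag alone, that holds off further NMIs
while apply_alternatives() runs.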