Using the same thunk for heterogeneous calls (i.e. ones to functions with entirely different parameter types) is a problem: Speculation may e.g. result in use as pointers for arguments which are are actually integers. This is a result of target prediction information being tied to the address of the RET instruction in each thunk (of which we've got only 15, and of which in turn a fair share of the call sites use the single one where %rax is holding the target address).
Except when actually using the full retpoline, arrange for alternatives patching to put the JMP and LFENCE forms of the thunk back inline. Signed-off-by: Jan Beulich <jbeul...@suse.com> --- I'm sure I didn't get the reasoning right / complete; I'd appreciate clear enough indication of what needs adding/changing. Some of the changes to may not strictly be necessary: - INDIRECT_BRANCH: We don't have any uses left in assembly code. - GEN_INDIRECT_THUNK: Continuing to patch the thunks when they're not used is merely pointless. The change, however, is more in anticipation that X86_FEATURE_IND_THUNK_{LFENCE,JMP} may not be fully suitable names anymore when the code gets inlined (at least I probably wouldn't call such "thunk" anymore). While perhaps not a big problem, I'd like to point out that this more than doubles the size of the .altinstr_replacement section (with an accordingly large increase of .altinstructions), unless we were to make use of "x86/alternatives: allow replacement code snippets to be merged" (which of course affects only .altinstr_replacement). --- a/xen/Makefile +++ b/xen/Makefile @@ -195,7 +195,7 @@ t2 = $(call as-insn,$(CC) -I$(BASEDIR)/a # https://bugs.llvm.org/show_bug.cgi?id=36110 t3 = $(call as-insn,$(CC),".macro FOO;.endm"$(close); asm volatile $(open)".macro FOO;.endm",-no-integrated-as) -CLANG_FLAGS += $(call or,$(t1),$(t2),$(t3)) +CLANG_FLAGS += $(call or,$(t1),$(t2),$(t3),-DCLANG_INTEGRATED_AS) endif CLANG_FLAGS += -Werror=unknown-warning-option --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -240,7 +240,7 @@ $(obj)/efi/buildid.o $(obj)/efi/relocs-d .PHONY: include include: $(BASEDIR)/arch/x86/include/asm/asm-macros.h -$(obj)/asm-macros.i: CFLAGS-y += -D__ASSEMBLY__ -P +$(obj)/asm-macros.i: CFLAGS-y += -D__ASSEMBLY__ -D__ASM_MACROS__ -P $(BASEDIR)/arch/x86/include/asm/asm-macros.h: $(obj)/asm-macros.i $(src)/Makefile $(call filechk,asm-macros.h) --- a/xen/arch/x86/alternative.c +++ b/xen/arch/x86/alternative.c @@ -244,11 +244,19 @@ static void init_or_livepatch _apply_alt a->priv = 1; - /* Nothing useful to do? */ - if ( toolchain_nops_are_ideal || a->pad_len <= 1 ) + /* No padding at all? */ + if ( !a->pad_len ) continue; - add_nops(buf, a->pad_len); + /* For a JMP to an indirect thunk, replace NOP by INT3. */ + if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) && + a->cpuid == X86_FEATURE_IND_THUNK_JMP && orig[0] == 0xe9 ) + memset(buf, 0xcc, a->pad_len); + /* Nothing useful to do? */ + else if ( toolchain_nops_are_ideal || a->pad_len <= 1 ) + continue; + else + add_nops(buf, a->pad_len); text_poke(orig + a->orig_len, buf, a->pad_len); continue; } @@ -330,7 +338,12 @@ static void init_or_livepatch _apply_alt a->priv = 1; - add_nops(buf + a->repl_len, total_len - a->repl_len); + /* When re-inlining an indirect JMP, pad it by INT3, not NOPs. */ + if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) && + a->cpuid == X86_FEATURE_IND_THUNK_JMP && orig[0] == 0xe9 ) + memset(buf + a->repl_len, 0xcc, total_len - a->repl_len); + else + add_nops(buf + a->repl_len, total_len - a->repl_len); text_poke(orig, buf, total_len); } --- a/xen/arch/x86/asm-macros.c +++ b/xen/arch/x86/asm-macros.c @@ -1,2 +1,3 @@ #include <asm/asm-defns.h> #include <asm/alternative-asm.h> +#include <asm/spec_ctrl_asm.h> --- a/xen/arch/x86/extable.c +++ b/xen/arch/x86/extable.c @@ -98,12 +98,42 @@ search_exception_table(const struct cpu_ regs->rsp > (unsigned long)regs && regs->rsp < (unsigned long)get_cpu_info() ) { - unsigned long retptr = *(unsigned long *)regs->rsp; + unsigned long retaddr = *(unsigned long *)regs->rsp; + unsigned long retptr = 0; + unsigned int pad = 0; - region = find_text_region(retptr); - retptr = region && region->ex - ? search_one_extable(region->ex, region->ex_end - 1, retptr) - : 0; + region = find_text_region(retaddr); + while ( region && region->ex ) + { + retptr = search_one_extable(region->ex, region->ex_end - 1, + retaddr + pad); + if ( !retptr ) + { + /* + * Indirect call thunk patching can lead to NOP padding between + * the CALL and the recovery entry recorded in .fixup. Skip + * past such NOPs. See asm/nops.h for the forms possible and + * note that no more than 3 bytes of padding will be present. + */ + const uint8_t *ptr = (void *)retaddr + pad; + + switch ( ptr[0] ) + { + case 0x66: + case 0x90: + if ( ++pad > 3 ) + break; + continue; + + case 0x0f: + if ( pad || ptr[1] != 0x1f || ptr[2] != 0x00 ) + break; + pad = 3; + continue; + } + } + break; + } if ( retptr ) { /* --- a/xen/arch/x86/include/asm/asm-defns.h +++ b/xen/arch/x86/include/asm/asm-defns.h @@ -33,7 +33,13 @@ $done = 0 .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 .ifeqs "\arg", "%r\reg" +#ifdef __ASM_MACROS__ \insn __x86_indirect_thunk_r\reg +#else + ALTERNATIVE_2 "\insn __x86_indirect_thunk_r\reg", \ + "lfence; \insn *\arg", X86_FEATURE_IND_THUNK_LFENCE, \ + "\insn *\arg", X86_FEATURE_IND_THUNK_JMP +#endif $done = 1 .exitm .endif --- a/xen/arch/x86/include/asm/spec_ctrl_asm.h +++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h @@ -20,7 +20,7 @@ #ifndef __X86_SPEC_CTRL_ASM_H__ #define __X86_SPEC_CTRL_ASM_H__ -#ifdef __ASSEMBLY__ +#if defined(__ASSEMBLY__) && !defined(__ASM_MACROS__) #include <asm/msr-index.h> #include <asm/spec_ctrl.h> @@ -300,7 +300,50 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): .L\@_skip: .endm -#endif /* __ASSEMBLY__ */ +#elif defined(__ASM_MACROS__) && defined(CONFIG_INDIRECT_THUNK) && \ + !defined(CLANG_INTEGRATED_AS) + +/* + * To allow re-inlining of indirect branches, override CALL and JMP insn + * mnemonics, to attach alternatives patching data. + */ +.macro call operand:vararg + branch$ call \operand +.endm +.macro callq operand:vararg + branch$ callq \operand +.endm +.macro jmp operand:vararg + branch$ jmp \operand +.endm +.macro branch$ insn:req, operand:vararg + .purgem \insn + + $done = 0 + .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 + .ifeqs "\operand", "__x86_indirect_thunk_r\reg" + ALTERNATIVE_2 "\insn \operand", \ + "lfence; \insn *%r\reg", X86_FEATURE_IND_THUNK_LFENCE, \ + "\insn *%r\reg", X86_FEATURE_IND_THUNK_JMP + $done = 1 + .exitm + .endif + .ifeqs "\operand", "__x86_indirect_thunk_r\reg\(@plt)" + .error "unexpected: \insn \operand" + .exitm + .endif + .endr + + .if !$done + \insn \operand + .endif + + .macro \insn operand:vararg + branch$ \insn \\(operand) + .endm +.endm + +#endif /* __ASSEMBLY__ / __ASM_MACROS__ */ #endif /* !__X86_SPEC_CTRL_ASM_H__ */ /* --- a/xen/arch/x86/indirect-thunk.S +++ b/xen/arch/x86/indirect-thunk.S @@ -38,9 +38,13 @@ .section .text.__x86_indirect_thunk_\reg, "ax", @progbits ENTRY(__x86_indirect_thunk_\reg) +#ifdef CLANG_INTEGRATED_AS ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg), \ __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \ __stringify(IND_THUNK_JMP \reg), X86_FEATURE_IND_THUNK_JMP +#else + IND_THUNK_RETPOLINE \reg +#endif int3 /* Halt straight-line speculation */