From: Borislav Petkov <b...@alien8.de>

Simplify it to call an asm-function instead of pasting 41 insn bytes at
every call site. Also, add alignment to the macro as suggested here:

  https://support.google.com/faqs/answer/7625886

[dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler]
Signed-off-by: Borislav Petkov <b...@suse.de>
Signed-off-by: David Woodhouse <d...@amazon.co.uk>
---
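Note for reviewers (kept below the '---' so git am drops it): with this
patch, each FILL_RETURN_BUFFER / vmexit_fill_RSB site assembles to NOP
padding and is rewritten at boot to a single "call __clear_rsb" or
"call __fill_rsb" once the feature bit is established. A rough,
self-contained C model of that patching step; the names and table
layout here are hypothetical stand-ins, not the kernel's real
struct alt_instr:

  /* Illustrative sketch of ALTERNATIVE-style boot-time patching. */
  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  struct alt_entry {
          unsigned char *site;            /* patchable instruction bytes */
          size_t len;                     /* bytes reserved at the site */
          const unsigned char *repl;      /* e.g. "call __clear_rsb" */
          size_t repl_len;
          int feature;                    /* required CPU feature bit */
  };

  static bool cpu_has(int feature)
  {
          return feature == 1;            /* pretend RETPOLINE is set */
  }

  static void apply_alternatives(struct alt_entry *tbl, size_t n)
  {
          for (size_t i = 0; i < n; i++) {
                  if (!cpu_has(tbl[i].feature))
                          continue;       /* leave the NOPs in place */
                  memcpy(tbl[i].site, tbl[i].repl, tbl[i].repl_len);
                  /* pad any remainder with NOPs (0x90) */
                  memset(tbl[i].site + tbl[i].repl_len, 0x90,
                         tbl[i].len - tbl[i].repl_len);
          }
  }

  int main(void)
  {
          unsigned char site[5] = { 0x90, 0x90, 0x90, 0x90, 0x90 };
          const unsigned char call[5] = { 0xe8, 0, 0, 0, 0 }; /* rel32 call */
          struct alt_entry tbl[] = { { site, 5, call, 5, 1 } };

          apply_alternatives(tbl, 1);
          printf("first byte now %#x (0xe8 = call)\n", site[0]);
          return 0;
  }
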
 arch/x86/entry/entry_32.S             |  3 +-
 arch/x86/entry/entry_64.S             |  3 +-
 arch/x86/include/asm/asm-prototypes.h |  3 ++
 arch/x86/include/asm/nospec-branch.h  | 70 ++++-------------------------------
 arch/x86/lib/Makefile                 |  1 +
 arch/x86/lib/retpoline.S              | 56 ++++++++++++++++++++++++++++
 6 files changed, 71 insertions(+), 65 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 60c4c34..2a35b1e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,7 +252,8 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       /* Clobbers %ebx */
+       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 63f4320..b4f0098 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -495,7 +495,8 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       /* Clobbers %rbx */
+       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 1908214..4d11161 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,4 +38,7 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
+asmlinkage void __fill_rsb(void);
+asmlinkage void __clear_rsb(void);
+
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 19ecb54..df4ececa 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -7,50 +7,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * We define a CPP macro such that it can be used from both .S files and
- * inline assembly. It's possible to do a .macro and then include that
- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
- */
-
-#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
-#define RSB_FILL_LOOPS         16      /* To avoid underflow */
-
-/*
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
-       mov     $(nr/2), reg;                   \
-771:                                           \
-       call    772f;                           \
-773:   /* speculation trap */                  \
-       pause;                                  \
-       lfence;                                 \
-       jmp     773b;                           \
-772:                                           \
-       call    774f;                           \
-775:   /* speculation trap */                  \
-       pause;                                  \
-       lfence;                                 \
-       jmp     775b;                           \
-774:                                           \
-       dec     reg;                            \
-       jnz     771b;                           \
-       add     $(BITS_PER_LONG/8) * nr, sp;
-
 #ifdef __ASSEMBLY__
 
 /*
@@ -121,17 +77,10 @@
 #endif
 .endm
 
- /*
-  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
-  * monstrosity above, manually.
-  */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+/* This clobbers the BX register */
+.macro FILL_RETURN_BUFFER nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-       ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
-               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
-               \ftr
-.Lskip_rsb_\@:
+       ALTERNATIVE "", "call __clear_rsb", \ftr
 #endif
 .endm
 
@@ -206,15 +155,10 @@ extern char __indirect_thunk_end[];
 static inline void vmexit_fill_RSB(void)
 {
 #ifdef CONFIG_RETPOLINE
-       unsigned long loops;
-
-       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
-                     ALTERNATIVE("jmp 910f",
-                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
-                                 X86_FEATURE_RETPOLINE)
-                     "910:"
-                     : "=r" (loops), ASM_CALL_CONSTRAINT
-                     : : "memory" );
+       alternative_input("",
+                         "call __fill_rsb",
+                         X86_FEATURE_RETPOLINE,
+                         ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
 #endif
 }
 
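A side note on the ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory") change above:
__fill_rsb() destroys %rbx (%ebx on 32-bit), and since vmexit_fill_RSB()
is plain C, the clobber list is the only way the compiler learns that;
without it, a value cached in the callee-saved register would be silently
corrupted. A minimal stand-alone illustration, assuming x86-64 GCC/Clang
(hypothetical code, not from the kernel):

  #include <stdio.h>

  static inline void fill_rsb_sketch(void)
  {
          /* Mirrors the no-input/no-output asm with BX + memory clobbers:
           * the compiler must not keep live values in %rbx across it. */
          asm volatile("movq $0, %%rbx" ::: "rbx", "memory");
  }

  int main(void)
  {
          long x = 42;           /* without the clobber, x could live in %rbx */
          fill_rsb_sketch();
          printf("%ld\n", x);    /* prints 42: the compiler kept x elsewhere */
          return 0;
  }
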
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index d435c89..d0a3170 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_RETPOLINE) += retpoline.o
+OBJECT_FILES_NON_STANDARD_retpoline.o :=y
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index c909961..3040848 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,6 +7,7 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
+#include <asm/bitsperlong.h>
 
 .macro THUNK reg
        .section .text.__x86.indirect_thunk
@@ -46,3 +47,58 @@ GENERATE_THUNK(r13)
 GENERATE_THUNK(r14)
 GENERATE_THUNK(r15)
 #endif
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+.macro STUFF_RSB nr:req sp:req
+       mov     $(\nr / 2), %_ASM_BX
+       .align 16
+771:
+       call    772f
+773:                                           /* speculation trap */
+       pause
+       lfence
+       jmp     773b
+       .align 16
+772:
+       call    774f
+775:                                           /* speculation trap */
+       pause
+       lfence
+       jmp     775b
+       .align 16
+774:
+       dec     %_ASM_BX
+       jnz     771b
+       add     $((BITS_PER_LONG/8) * \nr), \sp
+.endm
+
+#define RSB_FILL_LOOPS         16      /* To avoid underflow */
+
+ENTRY(__fill_rsb)
+       STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
+       ret
+END(__fill_rsb)
+EXPORT_SYMBOL_GPL(__fill_rsb)
+
+#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
+
+ENTRY(__clear_rsb)
+       STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
+       ret
+END(__clear_rsb)
+EXPORT_SYMBOL_GPL(__clear_rsb)
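
One more note on STUFF_RSB: every "call" above pushes a return address
that is deliberately never consumed by a "ret", so the macro has to
rebalance the stack before __fill_rsb/__clear_rsb return to their caller.
A quick arithmetic check, assuming a 64-bit build (BITS_PER_LONG == 64):

  #include <stdio.h>

  #define BITS_PER_LONG   64      /* assumption: x86-64 */
  #define RSB_CLEAR_LOOPS 32

  int main(void)
  {
          /* nr/2 loop iterations, two calls each => nr pushed addresses */
          unsigned int pushes = (RSB_CLEAR_LOOPS / 2) * 2;
          unsigned int adjust = (BITS_PER_LONG / 8) * RSB_CLEAR_LOOPS;

          printf("%u return addresses, sp += %u bytes\n", pushes, adjust);
          /* prints: 32 return addresses, sp += 256 bytes */
          return 0;
  }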
-- 
2.7.4
