Simplify it to call an asm-function instead of pasting 41 insn bytes at
every call site. Also, add alignment to the macro as suggested here:

  https://support.google.com/faqs/answer/7625886

Signed-off-by: Borislav Petkov <[email protected]>
Cc: David Woodhouse <[email protected]>
---
 arch/x86/entry/entry_32.S             |  2 +-
 arch/x86/entry/entry_64.S             |  2 +-
 arch/x86/include/asm/asm-prototypes.h |  3 +++
 arch/x86/include/asm/nospec-branch.h  | 49 +++++------------------------------
 arch/x86/lib/Makefile                 |  1 +
 arch/x86/lib/retpoline.S              | 44 +++++++++++++++++++++++++++++++
 6 files changed, 56 insertions(+), 45 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 60c4c342316c..f7823a5a8714 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,7 +252,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ff6f8022612c..7a190ff524e2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -499,7 +499,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 1908214b9125..4d111616524b 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,4 +38,7 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
+asmlinkage void __fill_rsb(void);
+asmlinkage void __clear_rsb(void);
+
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 865192a2cc31..4f88e1b2599f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -27,30 +27,6 @@
 #define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
 #define RSB_FILL_LOOPS         16      /* To avoid underflow */
 
-/*
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
-       mov     $(nr/2), reg;                   \
-771:                                           \
-       call    772f;                           \
-773:   /* speculation trap */                  \
-       pause;                                  \
-       lfence;                                 \
-       jmp     773b;                           \
-772:                                           \
-       call    774f;                           \
-775:   /* speculation trap */                  \
-       pause;                                  \
-       lfence;                                 \
-       jmp     775b;                           \
-774:                                           \
-       dec     reg;                            \
-       jnz     771b;                           \
-       add     $(BITS_PER_LONG/8) * nr, sp;
-
 #ifdef __ASSEMBLY__
 
 /*
@@ -121,17 +97,9 @@
 #endif
 .endm
 
- /*
-  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
-  * monstrosity above, manually.
-  */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+.macro FILL_RETURN_BUFFER nr:req ftr:req       /* NOTE(review): nr is still required but is not referenced in the body below */
 #ifdef CONFIG_RETPOLINE
-       ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
-               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
-               \ftr
-.Lskip_rsb_\@:
+       ALTERNATIVE "", "call __clear_rsb", \ftr        /* replacement is a plain call; the loop count comes from __clear_rsb, not from nr */
 #endif
 .endm
 
@@ -206,15 +174,10 @@ extern char __indirect_thunk_end[];
 static inline void vmexit_fill_RSB(void)
 {
 #ifdef CONFIG_RETPOLINE
-       unsigned long loops;
-
-       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
-                     ALTERNATIVE("jmp 910f",
-                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
-                                 X86_FEATURE_RETPOLINE)
-                     "910:"
-                     : "=r" (loops), ASM_CALL_CONSTRAINT
-                     : : "memory" );
+       alternative_input("",
+                         "call __clear_rsb",   /* keeps the RSB_CLEAR_LOOPS count the removed code used; __fill_rsb would halve it to RSB_FILL_LOOPS */
+                         X86_FEATURE_RETPOLINE,
+                         ASM_NO_INPUT_CLOBBER("memory"));
 #endif
 }
 
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f23934bbaf4e..69a473919260 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_RETPOLINE) += retpoline.o
+OBJECT_FILES_NON_STANDARD_retpoline.o :=y
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index c909961e678a..3dcabe2ea2d6 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,6 +7,7 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
+#include <asm/bitsperlong.h>
 
 .macro THUNK reg
        .section .text.__x86.indirect_thunk
@@ -19,6 +20,37 @@ ENDPROC(__x86_indirect_thunk_\reg)
 .endm
 
 /*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+.macro BOINK_RSB nr:req sp:req                 /* nr: number of return addresses to push; sp: stack pointer register. Modifies flags (dec/add). */
+       push %_ASM_AX                           /* AX serves as the loop counter; saved here, restored by the pop below */
+       mov     $(\nr / 2), %_ASM_AX            /* two calls per iteration, hence nr/2 iterations (rounds down: nr is assumed even, callers pass 16 or 32) */
+       .align 16
+771:
+       call    772f                            /* pushes 773 as return address, continues at 772 */
+773:                                           /* speculation trap */
+       pause
+       lfence
+       jmp     773b
+       .align 16
+772:
+       call    774f                            /* pushes 775 as return address, continues at 774 */
+775:                                           /* speculation trap */
+       pause
+       lfence
+       jmp     775b
+       .align 16
+774:
+       dec     %_ASM_AX
+       jnz     771b
+       add     $((BITS_PER_LONG/8) * \nr), \sp /* drop the nr return addresses the calls left on the stack */
+       pop %_ASM_AX                            /* restore the caller's AX */
+.endm
+
+
+/*
  * Despite being an assembler file we can't just use .irp here
  * because __KSYM_DEPS__ only uses the C preprocessor and would
  * only see one instance of "__x86_indirect_thunk_\reg" rather
@@ -46,3 +78,15 @@ GENERATE_THUNK(r13)
 GENERATE_THUNK(r14)
 GENERATE_THUNK(r15)
 #endif
+
+ENTRY(__fill_rsb)                              /* void __fill_rsb(void): runs BOINK_RSB with RSB_FILL_LOOPS on the current stack pointer, then returns */
+       BOINK_RSB RSB_FILL_LOOPS, %_ASM_SP      /* macro saves/restores AX and rebalances the stack itself */
+       ret
+END(__fill_rsb)
+EXPORT_SYMBOL_GPL(__fill_rsb)
+
+ENTRY(__clear_rsb)                             /* void __clear_rsb(void): runs BOINK_RSB with RSB_CLEAR_LOOPS on the current stack pointer, then returns */
+       BOINK_RSB RSB_CLEAR_LOOPS, %_ASM_SP     /* macro saves/restores AX and rebalances the stack itself */
+       ret
+END(__clear_rsb)
+EXPORT_SYMBOL_GPL(__clear_rsb)
-- 
2.13.0


-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Reply via email to