Extend the static_call infrastructure to optimize the following common
pattern:

        if (func_ptr)
                func_ptr(args...)

For the trampoline (which is in effect a tail-call), a NULL function
pointer makes us patch the JMP.d32 into a RET, which then returns directly
to the caller of the trampoline.

For the in-line sites we replace the CALL with a NOP5 when the function
pointer is NULL.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
 arch/x86/kernel/static_call.c |   42 ++++++++++++++++++++++++++++++++----------
 include/linux/static_call.h   |    7 +++++++
 2 files changed, 39 insertions(+), 10 deletions(-)

--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -4,19 +4,41 @@
 #include <linux/bug.h>
 #include <asm/text-patching.h>
 
-static void __static_call_transform(void *insn, u8 opcode, void *func)
+enum insn_type {
+       call = 0, /* site call */
+       nop = 1,  /* site cond-call */
+       jmp = 2,  /* tramp / site tail-call */
+       ret = 3,  /* tramp / site cond-tail-call */
+};
+
+static void __static_call_transform(void *insn, enum insn_type type, void *func)
 {
-       const void *code = text_gen_insn(opcode, (long)insn, (long)func);
+       int size = CALL_INSN_SIZE;
+       const void *code;
 
-       if (WARN_ONCE(*(u8 *)insn != opcode,
-                     "unexpected static call insn opcode 0x%x at %pS\n",
-                     opcode, insn))
-               return;
+       switch (type) {
+       case call:
+               code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
+               break;
+
+       case nop:
+               code = ideal_nops[NOP_ATOMIC5];
+               break;
+
+       case jmp:
+               code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
+               break;
+
+       case ret:
+               code = text_gen_insn(RET_INSN_OPCODE, insn, func);
+               size = RET_INSN_SIZE;
+               break;
+       }
 
-       if (memcmp(insn, code, CALL_INSN_SIZE) == 0)
+       if (memcmp(insn, code, size) == 0)
                return;
 
-       text_poke_bp(insn, code, CALL_INSN_SIZE, NULL);
+       text_poke_bp(insn, code, size, NULL);
 }
 
 void arch_static_call_transform(void *site, void *tramp, void *func)
@@ -24,10 +46,10 @@ void arch_static_call_transform(void *si
        mutex_lock(&text_mutex);
 
        if (tramp)
-               __static_call_transform(tramp, JMP32_INSN_OPCODE, func);
+               __static_call_transform(tramp, jmp + !func, func);
 
        if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
-               __static_call_transform(site, CALL_INSN_OPCODE, func);
+               __static_call_transform(site, !func, func);
 
        mutex_unlock(&text_mutex);
 }
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -17,6 +17,7 @@
  *   DECLARE_STATIC_CALL(name, func);
  *   DEFINE_STATIC_CALL(name, func);
  *   static_call(name)(args...);
+ *   static_cond_call(name)(args...);
  *   static_call_update(name, func);
  *
  * Usage example:
@@ -105,6 +106,7 @@ extern int static_call_text_reserved(voi
        ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
 
 #define static_call(name)      STATIC_CALL_TRAMP(name)
+#define static_cond_call(name) STATIC_CALL_TRAMP(name)
 
 #define EXPORT_STATIC_CALL(name)                                       \
        EXPORT_SYMBOL(STATIC_CALL_NAME(name));                          \
@@ -128,6 +130,7 @@ struct static_call_key {
        ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
 
 #define static_call(name)      STATIC_CALL_TRAMP(name)
+#define static_cond_call(name) STATIC_CALL_TRAMP(name)
 
 static inline
 void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -161,6 +164,10 @@ struct static_call_key {
 #define static_call(name)                                              \
        ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_NAME(name).func))
 
+#define static_cond_call(name)                                         \
+       if (STATIC_CALL_NAME(name).func)                                \
+               ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_NAME(name).func))
+
 static inline
 void __static_call_update(struct static_call_key *key, void *tramp, void *func)
 {


Reply via email to