On Thu, Jun 21, 2018 at 07:16:32AM -0400, Pavel Tatashin wrote:
> > Do we still need add a static_key? after Peter worked out the patch
> > to enable early jump_label_init?
> 
> Hi Feng,
> 
> With Pete's patch we will still need at least one static branch, but
> as I replied to Pete's email I like the idea of initializing
> jump_label_init() early, but in my opinion it should be a separate
> work, with tsc.c cleanup patch.

Bah, no, we don't make a mess first and then maybe clean it up.

Have a look at the below. The patch is a mess, but I have two sick kids
on my hands; please clean up / split where appropriate.

Seems to work though:

Booting the kernel.
[    0.000000] microcode: microcode updated early to revision 0x428, date = 
2014-05-29
[    0.000000] Linux version 4.17.0-09589-g7a36b8fc167a-dirty (root@ivb-ep) 
(gcc version 7.3.0 (Debian 7.3.0-3)) #360 SMP PREEMPT Thu Jun 21 15:03:32 CEST 
2018
[    0.000000] Command line: 
BOOT_IMAGE=/boot/vmlinuz-4.17.0-09589-g7a36b8fc167a-dirty 
root=UUID=ee91c0f0-977f-434d-bfaa-92daf7cdbe07 ro possible_cpus=40 debug 
ignore_loglevel sysrq_always_enabled ftrace=nop earlyprintk=serial,ttyS0,115200 
console=ttyS0,115200 no_console_suspend force_early_printk sched_debug
[    0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point 
registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[    0.000000] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
[    0.000000] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, 
using 'standard' format.
[    0.000000] key: ffffffff83033e90 enabled: 0 enable: 1
[    0.000000] transform: setup_arch+0x104/0xc6a type: 1
[    0.000000] transform: setup_arch+0xcf/0xc6a type: 1
[    0.000000] transform: setup_arch+0xf3/0xc6a type: 0
[    0.000000] transform: setup_arch+0xbe/0xc6a type: 0
[    0.000000] post-likely
[    0.000000] post-unlikely

---
 arch/x86/include/asm/jump_label.h |  2 ++
 arch/x86/kernel/alternative.c     |  2 ++
 arch/x86/kernel/cpu/amd.c         | 13 +++++++-----
 arch/x86/kernel/cpu/common.c      |  2 ++
 arch/x86/kernel/jump_label.c      | 43 +++++++++++++++++++++++++++++++++------
 arch/x86/kernel/setup.c           | 19 +++++++++++++++--
 include/linux/jump_label.h        | 25 +++++++++++++++++++----
 kernel/jump_label.c               | 16 ---------------
 8 files changed, 89 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h 
b/arch/x86/include/asm/jump_label.h
index 8c0de4282659..555fb57ea872 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -74,6 +74,8 @@ struct jump_entry {
        jump_label_t key;
 };
 
+extern void jump_label_update_early(struct static_key *key, bool enable);
+
 #else  /* __ASSEMBLY__ */
 
 .macro STATIC_JUMP_IF_TRUE target, key, def
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a481763a3776..874bb274af2f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -215,6 +215,7 @@ void __init arch_init_ideal_nops(void)
                           ideal_nops = p6_nops;
                } else {
 #ifdef CONFIG_X86_64
+                       /* FEATURE_NOPL is unconditionally true on 64bit so 
this is dead code */
                        ideal_nops = k8_nops;
 #else
                        ideal_nops = intel_nops;
@@ -668,6 +669,7 @@ void *__init_or_module text_poke_early(void *addr, const 
void *opcode,
        local_irq_save(flags);
        memcpy(addr, opcode, len);
        local_irq_restore(flags);
+       sync_core();
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        return addr;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 082d7875cef8..355105aebc4e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -232,8 +232,6 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
                }
        }
 
-       set_cpu_cap(c, X86_FEATURE_K7);
-
        /* calling is from identify_secondary_cpu() ? */
        if (!c->cpu_index)
                return;
@@ -615,6 +613,14 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 
        early_init_amd_mc(c);
 
+#ifdef CONFIG_X86_32
+       if (c->x86 == 6)
+               set_cpu_cap(c, X86_FEATURE_K7);
+#endif
+
+       if (c->x86 >= 0xf)
+               set_cpu_cap(c, X86_FEATURE_K8);
+
        rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
 
        /*
@@ -861,9 +867,6 @@ static void init_amd(struct cpuinfo_x86 *c)
 
        init_amd_cacheinfo(c);
 
-       if (c->x86 >= 0xf)
-               set_cpu_cap(c, X86_FEATURE_K8);
-
        if (cpu_has(c, X86_FEATURE_XMM2)) {
                unsigned long long val;
                int ret;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 910b47ee8078..2a4024f7a222 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1086,6 +1086,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 
*c)
         */
        if (!pgtable_l5_enabled())
                setup_clear_cpu_cap(X86_FEATURE_LA57);
+
+       detect_nopl(c);
 }
 
 void __init early_cpu_init(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e56c95be2808..abebe1318e6b 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -52,16 +52,14 @@ static void __jump_label_transform(struct jump_entry *entry,
                         * Jump label is enabled for the first time.
                         * So we expect a default_nop...
                         */
-                       if (unlikely(memcmp((void *)entry->code, default_nop, 5)
-                                    != 0))
+                       if (unlikely(memcmp((void *)entry->code, default_nop, 
5) != 0))
                                bug_at((void *)entry->code, __LINE__);
                } else {
                        /*
                         * ...otherwise expect an ideal_nop. Otherwise
                         * something went horribly wrong.
                         */
-                       if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
-                                    != 0))
+                       if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) 
!= 0))
                                bug_at((void *)entry->code, __LINE__);
                }
 
@@ -80,8 +78,8 @@ static void __jump_label_transform(struct jump_entry *entry,
                                bug_at((void *)entry->code, __LINE__);
                } else {
                        code.jump = 0xe9;
-                       code.offset = entry->target -
-                               (entry->code + JUMP_LABEL_NOP_SIZE);
+                       code.offset = entry->target - (entry->code + 
JUMP_LABEL_NOP_SIZE);
+
                        if (unlikely(memcmp((void *)entry->code, &code, 5) != 
0))
                                bug_at((void *)entry->code, __LINE__);
                }
@@ -140,4 +138,37 @@ __init_or_module void 
arch_jump_label_transform_static(struct jump_entry *entry,
                __jump_label_transform(entry, type, text_poke_early, 1);
 }
 
+void jump_label_update_early(struct static_key *key, bool enable)
+{
+       struct jump_entry *entry, *stop = __stop___jump_table;
+
+       /*
+        * We need the table sorted and key->entries set up.
+        */
+       WARN_ON_ONCE(!static_key_initialized);
+
+       entry = static_key_entries(key);
+
+       /*
+        * Sanity check for early users, there had better be a core kernel user.
+        */
+       if (!entry || !entry->code || !core_kernel_text(entry->code)) {
+               WARN_ON(1);
+               return;
+       }
+
+       printk("key: %px enabled: %d enable: %d\n", key, 
atomic_read(&key->enabled), (int)enable);
+
+       if (!(!!atomic_read(&key->enabled) ^ !!enable))
+               return;
+
+       for ( ; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
+               enum jump_label_type type = enable ^ jump_entry_branch(entry);
+               printk("transform: %pS type: %d\n", (void *)entry->code, type);
+               __jump_label_transform(entry, type, text_poke_early, 0);
+       }
+
+       atomic_set_release(&key->enabled, !!enable);
+}
+
 #endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2f86d883dd95..3731245b8ec7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -805,6 +805,8 @@ dump_kernel_offset(struct notifier_block *self, unsigned 
long v, void *p)
        return 0;
 }
 
+static DEFINE_STATIC_KEY_FALSE(__test);
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -866,6 +868,21 @@ void __init setup_arch(char **cmdline_p)
 
        idt_setup_early_traps();
        early_cpu_init();
+       arch_init_ideal_nops();
+       jump_label_init();
+
+       if (static_branch_likely(&__test))
+               printk("pre-likely\n");
+       if (static_branch_unlikely(&__test))
+               printk("pre-unlikely\n");
+
+       jump_label_update_early(&__test.key, true);
+
+       if (static_branch_likely(&__test))
+               printk("post-likely\n");
+       if (static_branch_unlikely(&__test))
+               printk("post-unlikely\n");
+
        early_ioremap_init();
 
        setup_olpc_ofw_pgd();
@@ -1272,8 +1289,6 @@ void __init setup_arch(char **cmdline_p)
 
        mcheck_init();
 
-       arch_init_ideal_nops();
-
        register_refined_jiffies(CLOCK_TICK_RATE);
 
 #ifdef CONFIG_EFI
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index b46b541c67c4..7a693d0fb5b5 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -79,6 +79,7 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/bug.h>
 
 extern bool static_key_initialized;
 
@@ -110,6 +111,17 @@ struct static_key {
        };
 };
 
+#define JUMP_TYPE_FALSE                0UL
+#define JUMP_TYPE_TRUE         1UL
+#define JUMP_TYPE_LINKED       2UL
+#define JUMP_TYPE_MASK         3UL
+
+static inline struct jump_entry *static_key_entries(struct static_key *key)
+{
+       WARN_ON_ONCE(key->type & JUMP_TYPE_LINKED);
+       return (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK);
+}
+
 #else
 struct static_key {
        atomic_t enabled;
@@ -132,10 +144,15 @@ struct module;
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_TYPE_FALSE                0UL
-#define JUMP_TYPE_TRUE         1UL
-#define JUMP_TYPE_LINKED       2UL
-#define JUMP_TYPE_MASK         3UL
+static inline struct static_key *jump_entry_key(struct jump_entry *entry)
+{
+       return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_branch(struct jump_entry *entry)
+{
+       return (unsigned long)entry->key & 1UL;
+}
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 01ebdf1f9f40..9710fa7582aa 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -295,12 +295,6 @@ void __weak __init_or_module 
arch_jump_label_transform_static(struct jump_entry
        arch_jump_label_transform(entry, type);
 }
 
-static inline struct jump_entry *static_key_entries(struct static_key *key)
-{
-       WARN_ON_ONCE(key->type & JUMP_TYPE_LINKED);
-       return (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK);
-}
-
 static inline bool static_key_type(struct static_key *key)
 {
        return key->type & JUMP_TYPE_TRUE;
@@ -321,16 +315,6 @@ static inline void static_key_set_linked(struct static_key 
*key)
        key->type |= JUMP_TYPE_LINKED;
 }
 
-static inline struct static_key *jump_entry_key(struct jump_entry *entry)
-{
-       return (struct static_key *)((unsigned long)entry->key & ~1UL);
-}
-
-static bool jump_entry_branch(struct jump_entry *entry)
-{
-       return (unsigned long)entry->key & 1UL;
-}
-
 /***
  * A 'struct static_key' uses a union such that it either points directly
  * to a table of 'struct jump_entry' or to a linked list of modules which in

Reply via email to