Linus,

please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

The x86 updates contain:

    - A fix for a longstanding PAT bug, where PAT was reported as enabled on
      CPUs that do not support it, which leads to wrong caching attributes
      and missing MTRR updates.

    - Prevent overwriting of the e820 firmware table, which causes kexec
      kernels to lose the fake mptable which is stored there.

    - Cleanup of the UV/BAU code, removing unused code and making
      local functions static.

Thanks,

        tglx

------------------>
Chen Yu (3):
      x86/boot/e820: Avoid overwriting e820_table_firmware
      x86/boot/e820: Rename the e820_table_firmware to e820_table_kexec
      x86/boot/e820: Introduce the bootloader provided e820_table_firmware[] table

Colin Ian King (1):
      x86/platform/uv/BAU: Minor cleanup, make some local functions static

Mikulas Patocka (1):
      x86/mm/pat: Don't report PAT on CPUs that don't support it


 arch/x86/include/asm/e820/api.h   |  1 +
 arch/x86/include/asm/pat.h        |  1 +
 arch/x86/kernel/e820.c            | 49 ++++++++++++++++++++++++++++-----------
 arch/x86/kernel/kexec-bzimage64.c |  4 ++--
 arch/x86/kernel/setup.c           |  7 ++++++
 arch/x86/mm/pat.c                 | 28 ++++++++++------------
 arch/x86/platform/uv/tlb_uv.c     | 31 +++++--------------------
 7 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 8e0f8b85b209..a504adc661a4 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -4,6 +4,7 @@
 #include <asm/e820/types.h>
 
 extern struct e820_table *e820_table;
+extern struct e820_table *e820_table_kexec;
 extern struct e820_table *e820_table_firmware;
 
 extern unsigned long pci_mem_start;
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 0b1ff4c1c14e..fffb2794dd89 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -7,6 +7,7 @@
 bool pat_enabled(void);
 void pat_disable(const char *reason);
 extern void pat_init(void);
+extern void init_cache_modes(void);
 
 extern int reserve_memtype(u64 start, u64 end,
                enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d78a586ba8dc..532da61d605c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -20,10 +20,12 @@
 #include <asm/setup.h>
 
 /*
- * We organize the E820 table into two main data structures:
+ * We organize the E820 table into three main data structures:
  *
  * - 'e820_table_firmware': the original firmware version passed to us by the
- *   bootloader - not modified by the kernel. We use this to:
+ *   bootloader - not modified by the kernel. It is composed of two parts:
+ *   the first 128 E820 memory entries in boot_params.e820_table and the remaining
+ *   (if any) entries of the SETUP_E820_EXT nodes. We use this to:
  *
  *       - inform the user about the firmware's notion of memory layout
  *         via /sys/firmware/memmap
@@ -31,6 +33,14 @@
  *       - the hibernation code uses it to generate a kernel-independent MD5
  *         fingerprint of the physical memory layout of a system.
  *
+ * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version
+ *   passed to us by the bootloader - the major difference between
+ *   e820_table_firmware[] and this one is that, the latter marks the setup_data
+ *   list created by the EFI boot stub as reserved, so that kexec can reuse the
+ *   setup_data information in the second kernel. Besides, e820_table_kexec[]
+ *   might also be modified by the kexec itself to fake a mptable.
+ *   We use this to:
+ *
  *       - kexec, which is a bootloader in disguise, uses the original E820
  *         layout to pass to the kexec-ed kernel. This way the original kernel
  *         can have a restricted E820 map while the kexec()-ed kexec-kernel
@@ -46,9 +56,11 @@
  * specific memory layout data during early bootup.
  */
 static struct e820_table e820_table_init               __initdata;
+static struct e820_table e820_table_kexec_init         __initdata;
 static struct e820_table e820_table_firmware_init      __initdata;
 
 struct e820_table *e820_table __refdata                        = 
&e820_table_init;
+struct e820_table *e820_table_kexec __refdata          = 
&e820_table_kexec_init;
 struct e820_table *e820_table_firmware __refdata       = 
&e820_table_firmware_init;
 
 /* For PCI or other memory-mapped resources */
@@ -470,9 +482,9 @@ u64 __init e820__range_update(u64 start, u64 size, enum 
e820_type old_type, enum
        return __e820__range_update(e820_table, start, size, old_type, 
new_type);
 }
 
-static u64 __init e820__range_update_firmware(u64 start, u64 size, enum 
e820_type old_type, enum e820_type  new_type)
+static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type 
old_type, enum e820_type  new_type)
 {
-       return __e820__range_update(e820_table_firmware, start, size, old_type, 
new_type);
+       return __e820__range_update(e820_table_kexec, start, size, old_type, 
new_type);
 }
 
 /* Remove a range of memory from the E820 table: */
@@ -546,9 +558,9 @@ void __init e820__update_table_print(void)
        e820__print_table("modified");
 }
 
-static void __init e820__update_table_firmware(void)
+static void __init e820__update_table_kexec(void)
 {
-       e820__update_table(e820_table_firmware);
+       e820__update_table(e820_table_kexec);
 }
 
 #define MAX_GAP_END 0x100000000ull
@@ -623,7 +635,7 @@ __init void e820__setup_pci_gap(void)
 /*
  * Called late during init, in free_initmem().
  *
- * Initial e820_table and e820_table_firmware are largish __initdata arrays.
+ * Initial e820_table and e820_table_kexec are largish __initdata arrays.
  *
  * Copy them to a (usually much smaller) dynamically allocated area that is
  * sized precisely after the number of e820 entries.
@@ -643,6 +655,12 @@ __init void e820__reallocate_tables(void)
        memcpy(n, e820_table, size);
        e820_table = n;
 
+       size = offsetof(struct e820_table, entries) + sizeof(struct 
e820_entry)*e820_table_kexec->nr_entries;
+       n = kmalloc(size, GFP_KERNEL);
+       BUG_ON(!n);
+       memcpy(n, e820_table_kexec, size);
+       e820_table_kexec = n;
+
        size = offsetof(struct e820_table, entries) + sizeof(struct 
e820_entry)*e820_table_firmware->nr_entries;
        n = kmalloc(size, GFP_KERNEL);
        BUG_ON(!n);
@@ -669,6 +687,9 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 
data_len)
        __append_e820_table(extmap, entries);
        e820__update_table(e820_table);
 
+       memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
+       memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+
        early_memunmap(sdata, data_len);
        pr_info("e820: extended physical RAM map:\n");
        e820__print_table("extended");
@@ -727,7 +748,7 @@ core_initcall(e820__register_nvs_regions);
 /*
  * Allocate the requested number of bytes with the requsted alignment
  * and return (the physical address) to the caller. Also register this
- * range in the 'firmware' E820 table as a reserved range.
+ * range in the 'kexec' E820 table as a reserved range.
  *
  * This allows kexec to fake a new mptable, as if it came from the real
  * system.
@@ -738,9 +759,9 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 
align)
 
        addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
        if (addr) {
-               e820__range_update_firmware(addr, size, E820_TYPE_RAM, 
E820_TYPE_RESERVED);
-               pr_info("e820: update e820_table_firmware for 
e820__memblock_alloc_reserved()\n");
-               e820__update_table_firmware();
+               e820__range_update_kexec(addr, size, E820_TYPE_RAM, 
E820_TYPE_RESERVED);
+               pr_info("e820: update e820_table_kexec for 
e820__memblock_alloc_reserved()\n");
+               e820__update_table_kexec();
        }
 
        return addr;
@@ -923,13 +944,13 @@ void __init e820__reserve_setup_data(void)
        while (pa_data) {
                data = early_memremap(pa_data, sizeof(*data));
                e820__range_update(pa_data, sizeof(*data)+data->len, 
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+               e820__range_update_kexec(pa_data, sizeof(*data)+data->len, 
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
                pa_data = data->next;
                early_memunmap(data, sizeof(*data));
        }
 
        e820__update_table(e820_table);
-
-       memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+       e820__update_table(e820_table_kexec);
 
        pr_info("extended physical RAM map:\n");
        e820__print_table("reserve setup_data");
@@ -1062,6 +1083,7 @@ void __init e820__reserve_resources(void)
                res++;
        }
 
+       /* Expose the bootloader-provided memory layout to the sysfs. */
        for (i = 0; i < e820_table_firmware->nr_entries; i++) {
                struct e820_entry *entry = e820_table_firmware->entries + i;
 
@@ -1175,6 +1197,7 @@ void __init e820__memory_setup(void)
 
        who = x86_init.resources.memory_setup();
 
+       memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
        memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
 
        pr_info("e820: BIOS-provided physical RAM map:\n");
diff --git a/arch/x86/kernel/kexec-bzimage64.c 
b/arch/x86/kernel/kexec-bzimage64.c
index 9d7fd5e6689a..fb095ba0c02f 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -100,14 +100,14 @@ static int setup_e820_entries(struct boot_params *params)
 {
        unsigned int nr_e820_entries;
 
-       nr_e820_entries = e820_table_firmware->nr_entries;
+       nr_e820_entries = e820_table_kexec->nr_entries;
 
        /* TODO: Pass entries more than E820_MAX_ENTRIES_ZEROPAGE in bootparams setup data */
        if (nr_e820_entries > E820_MAX_ENTRIES_ZEROPAGE)
                nr_e820_entries = E820_MAX_ENTRIES_ZEROPAGE;
 
        params->e820_entries = nr_e820_entries;
-       memcpy(&params->e820_table, &e820_table_firmware->entries, 
nr_e820_entries*sizeof(struct e820_entry));
+       memcpy(&params->e820_table, &e820_table_kexec->entries, 
nr_e820_entries*sizeof(struct e820_entry));
 
        return 0;
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 65622f07e633..3486d0498800 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1076,6 +1076,13 @@ void __init setup_arch(char **cmdline_p)
        max_possible_pfn = max_pfn;
 
        /*
+        * This call is required when the CPU does not support PAT. If
+        * mtrr_bp_init() invoked it already via pat_init() the call has no
+        * effect.
+        */
+       init_cache_modes();
+
+       /*
         * Define random base addresses for memory sections after max_pfn is
         * defined and before each memory section base is used.
         */
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 9b78685b66e6..45979502f64b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -37,14 +37,14 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "" fmt
 
-static bool boot_cpu_done;
-
-static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
-static void init_cache_modes(void);
+static bool __read_mostly boot_cpu_done;
+static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __read_mostly pat_initialized;
+static bool __read_mostly init_cm_done;
 
 void pat_disable(const char *reason)
 {
-       if (!__pat_enabled)
+       if (pat_disabled)
                return;
 
        if (boot_cpu_done) {
@@ -52,10 +52,8 @@ void pat_disable(const char *reason)
                return;
        }
 
-       __pat_enabled = 0;
+       pat_disabled = true;
        pr_info("x86/PAT: %s\n", reason);
-
-       init_cache_modes();
 }
 
 static int __init nopat(char *str)
@@ -67,7 +65,7 @@ early_param("nopat", nopat);
 
 bool pat_enabled(void)
 {
-       return !!__pat_enabled;
+       return pat_initialized;
 }
 EXPORT_SYMBOL_GPL(pat_enabled);
 
@@ -205,6 +203,8 @@ static void __init_cache_modes(u64 pat)
                update_cache_mode_entry(i, cache);
        }
        pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
+
+       init_cm_done = true;
 }
 
 #define PAT(x, y)      ((u64)PAT_ ## y << ((x)*8))
@@ -225,6 +225,7 @@ static void pat_bsp_init(u64 pat)
        }
 
        wrmsrl(MSR_IA32_CR_PAT, pat);
+       pat_initialized = true;
 
        __init_cache_modes(pat);
 }
@@ -242,10 +243,9 @@ static void pat_ap_init(u64 pat)
        wrmsrl(MSR_IA32_CR_PAT, pat);
 }
 
-static void init_cache_modes(void)
+void init_cache_modes(void)
 {
        u64 pat = 0;
-       static int init_cm_done;
 
        if (init_cm_done)
                return;
@@ -287,8 +287,6 @@ static void init_cache_modes(void)
        }
 
        __init_cache_modes(pat);
-
-       init_cm_done = 1;
 }
 
 /**
@@ -306,10 +304,8 @@ void pat_init(void)
        u64 pat;
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
-       if (!pat_enabled()) {
-               init_cache_modes();
+       if (pat_disabled)
                return;
-       }
 
        if ((c->x86_vendor == X86_VENDOR_INTEL) &&
            (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 2983faab5b18..d4a61ddf9e62 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -588,31 +588,11 @@ static unsigned long uv2_3_read_status(unsigned long 
offset, int rshft, int desc
 }
 
 /*
- * Return whether the status of the descriptor that is normally used for this
- * cpu (the one indexed by its hub-relative cpu number) is busy.
- * The status of the original 32 descriptors is always reflected in the 64
- * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
- * The bit provided by the activation_status_2 register is irrelevant to
- * the status if it is only being tested for busy or not busy.
- */
-int normal_busy(struct bau_control *bcp)
-{
-       int cpu = bcp->uvhub_cpu;
-       int mmr_offset;
-       int right_shift;
-
-       mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-       right_shift = cpu * UV_ACT_STATUS_SIZE;
-       return (((((read_lmmr(mmr_offset) >> right_shift) &
-                               UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
-}
-
-/*
  * Entered when a bau descriptor has gone into a permanent busy wait because
  * of a hardware bug.
  * Workaround the bug.
  */
-int handle_uv2_busy(struct bau_control *bcp)
+static int handle_uv2_busy(struct bau_control *bcp)
 {
        struct ptc_stats *stat = bcp->statp;
 
@@ -917,8 +897,9 @@ static void handle_cmplt(int completion_status, struct 
bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
-       struct bau_desc *bau_desc)
+static int uv_flush_send_and_wait(struct cpumask *flush_mask,
+                                 struct bau_control *bcp,
+                                 struct bau_desc *bau_desc)
 {
        int seq_number = 0;
        int completion_stat = 0;
@@ -1212,8 +1193,8 @@ const struct cpumask *uv_flush_tlb_others(const struct 
cpumask *cpumask,
  * Search the message queue for any 'other' unprocessed message with the
  * same software acknowledge resource bit vector as the 'msg' message.
  */
-struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-                                          struct bau_control *bcp)
+static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+                                                 struct bau_control *bcp)
 {
        struct bau_pq_entry *msg_next = msg + 1;
        unsigned char swack_vec = msg->swack_vec;

Reply via email to