Commit 422588e885 ("x86/xpti: Hide almost all of .text and all
.data/.rodata/.bss mappings") carefully limited the Xen image cloning to
just entry code, but then overwrote the just allocated and populated L3
entry with the normal one again covering both Xen image and stubs.

Drop the respective code in favor of an explicit clone_mapping()
invocation. This in turn now requires setup_cpu_root_pgt() to run after
stub setup in all cases. Additionally, with (almost) no unintended
mappings left, the BSP's IDT now also needs to be page aligned.
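
A sketch of the resulting ordering in cpu_smpboot_alloc() (simplified,
with error handling elided; identifiers as in the diff below):

    /*
     * setup_cpu_root_pgt() now clones the per-CPU stub page by its
     * linear address, which is only established once the stub page
     * has been allocated, hence the reordering:
     */
    per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);

    rc = setup_cpu_root_pgt(cpu);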

Note that the removed BUILD_BUG_ON()s don't get replaced by anything -
there already is a suitable ASSERT() in xen.lds.S.

Moving cleanup_cpu_root_pgt() ahead is not strictly necessary for
functionality, but things are more logical this way, and cleanup then
remains done in the inverse order of setup.

Signed-off-by: Jan Beulich <jbeul...@suse.com>
---
v2: Add missing cleanup of the stub mapping.
---
What should we do with the TSS? Currently, together with it, we expose
almost a full page of other per-CPU data. A simple (but slightly
hackish) option would be to use one of the two unused stack slots.

--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -622,9 +622,6 @@ unsigned long alloc_stub_page(unsigned i
         unmap_domain_page(memset(__map_domain_page(pg), 0xcc, PAGE_SIZE));
     }
 
-    /* Confirm that all stubs fit in a single L3 entry. */
-    BUILD_BUG_ON(NR_CPUS * PAGE_SIZE > (1u << L3_PAGETABLE_SHIFT));
-
     stub_va = XEN_VIRT_END - (cpu + 1) * PAGE_SIZE;
     if ( map_pages_to_xen(stub_va, mfn_x(page_to_mfn(pg)), 1,
                           PAGE_HYPERVISOR_RX | MAP_SMALL_PAGES) )
@@ -758,12 +755,12 @@ static int clone_mapping(const void *ptr
 boolean_param("xpti", opt_xpti);
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
 
+static root_pgentry_t common_pgt;
+
 extern const char _stextentry[], _etextentry[];
 
 static int setup_cpu_root_pgt(unsigned int cpu)
 {
-    static root_pgentry_t common_pgt;
-
     root_pgentry_t *rpt;
     unsigned int off;
     int rc;
@@ -786,8 +783,6 @@ static int setup_cpu_root_pgt(unsigned i
     /* One-time setup of common_pgt, which maps .text.entry and the stubs. */
     if ( unlikely(!root_get_intpte(common_pgt)) )
     {
-        unsigned long stubs_linear = XEN_VIRT_END - 1;
-        l3_pgentry_t *stubs_main, *stubs_shadow;
         const char *ptr;
 
         for ( rc = 0, ptr = _stextentry;
@@ -797,16 +792,6 @@ static int setup_cpu_root_pgt(unsigned i
         if ( rc )
             return rc;
 
-        /* Confirm that all stubs fit in a single L3 entry. */
-        BUILD_BUG_ON(NR_CPUS * PAGE_SIZE > (1u << L3_PAGETABLE_SHIFT));
-
-        stubs_main = l4e_to_l3e(idle_pg_table[l4_table_offset(stubs_linear)]);
-        stubs_shadow = l4e_to_l3e(rpt[l4_table_offset(stubs_linear)]);
-
-        /* Splice into the regular L2 mapping the stubs. */
-        stubs_shadow[l3_table_offset(stubs_linear)] =
-            stubs_main[l3_table_offset(stubs_linear)];
-
         common_pgt = rpt[root_table_offset(XEN_VIRT_START)];
     }
 
@@ -820,6 +805,8 @@ static int setup_cpu_root_pgt(unsigned i
         rc = clone_mapping(idt_tables[cpu], rpt);
     if ( !rc )
         rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
+    if ( !rc )
+        rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt);
 
     return rc;
 }
@@ -828,6 +815,7 @@ static void cleanup_cpu_root_pgt(unsigne
 {
     root_pgentry_t *rpt = per_cpu(root_pgt, cpu);
     unsigned int r;
+    unsigned long stub_linear = per_cpu(stubs.addr, cpu);
 
     if ( !rpt )
         return;
@@ -872,6 +860,16 @@ static void cleanup_cpu_root_pgt(unsigne
     }
 
     free_xen_pagetable(rpt);
+
+    /* Also zap the stub mapping for this CPU. */
+    if ( stub_linear )
+    {
+        l3_pgentry_t *l3t = l4e_to_l3e(common_pgt);
+        l2_pgentry_t *l2t = l3e_to_l2e(l3t[l3_table_offset(stub_linear)]);
+        l1_pgentry_t *l1t = l2e_to_l1e(l2t[l2_table_offset(stub_linear)]);
+
+        l1t[l1_table_offset(stub_linear)] = l1e_empty();
+    }
 }
 
 static void cpu_smpboot_free(unsigned int cpu)
@@ -895,6 +893,8 @@ static void cpu_smpboot_free(unsigned in
     if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask )
         free_cpumask_var(per_cpu(scratch_cpumask, cpu));
 
+    cleanup_cpu_root_pgt(cpu);
+
     if ( per_cpu(stubs.addr, cpu) )
     {
         mfn_t mfn = _mfn(per_cpu(stubs.mfn, cpu));
@@ -912,8 +912,6 @@ static void cpu_smpboot_free(unsigned in
             free_domheap_page(mfn_to_page(mfn));
     }
 
-    cleanup_cpu_root_pgt(cpu);
-
     order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
     free_xenheap_pages(per_cpu(gdt_table, cpu), order);
 
@@ -968,11 +966,6 @@ static int cpu_smpboot_alloc(unsigned in
     memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t));
     disable_each_ist(idt_tables[cpu]);
 
-    rc = setup_cpu_root_pgt(cpu);
-    if ( rc )
-        goto out;
-    rc = -ENOMEM;
-
     for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
           i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
         if ( cpu_online(i) && cpu_to_node(i) == node )
@@ -986,6 +979,11 @@ static int cpu_smpboot_alloc(unsigned in
         goto out;
     per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);
 
+    rc = setup_cpu_root_pgt(cpu);
+    if ( rc )
+        goto out;
+    rc = -ENOMEM;
+
     if ( secondary_socket_cpumask == NULL &&
          (secondary_socket_cpumask = xzalloc(cpumask_t)) == NULL )
         goto out;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -102,7 +102,8 @@ DEFINE_PER_CPU_READ_MOSTLY(struct desc_s
 DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table);
 
 /* Master table, used by CPU0. */
-idt_entry_t idt_table[IDT_ENTRIES];
+idt_entry_t __section(".data.page_aligned") __aligned(PAGE_SIZE)
+    idt_table[IDT_ENTRIES];
 
 /* Pointer to the IDT of every CPU. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;


