On 01/07/2015, 02:49 AM, Greg Kroah-Hartman wrote:
> 3.18-stable review patch.  If anyone has any objections, please let me know.

Greg, Andi raised an objection against this one for 3.12 which still
holds here and for other trees:
https://www.mail-archive.com/stable@vger.kernel.org/msg106471.html

Quoting:
On 01/06/2015, 07:53 PM, Andi Kleen wrote:
> IMHO this is not stable material. Significant risk you broke
> something obscure, and it's not clear it fixes any real problem.
>
> At least wait some more time first.

Thanks.

> ------------------
> 
> From: Andy Lutomirski <l...@amacapital.net>
> 
> commit f647d7c155f069c1a068030255c300663516420e upstream.
> 
> Otherwise, if buggy user code points DS or ES into the TLS
> array, they would be corrupted after a context switch.
> 
> This also significantly improves the comments and documents some
> gotchas in the code.
> 
> Before this patch, the both tests below failed.  With this
> patch, the es test passes, although the gsbase test still fails.
> 
>  ----- begin es test -----
> 
> /*
>  * Copyright (c) 2014 Andy Lutomirski
>  * GPL v2
>  */
> 
> static unsigned short GDT3(int idx)
> {
>       return (idx << 3) | 3;
> }
> 
> static int create_tls(int idx, unsigned int base)
> {
>       struct user_desc desc = {
>               .entry_number    = idx,
>               .base_addr       = base,
>               .limit           = 0xfffff,
>               .seg_32bit       = 1,
>               .contents        = 0, /* Data, grow-up */
>               .read_exec_only  = 0,
>               .limit_in_pages  = 1,
>               .seg_not_present = 0,
>               .useable         = 0,
>       };
> 
>       if (syscall(SYS_set_thread_area, &desc) != 0)
>               err(1, "set_thread_area");
> 
>       return desc.entry_number;
> }
> 
> int main()
> {
>       int idx = create_tls(-1, 0);
>       printf("Allocated GDT index %d\n", idx);
> 
>       unsigned short orig_es;
>       asm volatile ("mov %%es,%0" : "=rm" (orig_es));
> 
>       int errors = 0;
>       int total = 1000;
>       for (int i = 0; i < total; i++) {
>               asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx)));
>               usleep(100);
> 
>               unsigned short es;
>               asm volatile ("mov %%es,%0" : "=rm" (es));
>               asm volatile ("mov %0,%%es" : : "rm" (orig_es));
>               if (es != GDT3(idx)) {
>                       if (errors == 0)
>                               printf("[FAIL]\tES changed from 0x%hx to 
> 0x%hx\n",
>                                      GDT3(idx), es);
>                       errors++;
>               }
>       }
> 
>       if (errors) {
>               printf("[FAIL]\tES was corrupted %d/%d times\n", errors, total);
>               return 1;
>       } else {
>               printf("[OK]\tES was preserved\n");
>               return 0;
>       }
> }
> 
>  ----- end es test -----
> 
>  ----- begin gsbase test -----
> 
> /*
>  * gsbase.c, a gsbase test
>  * Copyright (c) 2014 Andy Lutomirski
>  * GPL v2
>  */
> 
> static unsigned char *testptr, *testptr2;
> 
> static unsigned char read_gs_testvals(void)
> {
>       unsigned char ret;
>       asm volatile ("movb %%gs:%1, %0" : "=r" (ret) : "m" (*testptr));
>       return ret;
> }
> 
> int main()
> {
>       int errors = 0;
> 
>       testptr = mmap((void *)0x200000000UL, 1, PROT_READ | PROT_WRITE,
>                      MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
>       if (testptr == MAP_FAILED)
>               err(1, "mmap");
> 
>       testptr2 = mmap((void *)0x300000000UL, 1, PROT_READ | PROT_WRITE,
>                      MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
>       if (testptr2 == MAP_FAILED)
>               err(1, "mmap");
> 
>       *testptr = 0;
>       *testptr2 = 1;
> 
>       if (syscall(SYS_arch_prctl, ARCH_SET_GS,
>                   (unsigned long)testptr2 - (unsigned long)testptr) != 0)
>               err(1, "ARCH_SET_GS");
> 
>       usleep(100);
> 
>       if (read_gs_testvals() == 1) {
>               printf("[OK]\tARCH_SET_GS worked\n");
>       } else {
>               printf("[FAIL]\tARCH_SET_GS failed\n");
>               errors++;
>       }
> 
>       asm volatile ("mov %0,%%gs" : : "r" (0));
> 
>       if (read_gs_testvals() == 0) {
>               printf("[OK]\tWriting 0 to gs worked\n");
>       } else {
>               printf("[FAIL]\tWriting 0 to gs failed\n");
>               errors++;
>       }
> 
>       usleep(100);
> 
>       if (read_gs_testvals() == 0) {
>               printf("[OK]\tgsbase is still zero\n");
>       } else {
>               printf("[FAIL]\tgsbase was corrupted\n");
>               errors++;
>       }
> 
>       return errors == 0 ? 0 : 1;
> }
> 
>  ----- end gsbase test -----
> 
> Signed-off-by: Andy Lutomirski <l...@amacapital.net>
> Cc: Andi Kleen <a...@firstfloor.org>
> Cc: Linus Torvalds <torva...@linux-foundation.org>
> Link: 
> http://lkml.kernel.org/r/509d27c9fec78217691c3dad91cec87e1006b34a.1418075657.git.l...@amacapital.net
> Signed-off-by: Ingo Molnar <mi...@kernel.org>
> Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
> 
> ---
>  arch/x86/kernel/process_64.c |  101 
> +++++++++++++++++++++++++++++++------------
>  1 file changed, 73 insertions(+), 28 deletions(-)
> 
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p,
>  
>       fpu = switch_fpu_prepare(prev_p, next_p, cpu);
>  
> -     /*
> -      * Reload esp0, LDT and the page table pointer:
> -      */
> +     /* Reload esp0 and ss1. */
>       load_sp0(tss, next);
>  
> -     /*
> -      * Switch DS and ES.
> -      * This won't pick up thread selector changes, but I guess that is ok.
> -      */
> -     savesegment(es, prev->es);
> -     if (unlikely(next->es | prev->es))
> -             loadsegment(es, next->es);
> -
> -     savesegment(ds, prev->ds);
> -     if (unlikely(next->ds | prev->ds))
> -             loadsegment(ds, next->ds);
> -
> -
>       /* We must save %fs and %gs before load_TLS() because
>        * %fs and %gs may be cleared by load_TLS().
>        *
> @@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p,
>       savesegment(fs, fsindex);
>       savesegment(gs, gsindex);
>  
> +     /*
> +      * Load TLS before restoring any segments so that segment loads
> +      * reference the correct GDT entries.
> +      */
>       load_TLS(next, cpu);
>  
>       /*
> -      * Leave lazy mode, flushing any hypercalls made here.
> -      * This must be done before restoring TLS segments so
> -      * the GDT and LDT are properly updated, and must be
> -      * done before math_state_restore, so the TS bit is up
> -      * to date.
> +      * Leave lazy mode, flushing any hypercalls made here.  This
> +      * must be done after loading TLS entries in the GDT but before
> +      * loading segments that might reference them, and and it must
> +      * be done before math_state_restore, so the TS bit is up to
> +      * date.
>        */
>       arch_end_context_switch(next_p);
>  
> +     /* Switch DS and ES.
> +      *
> +      * Reading them only returns the selectors, but writing them (if
> +      * nonzero) loads the full descriptor from the GDT or LDT.  The
> +      * LDT for next is loaded in switch_mm, and the GDT is loaded
> +      * above.
> +      *
> +      * We therefore need to write new values to the segment
> +      * registers on every context switch unless both the new and old
> +      * values are zero.
> +      *
> +      * Note that we don't need to do anything for CS and SS, as
> +      * those are saved and restored as part of pt_regs.
> +      */
> +     savesegment(es, prev->es);
> +     if (unlikely(next->es | prev->es))
> +             loadsegment(es, next->es);
> +
> +     savesegment(ds, prev->ds);
> +     if (unlikely(next->ds | prev->ds))
> +             loadsegment(ds, next->ds);
> +
>       /*
>        * Switch FS and GS.
>        *
> -      * Segment register != 0 always requires a reload.  Also
> -      * reload when it has changed.  When prev process used 64bit
> -      * base always reload to avoid an information leak.
> +      * These are even more complicated than FS and GS: they have
> +      * 64-bit bases are that controlled by arch_prctl.  Those bases
> +      * only differ from the values in the GDT or LDT if the selector
> +      * is 0.
> +      *
> +      * Loading the segment register resets the hidden base part of
> +      * the register to 0 or the value from the GDT / LDT.  If the
> +      * next base address zero, writing 0 to the segment register is
> +      * much faster than using wrmsr to explicitly zero the base.
> +      *
> +      * The thread_struct.fs and thread_struct.gs values are 0
> +      * if the fs and gs bases respectively are not overridden
> +      * from the values implied by fsindex and gsindex.  They
> +      * are nonzero, and store the nonzero base addresses, if
> +      * the bases are overridden.
> +      *
> +      * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
> +      * be impossible.
> +      *
> +      * Therefore we need to reload the segment registers if either
> +      * the old or new selector is nonzero, and we need to override
> +      * the base address if next thread expects it to be overridden.
> +      *
> +      * This code is unnecessarily slow in the case where the old and
> +      * new indexes are zero and the new base is nonzero -- it will
> +      * unnecessarily write 0 to the selector before writing the new
> +      * base address.
> +      *
> +      * Note: This all depends on arch_prctl being the only way that
> +      * user code can override the segment base.  Once wrfsbase and
> +      * wrgsbase are enabled, most of this code will need to change.
>        */
>       if (unlikely(fsindex | next->fsindex | prev->fs)) {
>               loadsegment(fs, next->fsindex);
> +
>               /*
> -              * Check if the user used a selector != 0; if yes
> -              *  clear 64bit base, since overloaded base is always
> -              *  mapped to the Null selector
> +              * If user code wrote a nonzero value to FS, then it also
> +              * cleared the overridden base address.
> +              *
> +              * XXX: if user code wrote 0 to FS and cleared the base
> +              * address itself, we won't notice and we'll incorrectly
> +              * restore the prior base address next time we reschdule
> +              * the process.
>                */
>               if (fsindex)
>                       prev->fs = 0;
>       }
> -     /* when next process has a 64bit base use it */
>       if (next->fs)
>               wrmsrl(MSR_FS_BASE, next->fs);
>       prev->fsindex = fsindex;
>  
>       if (unlikely(gsindex | next->gsindex | prev->gs)) {
>               load_gs_index(next->gsindex);
> +
> +             /* This works (and fails) the same way as fsindex above. */
>               if (gsindex)
>                       prev->gs = 0;
>       }
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe stable" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


-- 
js
suse labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to