Re: [PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-26 Thread Andy Lutomirski
On Thu, Feb 26, 2015 at 8:28 AM, Brian Gerst  wrote:
> On Thu, Feb 26, 2015 at 10:32 AM, Andy Lutomirski  wrote:
>> On Tue, Feb 24, 2015 at 7:23 PM, Brian Gerst  wrote:
>>> On Tue, Feb 24, 2015 at 3:08 PM, Denys Vlasenko
>>>  wrote:
 On Tue, Feb 24, 2015 at 9:02 PM, Andy Lutomirski  
 wrote:
>> This currently fails in 32-bit kernels (at least in qemu):
>>
>> / # ./es_test
>> Allocated GDT index 7
>> [FAIL]ES changed from 0x3b to 0x7b
>> [FAIL]ES was corrupted 1000/1000 times
>> / # uname -a
>> Linux (none) 4.0.0-rc1 #1 SMP Tue Feb 24 16:41:58 CET 2015 i686 GNU/Linux
>
> Want to send a patch?  I'll get it in a few days if no one beats me.

 I have no patch, sorry (in fact, I failed to find where is the relevant
 32-bit counterpart).

 It's just security people asked me to backport this and I wondered
 maybe I should wait a bit on this one, since fix for 32-bit ought
 to appear as well.
>>>
>>> For 32-bit kernel, userspace DS and ES are saved at syscall/interrupt
>>> entry time and reloaded on exit, unlike in 64-bit where they are saved
>>> and loaded at context switch time.  Therefore 32-bit is not affected
>>> by the issue this patch addresses.
>>>
>>> It looks to me though, that the ES test program doesn't actually test
>>> what the patch fixes - the segment attributes, like the base address.
>>> It tests just the selector, which shouldn't change across a kernel
>>> entry (with a few exceptions, like signals).  If the test is failing,
>>> then it is a different issue from what this patch addresses.
>>
>> It tests it indirectly.  The 64-bit code sets the selector to zero if
>> it fails to reload it.  Testing the ES base is awkward because it
>> can't be done in 64-bit code at all.
>
> I figured out why Denys got the failure.  usleep() makes a syscall via
> sysenter.  The sysenter path saves es/ds, but does not restore them
> before sysexit like the int80/iret path would.  That leaves them as
> USER_DS that the kernel loaded for itself.  I believe this was an
> intentional optimization, assuming the vdso would only be called from
> programs conforming to the ELF ABI.

Makes sense.  The attached variant passes, so I think we're fine.

--Andy
/*
 * Copyright (c) 2014-2015 Andy Lutomirski
 * GPL v2
 */

#include 
#include 
#include 
#include 
#include 
#include 

static unsigned short GDT3(int idx)
{
	return (idx << 3) | 3;
}

static int create_tls(int idx, unsigned int base)
{
	struct user_desc desc = {
		.entry_number= idx,
		.base_addr   = base,
		.limit   = 0xf,
		.seg_32bit   = 1,
		.contents= 0, /* Data, grow-up */
		.read_exec_only  = 0,
		.limit_in_pages  = 1,
		.seg_not_present = 0,
		.useable = 0,
	};

	if (syscall(SYS_set_thread_area, &desc) != 0)
		err(1, "set_thread_area");

	return desc.entry_number;
}

int main()
{
	int idx = create_tls(-1, 0);
	printf("Allocated GDT index %d\n", idx);

	unsigned short orig_es;
	asm volatile ("mov %%es,%0" : "=rm" (orig_es));

	int errors = 0;
	int total = 1000;
	for (int i = 0; i < total; i++) {
		struct timespec req = {
			.tv_sec = 0,
			.tv_nsec = 10,
		};
		int ret;

		asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx)));

		/*
		 * Force rescheduling.  On 32-bit kernels, fast syscalls
		 * destroy DS and ES, so force int 80.
		 */
		asm volatile ("int $0x80"
			  : "=a" (ret)
			  : "a" (SYS_nanosleep), "b" (&req),
"c" (0));

		unsigned short es;
		asm volatile ("mov %%es,%0" : "=rm" (es));
		asm volatile ("mov %0,%%es" : : "rm" (orig_es));
		if (es != GDT3(idx)) {
			if (errors == 0)
printf("[FAIL]\tES changed from 0x%hx to 0x%hx\n",
   GDT3(idx), es);
			errors++;
		}
	}

	if (errors) {
		printf("[FAIL]\tES was corrupted %d/%d times\n", errors, total);
		return 1;
	} else {
		printf("[OK]\tES was preserved\n");
		return 0;
	}
}


Re: [PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-26 Thread Brian Gerst
On Thu, Feb 26, 2015 at 10:32 AM, Andy Lutomirski  wrote:
> On Tue, Feb 24, 2015 at 7:23 PM, Brian Gerst  wrote:
>> On Tue, Feb 24, 2015 at 3:08 PM, Denys Vlasenko
>>  wrote:
>>> On Tue, Feb 24, 2015 at 9:02 PM, Andy Lutomirski  
>>> wrote:
> This currently fails in 32-bit kernels (at least in qemu):
>
> / # ./es_test
> Allocated GDT index 7
> [FAIL]ES changed from 0x3b to 0x7b
> [FAIL]ES was corrupted 1000/1000 times
> / # uname -a
> Linux (none) 4.0.0-rc1 #1 SMP Tue Feb 24 16:41:58 CET 2015 i686 GNU/Linux

 Want to send a patch?  I'll get it in a few days if no one beats me.
>>>
>>> I have no patch, sorry (in fact, I failed to find where is the relevant
>>> 32-bit counterpart).
>>>
>>> It's just security people asked me to backport this and I wondered
>>> maybe I should wait a bit on this one, since fix for 32-bit ought
>>> to appear as well.
>>
>> For 32-bit kernel, userspace DS and ES are saved at syscall/interrupt
>> entry time and reloaded on exit, unlike in 64-bit where they are saved
>> and loaded at context switch time.  Therefore 32-bit is not affected
>> by the issue this patch addresses.
>>
>> It looks to me though, that the ES test program doesn't actually test
>> what the patch fixes - the segment attributes, like the base address.
>> It tests just the selector, which shouldn't change across a kernel
>> entry (with a few exceptions, like signals).  If the test is failing,
>> then it is a different issue from what this patch addresses.
>
> It tests it indirectly.  The 64-bit code sets the selector to zero if
> it fails to reload it.  Testing the ES base is awkward because it
> can't be done in 64-bit code at all.

I figured out why Denys got the failure.  usleep() makes a syscall via
sysenter.  The sysenter path saves es/ds, but does not restore them
before sysexit like the int80/iret path would.  That leaves them as
USER_DS that the kernel loaded for itself.  I believe this was an
intentional optimization, assuming the vdso would only be called from
programs conforming to the ELF ABI.

--
Brian Gerst
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-26 Thread Andy Lutomirski
On Tue, Feb 24, 2015 at 7:23 PM, Brian Gerst  wrote:
> On Tue, Feb 24, 2015 at 3:08 PM, Denys Vlasenko
>  wrote:
>> On Tue, Feb 24, 2015 at 9:02 PM, Andy Lutomirski  wrote:
 This currently fails in 32-bit kernels (at least in qemu):

 / # ./es_test
 Allocated GDT index 7
 [FAIL]ES changed from 0x3b to 0x7b
 [FAIL]ES was corrupted 1000/1000 times
 / # uname -a
 Linux (none) 4.0.0-rc1 #1 SMP Tue Feb 24 16:41:58 CET 2015 i686 GNU/Linux
>>>
>>> Want to send a patch?  I'll get it in a few days if no one beats me.
>>
>> I have no patch, sorry (in fact, I failed to find where is the relevant
>> 32-bit counterpart).
>>
>> It's just security people asked me to backport this and I wondered
>> maybe I should wait a bit on this one, since fix for 32-bit ought
>> to appear as well.
>
> For 32-bit kernel, userspace DS and ES are saved at syscall/interrupt
> entry time and reloaded on exit, unlike in 64-bit where they are saved
> and loaded at context switch time.  Therefore 32-bit is not affected
> by the issue this patch addresses.
>
> It looks to me though, that the ES test program doesn't actually test
> what the patch fixes - the segment attributes, like the base address.
> It tests just the selector, which shouldn't change across a kernel
> entry (with a few exceptions, like signals).  If the test is failing,
> then it is a different issue from what this patch addresses.

It tests it indirectly.  The 64-bit code sets the selector to zero if
it fails to reload it.  Testing the ES base is awkward because it
can't be done in 64-bit code at all.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-24 Thread Brian Gerst
On Tue, Feb 24, 2015 at 3:08 PM, Denys Vlasenko
 wrote:
> On Tue, Feb 24, 2015 at 9:02 PM, Andy Lutomirski  wrote:
>>> This currently fails in 32-bit kernels (at least in qemu):
>>>
>>> / # ./es_test
>>> Allocated GDT index 7
>>> [FAIL]ES changed from 0x3b to 0x7b
>>> [FAIL]ES was corrupted 1000/1000 times
>>> / # uname -a
>>> Linux (none) 4.0.0-rc1 #1 SMP Tue Feb 24 16:41:58 CET 2015 i686 GNU/Linux
>>
>> Want to send a patch?  I'll get it in a few days if no one beats me.
>
> I have no patch, sorry (in fact, I failed to find where is the relevant
> 32-bit counterpart).
>
> It's just security people asked me to backport this and I wondered
> maybe I should wait a bit on this one, since fix for 32-bit ought
> to appear as well.

For 32-bit kernel, userspace DS and ES are saved at syscall/interrupt
entry time and reloaded on exit, unlike in 64-bit where they are saved
and loaded at context switch time.  Therefore 32-bit is not affected
by the issue this patch addresses.

It looks to me though, that the ES test program doesn't actually test
what the patch fixes - the segment attributes, like the base address.
It tests just the selector, which shouldn't change across a kernel
entry (with a few exceptions, like signals).  If the test is failing,
then it is a different issue from what this patch addresses.

--
Brian Gerst
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-24 Thread Denys Vlasenko
On Tue, Feb 24, 2015 at 9:02 PM, Andy Lutomirski  wrote:
>> This currently fails in 32-bit kernels (at least in qemu):
>>
>> / # ./es_test
>> Allocated GDT index 7
>> [FAIL]ES changed from 0x3b to 0x7b
>> [FAIL]ES was corrupted 1000/1000 times
>> / # uname -a
>> Linux (none) 4.0.0-rc1 #1 SMP Tue Feb 24 16:41:58 CET 2015 i686 GNU/Linux
>
> Want to send a patch?  I'll get it in a few days if no one beats me.

I have no patch, sorry (in fact, I failed to find where is the relevant
32-bit counterpart).

It's just security people asked me to backport this and I wondered
maybe I should wait a bit on this one, since fix for 32-bit ought
to appear as well.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-24 Thread Andy Lutomirski
On Tue, Feb 24, 2015 at 7:47 AM, Denys Vlasenko
 wrote:
> On Tue, Feb 17, 2015 at 2:46 AM, Ben Hutchings  wrote:
>> 3.2.67-rc1 review patch.  If anyone has any objections, please let me know.
>>
>> --
>>
>> From: Andy Lutomirski 
>>
>> commit f647d7c155f069c1a068030255c300663516420e upstream.
>>
>> Otherwise, if buggy user code points DS or ES into the TLS
>> array, they would be corrupted after a context switch.
>>
>> This also significantly improves the comments and documents some
>> gotchas in the code.
>>
>> Before this patch, the both tests below failed.  With this
>> patch, the es test passes, although the gsbase test still fails.
>>
>>  - begin es test -
>>
>> /*
>>  * Copyright (c) 2014 Andy Lutomirski
>>  * GPL v2
>>  */
>>
>> static unsigned short GDT3(int idx)
>> {
>> return (idx << 3) | 3;
>> }
>>
>> static int create_tls(int idx, unsigned int base)
>> {
>> struct user_desc desc = {
>> .entry_number= idx,
>> .base_addr   = base,
>> .limit   = 0xf,
>> .seg_32bit   = 1,
>> .contents= 0, /* Data, grow-up */
>> .read_exec_only  = 0,
>> .limit_in_pages  = 1,
>> .seg_not_present = 0,
>> .useable = 0,
>> };
>>
>> if (syscall(SYS_set_thread_area, &desc) != 0)
>> err(1, "set_thread_area");
>>
>> return desc.entry_number;
>> }
>>
>> int main()
>> {
>> int idx = create_tls(-1, 0);
>> printf("Allocated GDT index %d\n", idx);
>>
>> unsigned short orig_es;
>> asm volatile ("mov %%es,%0" : "=rm" (orig_es));
>>
>> int errors = 0;
>> int total = 1000;
>> for (int i = 0; i < total; i++) {
>> asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx)));
>> usleep(100);
>>
>> unsigned short es;
>> asm volatile ("mov %%es,%0" : "=rm" (es));
>> asm volatile ("mov %0,%%es" : : "rm" (orig_es));
>> if (es != GDT3(idx)) {
>> if (errors == 0)
>> printf("[FAIL]\tES changed from 0x%hx to 
>> 0x%hx\n",
>>GDT3(idx), es);
>> errors++;
>> }
>> }
>>
>> if (errors) {
>> printf("[FAIL]\tES was corrupted %d/%d times\n", errors, 
>> total);
>> return 1;
>> } else {
>> printf("[OK]\tES was preserved\n");
>> return 0;
>> }
>> }
>>
>>  - end es test -
>
> This currently fails in 32-bit kernels (at least in qemu):
>
> / # ./es_test
> Allocated GDT index 7
> [FAIL]ES changed from 0x3b to 0x7b
> [FAIL]ES was corrupted 1000/1000 times
> / # uname -a
> Linux (none) 4.0.0-rc1 #1 SMP Tue Feb 24 16:41:58 CET 2015 i686 GNU/Linux

Want to send a patch?  I'll get it in a few days if no one beats me.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-24 Thread Denys Vlasenko
On Tue, Feb 17, 2015 at 2:46 AM, Ben Hutchings  wrote:
> 3.2.67-rc1 review patch.  If anyone has any objections, please let me know.
>
> --
>
> From: Andy Lutomirski 
>
> commit f647d7c155f069c1a068030255c300663516420e upstream.
>
> Otherwise, if buggy user code points DS or ES into the TLS
> array, they would be corrupted after a context switch.
>
> This also significantly improves the comments and documents some
> gotchas in the code.
>
> Before this patch, the both tests below failed.  With this
> patch, the es test passes, although the gsbase test still fails.
>
>  - begin es test -
>
> /*
>  * Copyright (c) 2014 Andy Lutomirski
>  * GPL v2
>  */
>
> static unsigned short GDT3(int idx)
> {
> return (idx << 3) | 3;
> }
>
> static int create_tls(int idx, unsigned int base)
> {
> struct user_desc desc = {
> .entry_number= idx,
> .base_addr   = base,
> .limit   = 0xf,
> .seg_32bit   = 1,
> .contents= 0, /* Data, grow-up */
> .read_exec_only  = 0,
> .limit_in_pages  = 1,
> .seg_not_present = 0,
> .useable = 0,
> };
>
> if (syscall(SYS_set_thread_area, &desc) != 0)
> err(1, "set_thread_area");
>
> return desc.entry_number;
> }
>
> int main()
> {
> int idx = create_tls(-1, 0);
> printf("Allocated GDT index %d\n", idx);
>
> unsigned short orig_es;
> asm volatile ("mov %%es,%0" : "=rm" (orig_es));
>
> int errors = 0;
> int total = 1000;
> for (int i = 0; i < total; i++) {
> asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx)));
> usleep(100);
>
> unsigned short es;
> asm volatile ("mov %%es,%0" : "=rm" (es));
> asm volatile ("mov %0,%%es" : : "rm" (orig_es));
> if (es != GDT3(idx)) {
> if (errors == 0)
> printf("[FAIL]\tES changed from 0x%hx to 
> 0x%hx\n",
>GDT3(idx), es);
> errors++;
> }
> }
>
> if (errors) {
> printf("[FAIL]\tES was corrupted %d/%d times\n", errors, 
> total);
> return 1;
> } else {
> printf("[OK]\tES was preserved\n");
> return 0;
> }
> }
>
>  - end es test -

This currently fails in 32-bit kernels (at least in qemu):

/ # ./es_test
Allocated GDT index 7
[FAIL]ES changed from 0x3b to 0x7b
[FAIL]ES was corrupted 1000/1000 times
/ # uname -a
Linux (none) 4.0.0-rc1 #1 SMP Tue Feb 24 16:41:58 CET 2015 i686 GNU/Linux
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.2 055/152] x86_64, switch_to(): Load TLS descriptors before switching DS and ES

2015-02-16 Thread Ben Hutchings
3.2.67-rc1 review patch.  If anyone has any objections, please let me know.

--

From: Andy Lutomirski 

commit f647d7c155f069c1a068030255c300663516420e upstream.

Otherwise, if buggy user code points DS or ES into the TLS
array, they would be corrupted after a context switch.

This also significantly improves the comments and documents some
gotchas in the code.

Before this patch, the both tests below failed.  With this
patch, the es test passes, although the gsbase test still fails.

 - begin es test -

/*
 * Copyright (c) 2014 Andy Lutomirski
 * GPL v2
 */

static unsigned short GDT3(int idx)
{
return (idx << 3) | 3;
}

static int create_tls(int idx, unsigned int base)
{
struct user_desc desc = {
.entry_number= idx,
.base_addr   = base,
.limit   = 0xf,
.seg_32bit   = 1,
.contents= 0, /* Data, grow-up */
.read_exec_only  = 0,
.limit_in_pages  = 1,
.seg_not_present = 0,
.useable = 0,
};

if (syscall(SYS_set_thread_area, &desc) != 0)
err(1, "set_thread_area");

return desc.entry_number;
}

int main()
{
int idx = create_tls(-1, 0);
printf("Allocated GDT index %d\n", idx);

unsigned short orig_es;
asm volatile ("mov %%es,%0" : "=rm" (orig_es));

int errors = 0;
int total = 1000;
for (int i = 0; i < total; i++) {
asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx)));
usleep(100);

unsigned short es;
asm volatile ("mov %%es,%0" : "=rm" (es));
asm volatile ("mov %0,%%es" : : "rm" (orig_es));
if (es != GDT3(idx)) {
if (errors == 0)
printf("[FAIL]\tES changed from 0x%hx to 
0x%hx\n",
   GDT3(idx), es);
errors++;
}
}

if (errors) {
printf("[FAIL]\tES was corrupted %d/%d times\n", errors, total);
return 1;
} else {
printf("[OK]\tES was preserved\n");
return 0;
}
}

 - end es test -

 - begin gsbase test -

/*
 * gsbase.c, a gsbase test
 * Copyright (c) 2014 Andy Lutomirski
 * GPL v2
 */

static unsigned char *testptr, *testptr2;

static unsigned char read_gs_testvals(void)
{
unsigned char ret;
asm volatile ("movb %%gs:%1, %0" : "=r" (ret) : "m" (*testptr));
return ret;
}

int main()
{
int errors = 0;

testptr = mmap((void *)0x2UL, 1, PROT_READ | PROT_WRITE,
   MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
if (testptr == MAP_FAILED)
err(1, "mmap");

testptr2 = mmap((void *)0x3UL, 1, PROT_READ | PROT_WRITE,
   MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
if (testptr2 == MAP_FAILED)
err(1, "mmap");

*testptr = 0;
*testptr2 = 1;

if (syscall(SYS_arch_prctl, ARCH_SET_GS,
(unsigned long)testptr2 - (unsigned long)testptr) != 0)
err(1, "ARCH_SET_GS");

usleep(100);

if (read_gs_testvals() == 1) {
printf("[OK]\tARCH_SET_GS worked\n");
} else {
printf("[FAIL]\tARCH_SET_GS failed\n");
errors++;
}

asm volatile ("mov %0,%%gs" : : "r" (0));

if (read_gs_testvals() == 0) {
printf("[OK]\tWriting 0 to gs worked\n");
} else {
printf("[FAIL]\tWriting 0 to gs failed\n");
errors++;
}

usleep(100);

if (read_gs_testvals() == 0) {
printf("[OK]\tgsbase is still zero\n");
} else {
printf("[FAIL]\tgsbase was corrupted\n");
errors++;
}

return errors == 0 ? 0 : 1;
}

 - end gsbase test -

Signed-off-by: Andy Lutomirski 
Cc: Andi Kleen 
Cc: Linus Torvalds 
Link: 
http://lkml.kernel.org/r/509d27c9fec78217691c3dad91cec87e1006b34a.1418075657.git.l...@amacapital.net
Signed-off-by: Ingo Molnar 
Signed-off-by: Ben Hutchings 
---
 arch/x86/kernel/process_64.c | 101 +++
 1 file changed, 73 insertions(+), 28 deletions(-)

--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -385,24 +385,9 @@ __switch_to(struct task_struct *prev_p,
 
fpu = switch_fpu_prepare(prev_p, next_p);
 
-   /*
-* Reload esp0, LDT and the page table pointer:
-*/
+   /* Reload esp0 and ss1. */
load_sp0(tss, next);
 
-   /*
-* Switch DS and ES.
-* This won't pick up thread selector changes, but I guess that is ok.
-*/
-   savesegment(es, prev