[PATCH 28/30] x86, kaiser: allow KAISER to be enabled/disabled at runtime

2017-11-10 Thread Dave Hansen

From: Dave Hansen 

The KAISER CR3 switches are expensive for many reasons.  Not all systems
benefit from the protection provided by KAISER.  Some of them can not
pay the high performance cost.

This patch adds a debugfs file.  To disable KAISER, you do:

echo 0 > /sys/kernel/debug/x86/kaiser-enabled

and to re-enable it, you can:

echo 1 > /sys/kernel/debug/x86/kaiser-enabled

This is a *minimal* implementation.  There are certainly plenty of
optimizations that can be done on top of this by using ALTERNATIVES
among other things.

This does, however, completely remove all the KAISER-based CR3 writes.
This permits a paravirtualized system that can not tolerate CR3
writes to theoretically survive with CONFIG_KAISER=y, albeit with
/sys/kernel/debug/x86/kaiser-enabled=0.

Signed-off-by: Dave Hansen 
Cc: Moritz Lipp 
Cc: Daniel Gruss 
Cc: Michael Schwarz 
Cc: Richard Fellner 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Kees Cook 
Cc: Hugh Dickins 
Cc: x...@kernel.org
---

 b/arch/x86/entry/calling.h |   12 +++
 b/arch/x86/mm/kaiser.c |   70 ++---
 2 files changed, 78 insertions(+), 4 deletions(-)

diff -puN arch/x86/entry/calling.h~kaiser-dynamic-asm arch/x86/entry/calling.h
--- a/arch/x86/entry/calling.h~kaiser-dynamic-asm   2017-11-10 
11:22:20.575244921 -0800
+++ b/arch/x86/entry/calling.h  2017-11-10 11:22:20.580244921 -0800
@@ -208,19 +208,29 @@ For 32-bit we have the following convent
orq $(KAISER_SWITCH_MASK), \reg
 .endm
 
+.macro JUMP_IF_KAISER_OFF  label
+   testq   $1, kaiser_asm_do_switch
+   jz  \label
+.endm
+
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_KERNEL_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SWITCH_TO_USER_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_USER_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
movq%cr3, %r\scratch_reg
movq%r\scratch_reg, \save_reg
/*
@@ -243,11 +253,13 @@ For 32-bit we have the following convent
 .endm
 
 .macro RESTORE_CR3 save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
/*
 * We could avoid the CR3 write if not changing its value,
 * but that requires a CR3 read *and* a scratch register.
 */
movq\save_reg, %cr3
+.Ldone_\@:
 .endm
 
 #else /* CONFIG_KAISER=n: */
diff -puN arch/x86/mm/kaiser.c~kaiser-dynamic-asm arch/x86/mm/kaiser.c
--- a/arch/x86/mm/kaiser.c~kaiser-dynamic-asm   2017-11-10 11:22:20.577244921 
-0800
+++ b/arch/x86/mm/kaiser.c  2017-11-10 11:22:20.581244921 -0800
@@ -42,6 +42,9 @@
 #include 
 #include 
 
+__aligned(PAGE_SIZE)
+unsigned long kaiser_asm_do_switch[PAGE_SIZE/sizeof(unsigned long)] = { 1 };
+
 /*
  * At runtime, the only things we map are some things for CPU
  * hotplug, and stacks for new processes.  No two CPUs will ever
@@ -366,6 +369,9 @@ void __init kaiser_init(void)
 
kaiser_init_all_pgds();
 
+   kaiser_add_user_map_early(&kaiser_asm_do_switch, PAGE_SIZE,
+ __PAGE_KERNEL | _PAGE_GLOBAL);
+
for_each_possible_cpu(cpu) {
void *percpu_vaddr = __per_cpu_user_mapped_start +
 per_cpu_offset(cpu);
@@ -470,6 +476,56 @@ static ssize_t kaiser_enabled_read_file(
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
+enum poison {
+   KAISER_POISON,
+   KAISER_UNPOISON
+};
+void kaiser_poison_pgds(enum poison do_poison);
+
+void kaiser_do_disable(void)
+{
+   /* Make sure the kernel PGDs are usable by userspace: */
+   kaiser_poison_pgds(KAISER_UNPOISON);
+
+   /*
+* Make sure all the CPUs have the poison clear in their TLBs.
+* This also functions as a barrier to ensure that everyone
+* sees the unpoisoned PGDs.
+*/
+   flush_tlb_all();
+
+   /* Tell the assembly code to stop switching CR3. */
+   kaiser_asm_do_switch[0] = 0;
+
+   /*
+* Make sure everybody does an interrupt.  This means that
+* they have gone through a SWITCH_TO_KERNEL_CR3 and are no
+* longer running on the userspace CR3.  If we did not do
+* this, we might have CPUs running on the shadow page tables
+* that then enter the kernel and think they do *not* need to
+* switch.
+*/
+   flush_tlb_all();
+}
+
+void 

[PATCH 28/30] x86, kaiser: allow KAISER to be enabled/disabled at runtime

2017-11-10 Thread Dave Hansen

From: Dave Hansen 

The KAISER CR3 switches are expensive for many reasons.  Not all systems
benefit from the protection provided by KAISER.  Some of them can not
pay the high performance cost.

This patch adds a debugfs file.  To disable KAISER, you do:

echo 0 > /sys/kernel/debug/x86/kaiser-enabled

and to re-enable it, you can:

echo 1 > /sys/kernel/debug/x86/kaiser-enabled

This is a *minimal* implementation.  There are certainly plenty of
optimizations that can be done on top of this by using ALTERNATIVES
among other things.

This does, however, completely remove all the KAISER-based CR3 writes.
This permits a paravirtualized system that can not tolerate CR3
writes to theoretically survive with CONFIG_KAISER=y, albeit with
/sys/kernel/debug/x86/kaiser-enabled=0.

Signed-off-by: Dave Hansen 
Cc: Moritz Lipp 
Cc: Daniel Gruss 
Cc: Michael Schwarz 
Cc: Richard Fellner 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Kees Cook 
Cc: Hugh Dickins 
Cc: x...@kernel.org
---

 b/arch/x86/entry/calling.h |   12 +++
 b/arch/x86/mm/kaiser.c |   70 ++---
 2 files changed, 78 insertions(+), 4 deletions(-)

diff -puN arch/x86/entry/calling.h~kaiser-dynamic-asm arch/x86/entry/calling.h
--- a/arch/x86/entry/calling.h~kaiser-dynamic-asm   2017-11-10 
11:22:20.575244921 -0800
+++ b/arch/x86/entry/calling.h  2017-11-10 11:22:20.580244921 -0800
@@ -208,19 +208,29 @@ For 32-bit we have the following convent
orq $(KAISER_SWITCH_MASK), \reg
 .endm
 
+.macro JUMP_IF_KAISER_OFF  label
+   testq   $1, kaiser_asm_do_switch
+   jz  \label
+.endm
+
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_KERNEL_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SWITCH_TO_USER_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_USER_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
movq%cr3, %r\scratch_reg
movq%r\scratch_reg, \save_reg
/*
@@ -243,11 +253,13 @@ For 32-bit we have the following convent
 .endm
 
 .macro RESTORE_CR3 save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
/*
 * We could avoid the CR3 write if not changing its value,
 * but that requires a CR3 read *and* a scratch register.
 */
movq\save_reg, %cr3
+.Ldone_\@:
 .endm
 
 #else /* CONFIG_KAISER=n: */
diff -puN arch/x86/mm/kaiser.c~kaiser-dynamic-asm arch/x86/mm/kaiser.c
--- a/arch/x86/mm/kaiser.c~kaiser-dynamic-asm   2017-11-10 11:22:20.577244921 
-0800
+++ b/arch/x86/mm/kaiser.c  2017-11-10 11:22:20.581244921 -0800
@@ -42,6 +42,9 @@
 #include 
 #include 
 
+__aligned(PAGE_SIZE)
+unsigned long kaiser_asm_do_switch[PAGE_SIZE/sizeof(unsigned long)] = { 1 };
+
 /*
  * At runtime, the only things we map are some things for CPU
  * hotplug, and stacks for new processes.  No two CPUs will ever
@@ -366,6 +369,9 @@ void __init kaiser_init(void)
 
kaiser_init_all_pgds();
 
+   kaiser_add_user_map_early(&kaiser_asm_do_switch, PAGE_SIZE,
+ __PAGE_KERNEL | _PAGE_GLOBAL);
+
for_each_possible_cpu(cpu) {
void *percpu_vaddr = __per_cpu_user_mapped_start +
 per_cpu_offset(cpu);
@@ -470,6 +476,56 @@ static ssize_t kaiser_enabled_read_file(
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
+enum poison {
+   KAISER_POISON,
+   KAISER_UNPOISON
+};
+void kaiser_poison_pgds(enum poison do_poison);
+
+void kaiser_do_disable(void)
+{
+   /* Make sure the kernel PGDs are usable by userspace: */
+   kaiser_poison_pgds(KAISER_UNPOISON);
+
+   /*
+* Make sure all the CPUs have the poison clear in their TLBs.
+* This also functions as a barrier to ensure that everyone
+* sees the unpoisoned PGDs.
+*/
+   flush_tlb_all();
+
+   /* Tell the assembly code to stop switching CR3. */
+   kaiser_asm_do_switch[0] = 0;
+
+   /*
+* Make sure everybody does an interrupt.  This means that
+* they have gone through a SWITCH_TO_KERNEL_CR3 and are no
+* longer running on the userspace CR3.  If we did not do
+* this, we might have CPUs running on the shadow page tables
+* that then enter the kernel and think they do *not* need to
+* switch.
+*/
+   flush_tlb_all();
+}
+
+void kaiser_do_enable(void)
+{
+   /* Tell the assembly code to start switching CR3: */
+   kaiser_asm_do_switch[0] = 1;
+
+   /* Make sure everyone can see the kaiser_asm_do_switch update: */
+   synchronize_rcu();
+
+   /*
+* Now that userspace is no longer using 

[PATCH 28/30] x86, kaiser: allow KAISER to be enabled/disabled at runtime

2017-11-08 Thread Dave Hansen

From: Dave Hansen 

The KAISER CR3 switches are expensive for many reasons.  Not all systems
benefit from the protection provided by KAISER.  Some of them can not
pay the high performance cost.

This patch adds a debugfs file.  To disable KAISER, you do:

echo 0 > /sys/kernel/debug/x86/kaiser-enabled

and to reenable it, you can:

echo 1 > /sys/kernel/debug/x86/kaiser-enabled

This is a *minimal* implementation.  There are certainly plenty of
optimizations that we can do on top of this by using ALTERNATIVES
among other things.

This does, however, completely remove all the KAISER-based CR3 writes.
So, a paravirtualized system that can not tolerate CR3 writes can
theoretically survive with CONFIG_KAISER=y, but with
/sys/kernel/debug/x86/kaiser-enabled=0.

Signed-off-by: Dave Hansen 
Cc: Moritz Lipp 
Cc: Daniel Gruss 
Cc: Michael Schwarz 
Cc: Richard Fellner 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Kees Cook 
Cc: Hugh Dickins 
Cc: x...@kernel.org
---

 b/arch/x86/entry/calling.h |   12 +++
 b/arch/x86/mm/kaiser.c |   70 ++---
 2 files changed, 78 insertions(+), 4 deletions(-)

diff -puN arch/x86/entry/calling.h~kaiser-dynamic-asm arch/x86/entry/calling.h
--- a/arch/x86/entry/calling.h~kaiser-dynamic-asm   2017-11-08 
10:45:41.361681365 -0800
+++ b/arch/x86/entry/calling.h  2017-11-08 10:45:41.366681365 -0800
@@ -208,19 +208,29 @@ For 32-bit we have the following convent
orq $(KAISER_SWITCH_MASK), \reg
 .endm
 
+.macro JUMP_IF_KAISER_OFF  label
+   testq   $1, kaiser_asm_do_switch
+   jz  \label
+.endm
+
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_KERNEL_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SWITCH_TO_USER_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_USER_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
movq%cr3, %r\scratch_reg
movq%r\scratch_reg, \save_reg
/*
@@ -243,11 +253,13 @@ For 32-bit we have the following convent
 .endm
 
 .macro RESTORE_CR3 save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
/*
 * We could avoid the CR3 write if not changing its value,
 * but that requires a CR3 read *and* a scratch register.
 */
movq\save_reg, %cr3
+.Ldone_\@:
 .endm
 
 #else /* CONFIG_KAISER=n: */
diff -puN arch/x86/mm/kaiser.c~kaiser-dynamic-asm arch/x86/mm/kaiser.c
--- a/arch/x86/mm/kaiser.c~kaiser-dynamic-asm   2017-11-08 10:45:41.363681365 
-0800
+++ b/arch/x86/mm/kaiser.c  2017-11-08 10:45:41.367681365 -0800
@@ -31,6 +31,9 @@
 #include 
 #include 
 
+__aligned(PAGE_SIZE)
+unsigned long kaiser_asm_do_switch[PAGE_SIZE/sizeof(unsigned long)] = { 1 };
+
 /*
  * At runtime, the only things we map are some things for CPU
  * hotplug, and stacks for new processes.  No two CPUs will ever
@@ -355,6 +358,9 @@ void __init kaiser_init(void)
 
kaiser_init_all_pgds();
 
+   kaiser_add_user_map_early(&kaiser_asm_do_switch, PAGE_SIZE,
+ __PAGE_KERNEL | _PAGE_GLOBAL);
+
for_each_possible_cpu(cpu) {
void *percpu_vaddr = __per_cpu_user_mapped_start +
 per_cpu_offset(cpu);
@@ -459,6 +465,56 @@ static ssize_t kaiser_enabled_read_file(
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
+enum poison {
+   KAISER_POISON,
+   KAISER_UNPOISON
+};
+void kaiser_poison_pgds(enum poison do_poison);
+
+void kaiser_do_disable(void)
+{
+   /* Make sure the kernel PGDs are usable by userspace: */
+   kaiser_poison_pgds(KAISER_UNPOISON);
+
+   /*
+* Make sure all the CPUs have the poison clear in their TLBs.
+* This also functions as a barrier to ensure that everyone
+* sees the unpoisoned PGDs.
+*/
+   flush_tlb_all();
+
+   /* Tell the assembly code to stop switching CR3. */
+   kaiser_asm_do_switch[0] = 0;
+
+   /*
+* Make sure everybody does an interrupt.  This means that
+* they have gone through a SWITCH_TO_KERNEL_CR3 and are no
+* longer running on the userspace CR3.  If we did not do
+* this, we might have CPUs running on the shadow page tables
+* that then enter the kernel and think they do *not* need to
+* switch.
+*/
+   flush_tlb_all();
+}
+
+void kaiser_do_enable(void)
+{
+   

[PATCH 28/30] x86, kaiser: allow KAISER to be enabled/disabled at runtime

2017-11-08 Thread Dave Hansen

From: Dave Hansen 

The KAISER CR3 switches are expensive for many reasons.  Not all systems
benefit from the protection provided by KAISER.  Some of them can not
pay the high performance cost.

This patch adds a debugfs file.  To disable KAISER, you do:

echo 0 > /sys/kernel/debug/x86/kaiser-enabled

and to reenable it, you can:

echo 1 > /sys/kernel/debug/x86/kaiser-enabled

This is a *minimal* implementation.  There are certainly plenty of
optimizations that we can do on top of this by using ALTERNATIVES
among other things.

This does, however, completely remove all the KAISER-based CR3 writes.
So, a paravirtualized system that can not tolerate CR3 writes can
theoretically survive with CONFIG_KAISER=y, but with
/sys/kernel/debug/x86/kaiser-enabled=0.

Signed-off-by: Dave Hansen 
Cc: Moritz Lipp 
Cc: Daniel Gruss 
Cc: Michael Schwarz 
Cc: Richard Fellner 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Kees Cook 
Cc: Hugh Dickins 
Cc: x...@kernel.org
---

 b/arch/x86/entry/calling.h |   12 +++
 b/arch/x86/mm/kaiser.c |   70 ++---
 2 files changed, 78 insertions(+), 4 deletions(-)

diff -puN arch/x86/entry/calling.h~kaiser-dynamic-asm arch/x86/entry/calling.h
--- a/arch/x86/entry/calling.h~kaiser-dynamic-asm   2017-11-08 
10:45:41.361681365 -0800
+++ b/arch/x86/entry/calling.h  2017-11-08 10:45:41.366681365 -0800
@@ -208,19 +208,29 @@ For 32-bit we have the following convent
orq $(KAISER_SWITCH_MASK), \reg
 .endm
 
+.macro JUMP_IF_KAISER_OFF  label
+   testq   $1, kaiser_asm_do_switch
+   jz  \label
+.endm
+
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_KERNEL_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SWITCH_TO_USER_CR3 scratch_reg:req
+   JUMP_IF_KAISER_OFF  .Lswitch_done_\@
mov %cr3, \scratch_reg
ADJUST_USER_CR3 \scratch_reg
mov \scratch_reg, %cr3
+.Lswitch_done_\@:
 .endm
 
 .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
movq%cr3, %r\scratch_reg
movq%r\scratch_reg, \save_reg
/*
@@ -243,11 +253,13 @@ For 32-bit we have the following convent
 .endm
 
 .macro RESTORE_CR3 save_reg:req
+   JUMP_IF_KAISER_OFF  .Ldone_\@
/*
 * We could avoid the CR3 write if not changing its value,
 * but that requires a CR3 read *and* a scratch register.
 */
movq\save_reg, %cr3
+.Ldone_\@:
 .endm
 
 #else /* CONFIG_KAISER=n: */
diff -puN arch/x86/mm/kaiser.c~kaiser-dynamic-asm arch/x86/mm/kaiser.c
--- a/arch/x86/mm/kaiser.c~kaiser-dynamic-asm   2017-11-08 10:45:41.363681365 
-0800
+++ b/arch/x86/mm/kaiser.c  2017-11-08 10:45:41.367681365 -0800
@@ -31,6 +31,9 @@
 #include 
 #include 
 
+__aligned(PAGE_SIZE)
+unsigned long kaiser_asm_do_switch[PAGE_SIZE/sizeof(unsigned long)] = { 1 };
+
 /*
  * At runtime, the only things we map are some things for CPU
  * hotplug, and stacks for new processes.  No two CPUs will ever
@@ -355,6 +358,9 @@ void __init kaiser_init(void)
 
kaiser_init_all_pgds();
 
+   kaiser_add_user_map_early(&kaiser_asm_do_switch, PAGE_SIZE,
+ __PAGE_KERNEL | _PAGE_GLOBAL);
+
for_each_possible_cpu(cpu) {
void *percpu_vaddr = __per_cpu_user_mapped_start +
 per_cpu_offset(cpu);
@@ -459,6 +465,56 @@ static ssize_t kaiser_enabled_read_file(
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
+enum poison {
+   KAISER_POISON,
+   KAISER_UNPOISON
+};
+void kaiser_poison_pgds(enum poison do_poison);
+
+void kaiser_do_disable(void)
+{
+   /* Make sure the kernel PGDs are usable by userspace: */
+   kaiser_poison_pgds(KAISER_UNPOISON);
+
+   /*
+* Make sure all the CPUs have the poison clear in their TLBs.
+* This also functions as a barrier to ensure that everyone
+* sees the unpoisoned PGDs.
+*/
+   flush_tlb_all();
+
+   /* Tell the assembly code to stop switching CR3. */
+   kaiser_asm_do_switch[0] = 0;
+
+   /*
+* Make sure everybody does an interrupt.  This means that
+* they have gone through a SWITCH_TO_KERNEL_CR3 and are no
+* longer running on the userspace CR3.  If we did not do
+* this, we might have CPUs running on the shadow page tables
+* that then enter the kernel and think they do *not* need to
+* switch.
+*/
+   flush_tlb_all();
+}
+
+void kaiser_do_enable(void)
+{
+   /* Tell the assembly code to start switching CR3: */
+   kaiser_asm_do_switch[0] = 1;
+
+   /* Make sure everyone can see the kaiser_asm_do_switch update: */
+   synchronize_rcu();
+
+   /*
+* Now that userspace is no longer using the kernel copy