In exit_to_usermode_loop(), call task_isolation_ready() for TIF_TASK_ISOLATION tasks when checking the thread-info flags, so that the loop can exit once the task is ready; after the other pending work has been handled, call task_isolation_enter() for such tasks.
In syscall_trace_enter_phase1(), we add the necessary support for reporting syscalls for task-isolation processes. We add strict reporting for the kernel exception types that do not result in signals, namely non-signalling page faults and non-signalling MPX fixups. Signed-off-by: Chris Metcalf <cmetc...@mellanox.com> --- arch/x86/Kconfig | 1 + arch/x86/entry/common.c | 18 +++++++++++++++++- arch/x86/include/asm/thread_info.h | 2 ++ arch/x86/kernel/smp.c | 2 ++ arch/x86/kernel/traps.c | 3 +++ arch/x86/mm/fault.c | 5 +++++ 6 files changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d9a94da0c29f..0762072ba284 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,6 +89,7 @@ config X86 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY if X86_64 + select HAVE_ARCH_TASK_ISOLATION select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_EBPF_JIT if X86_64 diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec138e538c44..33fc40b29c9f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -21,6 +21,7 @@ #include <linux/context_tracking.h> #include <linux/user-return-notifier.h> #include <linux/uprobes.h> +#include <linux/isolation.h> #include <asm/desc.h> #include <asm/traps.h> @@ -87,6 +88,13 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + /* In isolation mode, we may prevent the syscall from running. 
*/ + if (work & _TIF_TASK_ISOLATION) { + if (task_isolation_syscall(regs->orig_ax) == -1) + return -1; + work &= ~_TIF_TASK_ISOLATION; + } + #ifdef CONFIG_SECCOMP /* * Do seccomp first -- it should minimize exposure of other @@ -202,7 +210,7 @@ long syscall_trace_enter(struct pt_regs *regs) #define EXIT_TO_USERMODE_LOOP_FLAGS \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) + _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_TASK_ISOLATION) static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { @@ -236,11 +244,19 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); + if (cached_flags & _TIF_TASK_ISOLATION) + task_isolation_enter(); + /* Disable IRQs and retry */ local_irq_disable(); cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); + /* Clear task isolation from cached_flags manually. */ + if ((cached_flags & _TIF_TASK_ISOLATION) && + task_isolation_ready()) + cached_flags &= ~_TIF_TASK_ISOLATION; + if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) break; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30c133ac05cd..10167e086f3b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -97,6 +97,7 @@ struct thread_info { #define TIF_SECCOMP 8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_TASK_ISOLATION 13 /* task isolation enabled for task */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -121,6 +122,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) +#define 
_TIF_TASK_ISOLATION (1 << TIF_TASK_ISOLATION) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 658777cf3851..e4ffd9581cdb 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -23,6 +23,7 @@ #include <linux/interrupt.h> #include <linux/cpu.h> #include <linux/gfp.h> +#include <linux/isolation.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> @@ -125,6 +126,7 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } + task_isolation_debug(cpu, "reschedule IPI"); apic->send_IPI(cpu, RESCHEDULE_VECTOR); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 00f03d82e69a..4989af93bb33 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -36,6 +36,7 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/io.h> +#include <linux/isolation.h> #ifdef CONFIG_EISA #include <linux/ioport.h> @@ -383,6 +384,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) case 2: /* Bound directory has invalid entry. */ if (mpx_handle_bd_fault()) goto exit_trap; + /* No signal was generated, but notify task-isolation tasks. */ + task_isolation_quiet_exception("bounds check"); break; /* Success, it was handled */ case 1: /* Bound violation. */ info = mpx_generate_siginfo(regs); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7d1fa7cd2374..655b4ae0c9b8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -14,6 +14,7 @@ #include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ #include <linux/uaccess.h> /* faulthandler_disabled() */ +#include <linux/isolation.h> /* task_isolation_quiet_exception */ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... 
*/ @@ -1397,6 +1398,10 @@ good_area: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } + /* No signal was generated, but notify task-isolation tasks. */ + if (flags & PF_USER) + task_isolation_quiet_exception("page fault at %#lx", address); + check_v8086_mode(regs, address, tsk); } NOKPROBE_SYMBOL(__do_page_fault); -- 2.7.2