This implements the simplest of the rai_* operations, loading a value. For a load of an 8-byte value, I believe we do need to keep room for a movabs, since there's no guarantee the final value can be loaded as an imm32 or using a %rip-relative leaq.
It wouldn't hurt to add some sanity checking in rai_patch_one, e.g. at least check that the immediate we are replacing is the dummy 0x12345678 we used in the .rai_templ section.

That the patching works can be seen in a quick virtme session. gdb on vmlinux and /proc/kcore shows:

(gdb) x/16i rai_proc_show
0xffffffff8108c120 <rai_proc_show>: mov $0xffffffff81fd9ad4,%rsi
0xffffffff8108c127 <rai_proc_show+7>: jmpq 0xffffffff819652e9
0xffffffff8108c12c <rai_proc_show+12>: nop
0xffffffff8108c12d <rai_proc_show+13>: nop
0xffffffff8108c12e <rai_proc_show+14>: nop
0xffffffff8108c12f <rai_proc_show+15>: nop
0xffffffff8108c130 <rai_proc_show+16>: nop
0xffffffff8108c131 <rai_proc_show+17>: jmpq 0xffffffff819652f5
0xffffffff8108c136 <rai_proc_show+22>: jmpq 0xffffffff81965300
0xffffffff8108c13b <rai_proc_show+27>: callq 0xffffffff81238bb0 <seq_printf>
0xffffffff8108c140 <rai_proc_show+32>: mov $0xffffffffffffffff,%rax
0xffffffff8108c147 <rai_proc_show+39>: mov %rax,0x17b228a(%rip) # 0xffffffff8283e3d8 <three>
0xffffffff8108c14e <rai_proc_show+46>: mov %eax,0x17b228c(%rip) # 0xffffffff8283e3e0 <two>
0xffffffff8108c154 <rai_proc_show+52>: mov %eax,0x17b228a(%rip) # 0xffffffff8283e3e4 <one>
0xffffffff8108c15a <rai_proc_show+58>: xor %eax,%eax
0xffffffff8108c15c <rai_proc_show+60>: retq
(gdb) x/16i 0xffffffff96e8c120
0xffffffff96e8c120: mov $0xffffffff97dd9ad4,%rsi
0xffffffff96e8c127: movabs $0x3,%r8
0xffffffff96e8c131: mov $0x2,%ecx
0xffffffff96e8c136: mov $0x1,%edx
0xffffffff96e8c13b: callq 0xffffffff97038bb0
0xffffffff96e8c140: mov $0xffffffffffffffff,%rax
0xffffffff96e8c147: mov %rax,0x17b228a(%rip) # 0xffffffff9863e3d8
0xffffffff96e8c14e: mov %eax,0x17b228c(%rip) # 0xffffffff9863e3e0
0xffffffff96e8c154: mov %eax,0x17b228a(%rip) # 0xffffffff9863e3e4
0xffffffff96e8c15a: xor %eax,%eax
0xffffffff96e8c15c: retq
0xffffffff96e8c15d: nopl (%rax)
0xffffffff96e8c160: push %rbx
0xffffffff96e8c161: mov $0xffffffff9804c240,%rdi
0xffffffff96e8c168: mov $0xffffffff97e9fccc,%rbx
0xffffffff96e8c16f: callq 0xffffffff9776b230 where we also see that gcc chooses the destination registers rather intelligently. As expected, repeated "cat /proc/rai" continues to print "one: 1, two: 2, three: 3". Signed-off-by: Rasmus Villemoes <li...@rasmusvillemoes.dk> --- arch/x86/include/asm/rai.S | 42 +++++++++++++++++++++++++++++++++++++- arch/x86/include/asm/rai.h | 30 ++++++++++++++++++++++++++- arch/x86/kernel/rai.c | 18 ++++++++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/rai.S b/arch/x86/include/asm/rai.S index 253d27453416..f42cdd8db876 100644 --- a/arch/x86/include/asm/rai.S +++ b/arch/x86/include/asm/rai.S @@ -8,11 +8,51 @@ .long \templ_end - \templ .long \thunk - . .endm - + .macro rai_entry_pad start end .ifgt STRUCT_RAI_ENTRY_SIZE-(\end-\start) .skip STRUCT_RAI_ENTRY_SIZE-(\end-\start), 0x00 .endif .endm +.macro rai_load dst, var, type + .pushsection .rai_templ, "aw" +10: + .ifeq \type - RAI_LOAD_8 + movabs $0x1234567812345678, \dst + .else + mov $0x12345678, \dst + .endif +11: + .popsection + + /* Even if the mov \var, \dst is short enough to fit in the + * space we reserve in .text, we still need the thunk for when + * we do the immediate patching. 
*/ + .pushsection .text.rai_thunk, "ax" +20: + mov \var(%rip), \dst + jmp 32f +21: + .popsection + + /* The part that goes into .text */ +30: + /* silence objtool by actually using the thunk for now */ + jmp 20b + /* mov \var(%rip), \dst */ +31: + .skip -(((11b - 10b)-(31b - 30b)) > 0)*((11b - 10b)-(31b - 30b)), 0x90 +32: + + .pushsection .rai_data, "a" +40: + rai_entry \type 30b 32b 10b 11b 20b + .quad \var /* .load.addr */ +41: + rai_entry_pad 40b 41b + .popsection +.endm /* rai_load */ + + #endif diff --git a/arch/x86/include/asm/rai.h b/arch/x86/include/asm/rai.h index 269d696255b0..b57494c98d0f 100644 --- a/arch/x86/include/asm/rai.h +++ b/arch/x86/include/asm/rai.h @@ -1,7 +1,10 @@ #ifndef _ASM_X86_RAI_H #define _ASM_X86_RAI_H -#define STRUCT_RAI_ENTRY_SIZE 24 +#define RAI_LOAD_4 0 +#define RAI_LOAD_8 1 + +#define STRUCT_RAI_ENTRY_SIZE 32 /* Put the asm macros in a separate file for easier editing. */ #include <asm/rai.S> @@ -16,10 +19,35 @@ struct rai_entry { s32 templ_len; /* length of template */ s32 thunk_offset; /* member-relative offset to ool thunk */ /* type-specific data follows */ + union { + struct { + void *addr; + } load; + }; }; _Static_assert(sizeof(struct rai_entry) == STRUCT_RAI_ENTRY_SIZE, "please update STRUCT_RAI_ENTRY_SIZE"); +#define _rai_load(var) ({ \ + typeof(var) ret__; \ + switch(sizeof(var)) { \ + case 4: \ + asm("rai_load %0, %c1, %c2" \ + : "=r" (ret__) \ + : "i" (&(var)), "i" (RAI_LOAD_4)); \ + break; \ + case 8: \ + asm("rai_load %0, %c1, %c2" \ + : "=r" (ret__) \ + : "i" (&(var)), "i" (RAI_LOAD_8)); \ + break; \ + default: \ + ret__ = _rai_load_fallback(var); \ + break; \ + } \ + ret__; \ + }) + #endif /* !__ASSEMBLY */ #endif /* _ASM_X86_RAI_H */ diff --git a/arch/x86/kernel/rai.c b/arch/x86/kernel/rai.c index 819d03a025e3..e55e85f11a2e 100644 --- a/arch/x86/kernel/rai.c +++ b/arch/x86/kernel/rai.c @@ -14,6 +14,24 @@ rai_patch_one(const struct rai_entry *r) u8 *thunk = (u8*)&r->thunk_offset + r->thunk_offset; switch (r->type) 
{ + case RAI_LOAD_4: { + const u32 *imm = r->load.addr; + /* + * The immediate is the last 4 bytes of the template, + * regardless of the operand encoding. + */ + memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm)); + break; + } + case RAI_LOAD_8: { + const u64 *imm = r->load.addr; + /* + * The immediate is the last 8 bytes of the template, + * regardless of the operand encoding. + */ + memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm)); + break; + } default: WARN_ONCE(1, "unhandled RAI type %d\n", r->type); return; -- 2.19.1.6.gbde171bbf5