[POC 07/12] x86-64: rai: implement _rai_load

Rasmus Villemoes Wed, 17 Oct 2018 15:35:14 -0700

This implements the simplest of the rai_* operations, loading a
value. For load of an 8-byte value, I believe we do need to keep room
for a movabs, since there's no guarantee the final value can be loaded
as an imm32 or using a %rip-relative leaq.


It wouldn't hurt to add some sanity checking in rai_patch_one, e.g. at
least check that the immediate we are replacing is the dummy 0x12345678
(0x1234567812345678 for the 8-byte movabs template) we used in the
.rai_templ section.

That the patching works can be seen in a quick virtme session.  gdb on
vmlinux (the unpatched image) and on /proc/kcore (the running, patched
kernel) shows

(gdb) x/16i rai_proc_show
   0xffffffff8108c120 <rai_proc_show>:  mov    $0xffffffff81fd9ad4,%rsi
   0xffffffff8108c127 <rai_proc_show+7>:        jmpq   0xffffffff819652e9
   0xffffffff8108c12c <rai_proc_show+12>:       nop
   0xffffffff8108c12d <rai_proc_show+13>:       nop
   0xffffffff8108c12e <rai_proc_show+14>:       nop
   0xffffffff8108c12f <rai_proc_show+15>:       nop
   0xffffffff8108c130 <rai_proc_show+16>:       nop
   0xffffffff8108c131 <rai_proc_show+17>:       jmpq   0xffffffff819652f5
   0xffffffff8108c136 <rai_proc_show+22>:       jmpq   0xffffffff81965300
   0xffffffff8108c13b <rai_proc_show+27>:       callq  0xffffffff81238bb0 <seq_printf>
   0xffffffff8108c140 <rai_proc_show+32>:       mov    $0xffffffffffffffff,%rax
   0xffffffff8108c147 <rai_proc_show+39>:       mov    %rax,0x17b228a(%rip)        # 0xffffffff8283e3d8 <three>
   0xffffffff8108c14e <rai_proc_show+46>:       mov    %eax,0x17b228c(%rip)        # 0xffffffff8283e3e0 <two>
   0xffffffff8108c154 <rai_proc_show+52>:       mov    %eax,0x17b228a(%rip)        # 0xffffffff8283e3e4 <one>
   0xffffffff8108c15a <rai_proc_show+58>:       xor    %eax,%eax
   0xffffffff8108c15c <rai_proc_show+60>:       retq
(gdb) x/16i 0xffffffff96e8c120
   0xffffffff96e8c120:  mov    $0xffffffff97dd9ad4,%rsi
   0xffffffff96e8c127:  movabs $0x3,%r8
   0xffffffff96e8c131:  mov    $0x2,%ecx
   0xffffffff96e8c136:  mov    $0x1,%edx
   0xffffffff96e8c13b:  callq  0xffffffff97038bb0
   0xffffffff96e8c140:  mov    $0xffffffffffffffff,%rax
   0xffffffff96e8c147:  mov    %rax,0x17b228a(%rip)        # 0xffffffff9863e3d8
   0xffffffff96e8c14e:  mov    %eax,0x17b228c(%rip)        # 0xffffffff9863e3e0
   0xffffffff96e8c154:  mov    %eax,0x17b228a(%rip)        # 0xffffffff9863e3e4
   0xffffffff96e8c15a:  xor    %eax,%eax
   0xffffffff96e8c15c:  retq
   0xffffffff96e8c15d:  nopl   (%rax)
   0xffffffff96e8c160:  push   %rbx
   0xffffffff96e8c161:  mov    $0xffffffff9804c240,%rdi
   0xffffffff96e8c168:  mov    $0xffffffff97e9fccc,%rbx
   0xffffffff96e8c16f:  callq  0xffffffff9776b230

where we also see that gcc chooses the destination registers rather
intelligently. As expected, repeated "cat /proc/rai" continues to print
"one: 1, two: 2, three: 3".

Signed-off-by: Rasmus Villemoes <li...@rasmusvillemoes.dk>
---
 arch/x86/include/asm/rai.S | 42 +++++++++++++++++++++++++++++++++++++-
 arch/x86/include/asm/rai.h | 30 ++++++++++++++++++++++++++-
 arch/x86/kernel/rai.c      | 18 ++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/rai.S b/arch/x86/include/asm/rai.S
index 253d27453416..f42cdd8db876 100644
--- a/arch/x86/include/asm/rai.S
+++ b/arch/x86/include/asm/rai.S
@@ -8,11 +8,51 @@
        .long \templ_end - \templ
        .long \thunk - .
 .endm
-       
+
 .macro rai_entry_pad start end
        .ifgt STRUCT_RAI_ENTRY_SIZE-(\end-\start)
        .skip STRUCT_RAI_ENTRY_SIZE-(\end-\start), 0x00
        .endif
 .endm
 
+.macro rai_load dst, var, type
+       .pushsection .rai_templ, "aw"
+10:
+       .ifeq \type - RAI_LOAD_8
+       movabs $0x1234567812345678, \dst
+       .else
+       mov $0x12345678, \dst
+       .endif
+11:
+       .popsection
+
+       /* Even if the mov \var, \dst is short enough to fit in the
+        * space we reserve in .text, we still need the thunk for when
+        * we do the immediate patching. */
+       .pushsection .text.rai_thunk, "ax"
+20:
+       mov \var(%rip), \dst
+       jmp 32f
+21:
+       .popsection
+
+       /* The part that goes into .text */
+30:
+       /* silence objtool by actually using the thunk for now */
+       jmp 20b
+       /* mov \var(%rip), \dst */
+31:
+       .skip -(((11b - 10b)-(31b - 30b)) > 0)*((11b - 10b)-(31b - 30b)), 0x90
+32:
+
+       .pushsection .rai_data, "a"
+40:
+       rai_entry \type 30b 32b 10b 11b 20b
+       .quad \var   /* .load.addr */
+41:
+       rai_entry_pad 40b 41b
+       .popsection
+.endm /* rai_load */
+
+
 #endif
diff --git a/arch/x86/include/asm/rai.h b/arch/x86/include/asm/rai.h
index 269d696255b0..b57494c98d0f 100644
--- a/arch/x86/include/asm/rai.h
+++ b/arch/x86/include/asm/rai.h
@@ -1,7 +1,10 @@
 #ifndef _ASM_X86_RAI_H
 #define _ASM_X86_RAI_H
 
-#define STRUCT_RAI_ENTRY_SIZE 24
+#define RAI_LOAD_4 0
+#define RAI_LOAD_8 1
+
+#define STRUCT_RAI_ENTRY_SIZE 32
 
 /* Put the asm macros in a separate file for easier editing. */
 #include <asm/rai.S>
@@ -16,10 +19,35 @@ struct rai_entry {
        s32 templ_len;    /* length of template */
        s32 thunk_offset; /* member-relative offset to ool thunk */
        /* type-specific data follows */
+       union {
+               struct {
+                       void *addr;
+               } load;
+       };
 };
 _Static_assert(sizeof(struct rai_entry) == STRUCT_RAI_ENTRY_SIZE,
               "please update STRUCT_RAI_ENTRY_SIZE");
 
+#define _rai_load(var) ({                                              \
+               typeof(var) ret__;                                      \
+               switch(sizeof(var)) {                                   \
+               case 4:                                                 \
+                       asm("rai_load %0, %c1, %c2"                     \
+                           : "=r" (ret__)                              \
+                           : "i" (&(var)), "i" (RAI_LOAD_4));          \
+                       break;                                          \
+               case 8:                                                 \
+                       asm("rai_load %0, %c1, %c2"                     \
+                           : "=r" (ret__)                              \
+                           : "i" (&(var)), "i" (RAI_LOAD_8));          \
+                       break;                                          \
+               default:                                                \
+                       ret__ = _rai_load_fallback(var);                \
+                       break;                                          \
+               }                                                       \
+               ret__;                                                  \
+       })
+
 #endif /* !__ASSEMBLY */
 
 #endif /* _ASM_X86_RAI_H */
diff --git a/arch/x86/kernel/rai.c b/arch/x86/kernel/rai.c
index 819d03a025e3..e55e85f11a2e 100644
--- a/arch/x86/kernel/rai.c
+++ b/arch/x86/kernel/rai.c
@@ -14,6 +14,24 @@ rai_patch_one(const struct rai_entry *r)
        u8 *thunk = (u8*)&r->thunk_offset + r->thunk_offset;
 
        switch (r->type) {
+       case RAI_LOAD_4: {
+               const u32 *imm = r->load.addr;
+               /*
+                * The immediate is the last 4 bytes of the template,
+                * regardless of the operand encoding.
+                */
+               memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm));
+               break;
+       }
+       case RAI_LOAD_8: {
+               const u64 *imm = r->load.addr;
+               /*
+                * The immediate is the last 8 bytes of the template,
+                * regardless of the operand encoding.
+                */
+               memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm));
+               break;
+       }
        default:
                WARN_ONCE(1, "unhandled RAI type %d\n", r->type);
                return;
-- 
2.19.1.6.gbde171bbf5

[POC 07/12] x86-64: rai: implement _rai_load

Reply via email to