From: "Kirill A. Shutemov" <kirill.shute...@linux.intel.com>

Handle #VE due to MMIO operations. MMIO triggers #VE with EPT_VIOLATION
exit reason.

For now we only handle a subset of the instructions that the kernel uses for
MMIO operations. User-space access triggers SIGBUS.

Signed-off-by: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
Reviewed-by: Andi Kleen <a...@linux.intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan 
<sathyanarayanan.kuppusw...@linux.intel.com>
---
 arch/x86/kernel/tdx.c | 120 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
index 3846d2807a7a..eff58329751e 100644
--- a/arch/x86/kernel/tdx.c
+++ b/arch/x86/kernel/tdx.c
@@ -6,6 +6,8 @@
 #include <linux/cpu.h>
 #include <asm/tdx.h>
 #include <asm/vmx.h>
+#include <asm/insn.h>
+#include <linux/sched/signal.h> /* force_sig_fault() */
 
 #ifdef CONFIG_KVM_GUEST
 #include "tdx-kvm.c"
@@ -270,6 +272,121 @@ static void tdx_handle_io(struct pt_regs *regs, u32 
exit_qual)
        }
 }
 
+/*
+ * Ask the VMM to perform an MMIO access on the guest's behalf via the
+ * TDVMCALL hypercall, using EXIT_REASON_EPT_VIOLATION as the sub-function.
+ *
+ * @size:  access width in bytes (passed in R12)
+ * @write: true for a store, false for a load (passed in R13)
+ * @addr:  guest physical address of the MMIO location (passed in R14)
+ * @val:   value to store on a write; ignored by the VMM on a read (R15)
+ *
+ * Returns the value the VMM placed in R11 — the data read, for a load.
+ */
+static unsigned long tdx_mmio(int size, bool write, unsigned long addr,
+               unsigned long val)
+{
+       register long r10 asm("r10") = TDVMCALL_STANDARD;
+       register long r11 asm("r11") = EXIT_REASON_EPT_VIOLATION;
+       register long r12 asm("r12") = size;
+       register long r13 asm("r13") = write;
+       register long r14 asm("r14") = addr;
+       register long r15 asm("r15") = val;
+       register long rcx asm("rcx");
+       long ret;
+
+       /* Allow to pass R10, R11, R12, R13, R14 and R15 down to the VMM */
+       rcx = BIT(10) | BIT(11) | BIT(12) | BIT(13) | BIT(14) | BIT(15);
+
+       asm volatile(TDCALL
+                       : "=a"(ret), "=r"(r10), "=r"(r11), "=r"(r12), "=r"(r13),
+                         "=r"(r14), "=r"(r15)
+                       : "a"(TDVMCALL), "r"(rcx), "r"(r10), "r"(r11), "r"(r12),
+                         "r"(r13), "r"(r14), "r"(r15)
+                       : );
+
+       /* RAX is the TDCALL status, R10 the TDVMCALL status; 0 means success */
+       WARN_ON(ret || r10);
+
+       return r11;
+}
+
+/*
+ * Return a pointer into @regs for the register named by the ModRM 'reg'
+ * field of the decoded instruction, extended by REX.R for r8-r15.
+ *
+ * Note: this decodes only the reg field, not the r/m field — sufficient
+ * for the register operand of the MOV forms handled in tdx_handle_mmio().
+ */
+static inline void *get_reg_ptr(struct pt_regs *regs, struct insn *insn)
+{
+       /* Table index follows x86 register encoding order (rax=0 .. r15=15) */
+       static const int regoff[] = {
+               offsetof(struct pt_regs, ax),
+               offsetof(struct pt_regs, cx),
+               offsetof(struct pt_regs, dx),
+               offsetof(struct pt_regs, bx),
+               offsetof(struct pt_regs, sp),
+               offsetof(struct pt_regs, bp),
+               offsetof(struct pt_regs, si),
+               offsetof(struct pt_regs, di),
+               offsetof(struct pt_regs, r8),
+               offsetof(struct pt_regs, r9),
+               offsetof(struct pt_regs, r10),
+               offsetof(struct pt_regs, r11),
+               offsetof(struct pt_regs, r12),
+               offsetof(struct pt_regs, r13),
+               offsetof(struct pt_regs, r14),
+               offsetof(struct pt_regs, r15),
+       };
+       int regno;
+
+       regno = X86_MODRM_REG(insn->modrm.value);
+       /* REX.R supplies the high bit of the register number (r8-r15) */
+       if (X86_REX_R(insn->rex_prefix.value))
+               regno += 8;
+
+       return (void *)regs + regoff[regno];
+}
+
+/*
+ * Handle a #VE with EPT_VIOLATION exit reason: decode the faulting MMIO
+ * instruction at regs->ip and emulate it via tdx_mmio().
+ *
+ * Returns the length of the emulated instruction so the caller can advance
+ * RIP past it, or 0 for the user-mode SIGBUS path (no skip).
+ */
+static int tdx_handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+{
+       int size;
+       bool write;
+       unsigned long *reg;
+       struct insn insn;
+       unsigned long val = 0;
+
+       /*
+        * User mode would mean the kernel exposed a device directly
+        * to ring3, which shouldn't happen except for things like
+        * DPDK.
+        */
+       if (user_mode(regs)) {
+               pr_err("Unexpected user-mode MMIO access.\n");
+               force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) ve->gla);
+               return 0;
+       }
+
+       /* Decode the kernel instruction that faulted; ip points at it */
+       kernel_insn_init(&insn, (void *) regs->ip, MAX_INSN_SIZE);
+       insn_get_length(&insn);
+       insn_get_opcode(&insn);
+
+       /* Exit qualification bit 1 distinguishes write from read access */
+       write = ve->exit_qual & 0x2;
+
+       /* Default operand width from the decoder; byte-sized MOVs override it */
+       size = insn.opnd_bytes;
+       switch (insn.opcode.bytes[0]) {
+       /* MOV r/m8     r8      */
+       case 0x88:
+       /* MOV r8       r/m8    */
+       case 0x8A:
+       /* MOV r/m8     imm8    */
+       case 0xC6:
+               size = 1;
+               break;
+       }
+
+       /* Immediate-operand form (e.g. MOV r/m, imm): only valid as a write */
+       if (inat_has_immediate(insn.attr)) {
+               BUG_ON(!write);
+               val = insn.immediate.value;
+               tdx_mmio(size, write, ve->gpa, val);
+               return insn.length;
+       }
+
+       /* Otherwise the data operand must be a register named via ModRM */
+       BUG_ON(!inat_has_modrm(insn.attr));
+
+       reg = get_reg_ptr(regs, &insn);
+
+       if (write) {
+               memcpy(&val, reg, size);
+               tdx_mmio(size, write, ve->gpa, val);
+       } else {
+               val = tdx_mmio(size, write, ve->gpa, val);
+               /*
+                * NOTE(review): only 'size' bytes of the destination are
+                * cleared before the copy. A hardware 32-bit MOV zero-extends
+                * into the full 64-bit register — confirm whether a 4-byte
+                * read here should clear all 8 bytes of *reg instead.
+                */
+               memset(reg, 0, size);
+               memcpy(reg, &val, size);
+       }
+       return insn.length;
+}
+
 void __init tdx_early_init(void)
 {
        if (!cpuid_has_tdx_guest())
@@ -331,6 +448,9 @@ int tdx_handle_virtualization_exception(struct pt_regs 
*regs,
        case EXIT_REASON_IO_INSTRUCTION:
                tdx_handle_io(regs, ve->exit_qual);
                break;
+       case EXIT_REASON_EPT_VIOLATION:
+               ve->instr_len = tdx_handle_mmio(regs, ve);
+               break;
        default:
                pr_warn("Unexpected #VE: %d\n", ve->exit_reason);
                return -EFAULT;
-- 
2.25.1

Reply via email to