Hi

I have been working on this diff on and off for a while now. mlarkin was
able to give me lots of tips, but now I am stuck, so I thought I would
ask for nits, tips, or even a "you're doing it wrong".

The code below causes the VM to triple fault without the
((*pte & EPT_WB) == EPT_WB) check in vmx_mprotect_ept. Some calls pass
this check and I am able to modify the EPT; others fail and are skipped.
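
An alternative that might be a more direct presence test is checking the
permission bits instead of the memory type. This is only a rough, untested
sketch, assuming EPT_R/EPT_W/EPT_X from pmap.h are the EPT permission bits
2:0 and that an entry with none of them set is "not present" (SDM Vol. 3C
28.2.2):

    /*
     * Untested sketch: treat an EPT entry as present/valid when any of
     * the read/write/execute permission bits (bits 2:0) is set, and
     * skip it otherwise instead of keying off the memory type.
     */
    static inline int
    ept_pte_present(pt_entry_t pte)
    {
        return ((pte & (EPT_R | EPT_W | EPT_X)) != 0);
    }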

I am just not sure I am looking up the EPT entry correctly; any help
here would be greatly appreciated.
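
If it helps double check the lookup, the index arithmetic can be verified
in userland with a throwaway program along these lines (the GPA is just an
example value, and the 39/30/21/12 shifts with 9-bit masks are assumed to
match the L4/L3/L2/L1 SHIFT and MASK definitions):

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint64_t gpa = 0x200000;    /* example guest physical address */

        /* same 4-level decomposition as vmx_pmap_find_pte_ept() */
        printf("l4=%llu l3=%llu l2=%llu l1=%llu\n",
            (unsigned long long)((gpa >> 39) & 0x1ff),
            (unsigned long long)((gpa >> 30) & 0x1ff),
            (unsigned long long)((gpa >> 21) & 0x1ff),
            (unsigned long long)((gpa >> 12) & 0x1ff));
        return 0;
    }

For 0x200000 that should print l4=0 l3=0 l2=1 l1=0.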

The kernel code is called from solo5[1] as follows.

    if (mprotect(vaddr_start, size, prot) == -1)
        return -1;

    ret = 0;

    struct hvt_b *hvb = hvt->b;
    struct vm_mprotect_ept_params *vmep;

    vmep = calloc(1, sizeof (struct vm_mprotect_ept_params));
    if (vmep == NULL) {
        warn("calloc");
        return -1;
    }

    vmep->vmep_vm_id = hvb->vcp_id;
    vmep->vmep_vcpu_id = hvb->vcpu_id;

    // sgpa = vmr->vmr_gpa(0x0) + (addr - vmr->vmr_va)
    // vmep->vmep_sgpa = (vaddr_t)0x0 + (vaddr_t)(vaddr_start - hvt->mem);
    vmep->vmep_sgpa = addr_start;
    vmep->vmep_size = size;
    vmep->vmep_prot = prot;

    if (ioctl(hvb->vmd_fd, VMM_IOC_MPROTECT_EPT, vmep) < 0) {
        warn("mprotect ept vmm ioctl failed - exiting");
        ret = -1;
    }

Cheers
Adam

[1]
https://github.com/adamsteen/solo5/blob/wnox/tenders/hvt/hvt_openbsd.c#L169-L225


Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.257
diff -u -p -u -p -r1.257 vmm.c
--- sys/arch/amd64/amd64/vmm.c  13 Dec 2019 03:38:15 -0000      1.257
+++ sys/arch/amd64/amd64/vmm.c  6 Jan 2020 03:30:39 -0000
@@ -41,7 +41,7 @@
 #include <dev/isa/isareg.h>
 #include <dev/pv/pvreg.h>
 
-/* #define VMM_DEBUG */
+#define VMM_DEBUG
 
 void *l1tf_flush_region;
 
@@ -124,6 +124,7 @@ int vm_get_info(struct vm_info_params *)
 int vm_resetcpu(struct vm_resetcpu_params *);
 int vm_intr_pending(struct vm_intr_params *);
 int vm_rwregs(struct vm_rwregs_params *, int);
+int vm_mprotect_ept(struct vm_mprotect_ept_params *);
 int vm_rwvmparams(struct vm_rwvmparams_params *, int);
 int vm_find(uint32_t, struct vm **);
 int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *);
@@ -201,6 +202,8 @@ void vmx_setmsrbw(struct vcpu *, uint32_
 void vmx_setmsrbrw(struct vcpu *, uint32_t);
 void svm_set_clean(struct vcpu *, uint32_t);
 void svm_set_dirty(struct vcpu *, uint32_t);
+int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
+pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
 
 void vmm_init_pvclock(struct vcpu *, paddr_t);
 int vmm_update_pvclock(struct vcpu *);
@@ -225,6 +228,9 @@ void vmm_decode_efer_value(uint64_t);
 void vmm_decode_rflags(uint64_t);
 void vmm_decode_misc_enable_value(uint64_t);
 const char *vmm_decode_cpu_mode(struct vcpu *);
+void dump_requested_vmx_mprotect_ept(int);
+void vmx_dump_pte_prot(pt_entry_t *);
+void vmx_dump_pte_after(pt_entry_t *);
 
 extern int mtrr2mrt(int);
 
@@ -494,6 +500,9 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t 
        case VMM_IOC_WRITEREGS:
                ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
                break;
+       case VMM_IOC_MPROTECT_EPT:
+               ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data);
+               break;
        case VMM_IOC_READVMPARAMS:
                ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
                break;
@@ -532,6 +541,7 @@ pledge_ioctl_vmm(struct proc *p, long co
        case VMM_IOC_INTR:
        case VMM_IOC_READREGS:
        case VMM_IOC_WRITEREGS:
+       case VMM_IOC_MPROTECT_EPT:
        case VMM_IOC_READVMPARAMS:
        case VMM_IOC_WRITEVMPARAMS:
                return (0);
@@ -807,6 +817,261 @@ vm_rwregs(struct vm_rwregs_params *vrwp,
 }
 
 /*
+ * vm_mprotect_ept
+ *
+ * IOCTL handler to set the access protections of the EPT
+ *
+ * Parameters:
+ *   vmep: describes the memory range to which the protection will be applied
+ *
+ * Return values:
+ *  0: if successful
+ *  ENOENT: if the VM or VCPU defined by 'vmep' cannot be found
+ *  EINVAL: if the sgpa or size is not page aligned, the prot is WX or RWX,
+ *          the size is too large (>= 512GB), there is wraparound
+ *          (like start = 512GB-1 and end = 512GB-2),
+ *          the address specified is not within the vm's mem range,
+ *          or the address lies inside reserved (MMIO) memory
+ */
+int
+vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
+{
+       struct vm *vm;
+       struct vcpu *vcpu;
+       struct vm_mem_range *vmr;
+       vaddr_t sgpa;
+       size_t size;
+       vm_prot_t prot;
+       int i, ret, mem_type;
+
+       /* Find the desired VM */
+       rw_enter_read(&vmm_softc->vm_lock);
+       ret = vm_find(vmep->vmep_vm_id, &vm);
+       rw_exit_read(&vmm_softc->vm_lock);
+
+       /* Not found? exit. */
+       if (ret != 0) {
+               DPRINTF("%s: vm id %u not found\n", __func__,
+                   vmep->vmep_vm_id);
+               return (ret);
+       }
+
+       rw_enter_read(&vm->vm_vcpu_lock);
+       SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
+               if (vcpu->vc_id == vmep->vmep_vcpu_id)
+                       break;
+       }
+       rw_exit_read(&vm->vm_vcpu_lock);
+
+       if (vcpu == NULL) {
+               DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
+                   vmep->vmep_vcpu_id, vmep->vmep_vm_id);
+               return (ENOENT);
+       }
+
+       if (vcpu->vc_state != VCPU_STATE_STOPPED) {
+               DPRINTF("%s: mprotect_ept %u on vm %u attempted "
+                   "while vcpu was in state %u (%s)\n", __func__,
+                   vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
+                   vcpu_state_decode(vcpu->vc_state));
+
+               return (EBUSY);
+       }
+
+       sgpa = vmep->vmep_sgpa;
+       size = vmep->vmep_size;
+       prot = vmep->vmep_prot;
+
+       /* disallow mappings that are both writeable and executable (W^X) */
+       if ((prot & PROT_MASK) != prot ||
+           (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
+               return (EINVAL);
+
+       if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0)
+               return (EINVAL); /* Must be page aligned */
+
+       if (size >= NBPD_L4)
+               return (EINVAL); /* size must be less than 512GB */
+
+       if (sgpa + size < sgpa)
+               return (EINVAL); /* no wraparound */
+
+       /* XXX check if the address lies inside reserved (MMIO) memory */
+
+       mem_type = VMM_MEM_TYPE_UNKNOWN;
+       for (i = 0; i < vm->vm_nmemranges; i++) {
+               vmr = &vm->vm_memranges[i];
+
+               /*
+                * vm_memranges are ascending; if sgpa is below the start of
+                * this range it cannot fall within this or any later range.
+                */
+               if (sgpa < vmr->vmr_gpa)
+                       break;
+
+               if (sgpa + size <= vmr->vmr_gpa + vmr->vmr_size) {
+                       mem_type = VMM_MEM_TYPE_REGULAR;
+                       break;
+               }
+       }
+
+       if (mem_type == VMM_MEM_TYPE_UNKNOWN)
+               return (EINVAL);
+
+       if (vmm_softc->mode == VMM_MODE_EPT ||
+           vmm_softc->mode == VMM_MODE_VMX) {
+               ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
+       } else {
+               pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
+               ret = 0;
+       }
+
+       return (ret);
+}
+
+/*
+ * vmx_mprotect_ept
+ *
+ * Apply the requested EPT protections to the given range of guest physical
+ * pages, faulting a page in first if required.
+ */
+int
+vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
+{
+       struct vmx_invept_descriptor vid;
+       pmap_t pmap = vm_map->pmap;
+       pt_entry_t *pte;
+       paddr_t addr;
+       int ret = 0;
+
+#ifdef VMM_DEBUG
+       dump_requested_vmx_mprotect_ept(prot);
+#endif
+       for (addr = sgpa; addr < egpa; addr += PAGE_SIZE) {
+               pte = vmx_pmap_find_pte_ept(pmap, addr);
+               if (pte == NULL) {
+                       ret = uvm_fault(vm_map, addr, VM_FAULT_INVALID,
+                           PROT_READ | PROT_WRITE);
+                       if (ret)
+                               printf("%s: uvm_fault returns %d, GPA=0x%llx\n",
+                                   __func__, ret, (uint64_t)addr);
+
+                       pte = vmx_pmap_find_pte_ept(pmap, addr);
+                       if (pte == NULL)
+                               return (EFAULT);
+               }
+#ifdef VMM_DEBUG
+               vmx_dump_pte_prot(pte);
+#endif
+               /* XXX how to determine if this is a valid pte */
+               if ((*pte & EPT_WB) == EPT_WB) {
+
+                       if (prot & PROT_READ)
+                               *pte |= EPT_R;
+                       else
+                               *pte &= ~EPT_R;
+
+                       if (prot & PROT_WRITE)
+                               *pte |= EPT_W;
+                       else
+                               *pte &= ~EPT_W;
+
+                       if (prot & PROT_EXEC)
+                               *pte |= EPT_X;
+                       else
+                               *pte &= ~EPT_X;
+               }
+#ifdef VMM_DEBUG
+               vmx_dump_pte_after(pte);
+#endif
+
+               /*
+                * XXX bonus points - ensure permission combination is valid
+                * in EPT (eg, no "write only" pages, and ensure if "execute
+                * only" is requested, that the CPU actually supports it, etc)
+                */
+       }
+
+       /*
+        * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction
+        * The first bullet point seems to say we should call invept:
+        *
+        * "Software should use the INVEPT instruction with the single-context
+        * INVEPT type after making any of the following changes to an EPT
+        * paging-structure entry (the INVEPT descriptor should contain an
+        * EPTP value that references, directly or indirectly,
+        * the modified EPT paging structure):
+        * - Changing any of the privilege bits 2:0 from 1 to 0."
+        */
+       if (pmap->eptp != 0) {
+               memset(&vid, 0, sizeof(vid));
+               vid.vid_eptp = pmap->eptp;
+               DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
+                   vid.vid_eptp);
+               invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
+       }
+
+       return (ret);
+}
+
+/*
+ * vmx_pmap_find_pte_ept
+ *
+ * Find the page table entry for guest physical address 'addr' in 'pmap'.
+ */
+pt_entry_t *
+vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr)
+{
+       int l4idx, l3idx, l2idx, l1idx;
+       pd_entry_t *pd;
+       paddr_t pdppa;
+       pt_entry_t *ptes;
+
+       l4idx = (addr & L4_MASK) >> L4_SHIFT; /* PML4E idx */
+       l3idx = (addr & L3_MASK) >> L3_SHIFT; /* PDPTE idx */
+       l2idx = (addr & L2_MASK) >> L2_SHIFT; /* PDE idx */
+       l1idx = (addr & L1_MASK) >> L1_SHIFT; /* PTE idx */
+
+       pd = (pd_entry_t *)pmap->pm_pdir;
+       if (pd == NULL)
+               return (NULL);
+
+       /*
+        * l4idx should always be 0 since we don't support more than 512GB
+        * guest physical memory.
+        */
+       if (l4idx > 0)
+               return (NULL);
+
+       /*
+        * l3idx should always be < MAXDSIZ/1GB because we don't support more
+        * than MAXDSIZ guest phys mem.
+        */
+       if (l3idx >= MAXDSIZ / ((paddr_t)1024 * 1024 * 1024))
+               return (NULL);
+
+       pdppa = pd[l4idx] & PG_FRAME;
+       if (pdppa == 0)
+               return (NULL);
+
+       ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+       pdppa = ptes[l3idx] & PG_FRAME;
+       if (pdppa == 0)
+               return (NULL);
+
+       ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+       pdppa = ptes[l2idx] & PG_FRAME;
+       if (pdppa == 0)
+               return (NULL);
+
+       ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+       return &ptes[l1idx];
+}
+
+/*
  * vm_find
  *
  * Function to find an existing VM by its identifier.
@@ -8630,5 +8895,64 @@ vmm_decode_cpu_mode(struct vcpu *vcpu)
        case VMM_CPU_MODE_LONG: return "long";
        default: return "unknown";
        }
+}
+
+void
+dump_requested_vmx_mprotect_ept(int prot)
+{
+       DPRINTF("%s\nrequested: '", __func__);
+       if (prot & PROT_READ)
+               DPRINTF("R");
+
+       if (prot & PROT_WRITE)
+               DPRINTF("W");
+
+       if (prot & PROT_EXEC)
+               DPRINTF("X");
+       DPRINTF("'\n");
+}
+
+void
+vmx_dump_pte_prot(pt_entry_t *pte)
+{
+       DPRINTF("before: '");
+       if (*pte & EPT_R)
+               DPRINTF("R");
+
+       if (*pte & EPT_W)
+               DPRINTF("W");
+
+       if (*pte & EPT_X)
+               DPRINTF("X");
+
+       if (*pte & EPT_WB)
+               DPRINTF("`WB`");
+
+       if (*pte & EPT_PS)
+               DPRINTF("`PS`");
+
+       DPRINTF("'");
+}
+
+void
+vmx_dump_pte_after(pt_entry_t *pte)
+{
+       DPRINTF(" after: '");
+       if (*pte & EPT_R)
+               DPRINTF("R");
+
+       if (*pte & EPT_W)
+               DPRINTF("W");
+
+       if (*pte & EPT_X)
+               DPRINTF("X");
+
+       if (*pte & EPT_WB)
+               DPRINTF("`WB`");
+
+       if (*pte & EPT_PS)
+               DPRINTF("`PS`");
+
+       DPRINTF("'\n");
 }
 #endif /* VMM_DEBUG */
Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.67
diff -u -p -u -p -r1.67 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h     17 Jul 2019 05:51:07 -0000      1.67
+++ sys/arch/amd64/include/vmmvar.h     6 Jan 2020 03:30:40 -0000
@@ -558,6 +558,15 @@ struct vm_rwregs_params {
        struct vcpu_reg_state   vrwp_regs;
 };
 
+struct vm_mprotect_ept_params {
+       /* Input parameters to VMM_IOC_MPROTECT_EPT */
+       uint32_t                vmep_vm_id;
+       uint32_t                vmep_vcpu_id;
+       vaddr_t                 vmep_sgpa;
+       size_t                  vmep_size;
+       int                     vmep_prot;
+};
+
 /* IOCTL definitions */
 #define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */
 #define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
@@ -571,7 +580,8 @@ struct vm_rwregs_params {
 #define VMM_IOC_READVMPARAMS _IOWR('V', 9, struct vm_rwvmparams_params)
 /* Set VM params */
 #define VMM_IOC_WRITEVMPARAMS _IOW('V', 10, struct vm_rwvmparams_params)
-
+/* Control the protection of EPT pages */
+#define VMM_IOC_MPROTECT_EPT _IOW('V', 11, struct vm_mprotect_ept_params)
 
 /* CPUID masks */
 /*
