Module Name: src Committed By: bouyer Date: Tue May 26 10:11:56 UTC 2020
Modified Files: src/sys/arch/xen/include: xenio.h src/sys/arch/xen/xen: privcmd.c Log Message: Implement new ioctls, needed by Xen 4.13: IOCTL_PRIVCMD_MMAPBATCH_V2 IOCTL_PRIVCMD_MMAP_RESOURCE IOCTL_GNTDEV_MMAP_GRANT_REF IOCTL_GNTDEV_ALLOC_GRANT_REF To generate a diff of this commit: cvs rdiff -u -r1.11 -r1.12 src/sys/arch/xen/include/xenio.h cvs rdiff -u -r1.57 -r1.58 src/sys/arch/xen/xen/privcmd.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/xen/include/xenio.h diff -u src/sys/arch/xen/include/xenio.h:1.11 src/sys/arch/xen/include/xenio.h:1.12 --- src/sys/arch/xen/include/xenio.h:1.11 Thu Jul 7 06:55:40 2016 +++ src/sys/arch/xen/include/xenio.h Tue May 26 10:11:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: xenio.h,v 1.11 2016/07/07 06:55:40 msaitoh Exp $ */ +/* $NetBSD: xenio.h,v 1.12 2020/05/26 10:11:56 bouyer Exp $ */ /****************************************************************************** * privcmd.h @@ -114,9 +114,92 @@ typedef struct oprivcmd_hypercall */ #define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \ _IOR('P', 5, int) + #define IOCTL_PRIVCMD_MMAPBATCH_V2 \ _IOW('P', 6, privcmd_mmapbatch_v2_t) +/* + * @cmd: IOCTL_PRIVCMD_MMAP_RESOURCE + * @arg &privcmd_mmap_resource_t + * Return: + * map the specified resource at the provided virtual address + */ + +typedef struct privcmd_mmap_resource { + domid_t dom; + uint32_t type; + uint32_t id; + uint32_t idx; + uint64_t num; + uint64_t addr; +} privcmd_mmap_resource_t; + +#define IOCTL_PRIVCMD_MMAP_RESOURCE \ + _IOW('P', 7, privcmd_mmap_resource_t) + +/* + * @cmd: IOCTL_GNTDEV_MMAP_GRANT_REF + * @arg &ioctl_gntdev_mmap_grant_ref + * Return: + * map the grant references at the virtual address provided by caller + * The grant ref already exists (e.g. comes from a remote domain) + */ +struct ioctl_gntdev_grant_ref { + /* The domain ID of the grant to be mapped. */ + uint32_t domid; + /* The grant reference of the grant to be mapped. */ + uint32_t ref; +}; + +struct ioctl_gntdev_grant_notify { + ssize_t offset; + uint32_t action; + uint32_t event_channel_port; +}; +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 +#define UNMAP_NOTIFY_SEND_EVENT 0x2 + +struct ioctl_gntdev_mmap_grant_ref { + /* The number of grants to be mapped. */ + uint32_t count; + uint32_t pad; + /* The virtual address where they should be mapped */ + void *va; + /* notify action */ + struct ioctl_gntdev_grant_notify notify; + /* Array of grant references, of size @count. 
*/ + struct ioctl_gntdev_grant_ref *refs; +}; + +#define IOCTL_GNTDEV_MMAP_GRANT_REF \ + _IOW('P', 8, struct ioctl_gntdev_mmap_grant_ref) + +/* + * @cmd: IOCTL_GNTDEV_ALLOC_GRANT_REF + * @arg &ioctl_gntdev_alloc_grant_ref + * Return: + * Allocate local memory and grant it to a remote domain. + * local memory is mmaped at the virtual address provided by caller + */ + +struct ioctl_gntdev_alloc_grant_ref { + /* IN parameters */ + uint16_t domid; + uint16_t flags; + uint32_t count; + void *va; + /* notify action */ + struct ioctl_gntdev_grant_notify notify; + /* Variable OUT parameter */ + uint32_t *gref_ids; +}; + +#define IOCTL_GNTDEV_ALLOC_GRANT_REF \ + _IOW('P', 9, struct ioctl_gntdev_alloc_grant_ref) + +#define GNTDEV_ALLOC_FLAG_WRITABLE 0x01 + + /* Interface to /dev/xenevt */ /* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ #define EVTCHN_RESET _IO('E', 1) Index: src/sys/arch/xen/xen/privcmd.c diff -u src/sys/arch/xen/xen/privcmd.c:1.57 src/sys/arch/xen/xen/privcmd.c:1.58 --- src/sys/arch/xen/xen/privcmd.c:1.57 Tue May 5 17:02:01 2020 +++ src/sys/arch/xen/xen/privcmd.c Tue May 26 10:11:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: privcmd.c,v 1.57 2020/05/05 17:02:01 bouyer Exp $ */ +/* $NetBSD: privcmd.c,v 1.58 2020/05/26 10:11:56 bouyer Exp $ */ /*- * Copyright (c) 2004 Christian Limpach. @@ -27,7 +27,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.57 2020/05/05 17:02:01 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.58 2020/05/26 10:11:56 bouyer Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -47,29 +47,68 @@ __KERNEL_RCSID(0, "$NetBSD: privcmd.c,v #include <xen/hypervisor.h> #include <xen/xen.h> #include <xen/xenio.h> +#include <xen/granttables.h> #define PRIVCMD_MODE (S_IRUSR) /* Magic value is used to mark invalid pages. * This must be a value within the page-offset. * Page-aligned values including 0x0 are used by the guest. 
- */ + */ #define INVALID_PAGE 0xfff +typedef enum _privcmd_type { + PTYPE_PRIVCMD, + PTYPE_GNTDEV_REF, + PTYPE_GNTDEV_ALLOC +} privcmd_type; + +struct privcmd_object_privcmd { + paddr_t *maddr; /* array of machine address to map */ + int domid; + bool no_translate; +}; + +struct privcmd_object_gntref { + struct ioctl_gntdev_grant_notify notify; + struct gnttab_map_grant_ref ops[1]; /* variable length */ +}; + +struct privcmd_object_gntalloc { + vaddr_t gntva; /* granted area mapped in kernel */ + uint16_t domid; + uint16_t flags; + struct ioctl_gntdev_grant_notify notify; + uint32_t gref_ids[1]; /* variable length */ +}; + struct privcmd_object { struct uvm_object uobj; - paddr_t *maddr; /* array of machine address to map */ + privcmd_type type; int npages; - int domid; + union { + struct privcmd_object_privcmd pc; + struct privcmd_object_gntref gr; + struct privcmd_object_gntalloc ga; + } u; }; +#define PGO_GNTREF_LEN(count) \ + (sizeof(struct privcmd_object) + \ + sizeof(struct gnttab_map_grant_ref) * ((count) - 1)) + +#define PGO_GNTA_LEN(count) \ + (sizeof(struct privcmd_object) + \ + sizeof(uint32_t) * ((count) - 1)) + int privcmd_nobjects = 0; static void privpgop_reference(struct uvm_object *); static void privpgop_detach(struct uvm_object *); static int privpgop_fault(struct uvm_faultinfo *, vaddr_t , struct vm_page **, - int, int, vm_prot_t, int); -static int privcmd_map_obj(struct vm_map *, vaddr_t, paddr_t *, int, int); + int, int, vm_prot_t, int); +static int privcmd_map_obj(struct vm_map *, vaddr_t, + struct privcmd_object *, vm_prot_t); static int @@ -252,6 +291,414 @@ privcmd_xen2bsd_errno(int error) } } +static vm_prot_t +privcmd_get_map_prot(struct vm_map *map, vaddr_t start, off_t size) +{ + vm_prot_t prot; + + vm_map_lock_read(map); + /* get protections. 
This also check for validity of mapping */ + if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE)) + prot = VM_PROT_READ | VM_PROT_WRITE; + else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ)) + prot = VM_PROT_READ; + else { + printf("privcmd_get_map_prot 0x%lx -> 0x%lx " + "failed\n", + start, (unsigned long)(start + size - 1)); + prot = UVM_PROT_NONE; + } + vm_map_unlock_read(map); + return prot; +} + +static int +privcmd_mmap(struct vop_ioctl_args *ap) +{ + int i, j; + privcmd_mmap_t *mcmd = ap->a_data; + privcmd_mmap_entry_t mentry; + vaddr_t va; + paddr_t ma; + struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + paddr_t *maddr; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + + for (i = 0; i < mcmd->num; i++) { + error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry)); + if (error) + return EINVAL; + if (mentry.npages == 0) + return EINVAL; + if (mentry.va > VM_MAXUSER_ADDRESS) + return EINVAL; + va = mentry.va & ~PAGE_MASK; + prot = privcmd_get_map_prot(vmm, va, mentry.npages * PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages, + KM_SLEEP); + ma = ((paddr_t)mentry.mfn) << PGSHIFT; + for (j = 0; j < mentry.npages; j++) { + maddr[j] = ma; + ma += PAGE_SIZE; + } + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = false; + obj->npages = mentry.npages; + obj->u.pc.domid = mcmd->dom; + error = privcmd_map_obj(vmm, va, obj, prot); + if (error) + return error; + } + return 0; +} + +static int +privcmd_mmapbatch(struct vop_ioctl_args *ap) +{ + int i; + privcmd_mmapbatch_t* pmb = ap->a_data; + vaddr_t va0; + u_long mfn; + paddr_t ma; + struct vm_map *vmm; + vaddr_t trymap; + paddr_t *maddr; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + + vmm = &curlwp->l_proc->p_vmspace->vm_map; + va0 = pmb->addr & ~PAGE_MASK; + + if (pmb->num == 0) + return EINVAL; + if (va0 > 
VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP); + /* get a page of KVA to check mappins */ + trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE, + UVM_KMF_VAONLY); + if (trymap == 0) { + kmem_free(maddr, sizeof(paddr_t) * pmb->num); + return ENOMEM; + } + + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = false; + obj->npages = pmb->num; + obj->u.pc.domid = pmb->dom; + + for(i = 0; i < pmb->num; ++i) { + error = copyin(&pmb->arr[i], &mfn, sizeof(mfn)); + if (error != 0) { + /* XXX: mappings */ + pmap_update(pmap_kernel()); + kmem_free(maddr, sizeof(paddr_t) * pmb->num); + uvm_km_free(kernel_map, trymap, PAGE_SIZE, + UVM_KMF_VAONLY); + return error; + } + ma = ((paddr_t)mfn) << PGSHIFT; + if ((error = pmap_enter_ma(pmap_kernel(), trymap, ma, 0, + prot, PMAP_CANFAIL | prot, pmb->dom))) { + mfn |= 0xF0000000; + copyout(&mfn, &pmb->arr[i], sizeof(mfn)); + maddr[i] = INVALID_PAGE; + } else { + pmap_remove(pmap_kernel(), trymap, + trymap + PAGE_SIZE); + maddr[i] = ma; + } + } + pmap_update(pmap_kernel()); + uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY); + + error = privcmd_map_obj(vmm, va0, obj, prot); + + return error; +} + +static int +privcmd_mmapbatch_v2(struct vop_ioctl_args *ap) +{ + int i; + privcmd_mmapbatch_v2_t* pmb = ap->a_data; + vaddr_t va0; + u_long mfn; + struct vm_map *vmm; + paddr_t *maddr; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + + vmm = &curlwp->l_proc->p_vmspace->vm_map; + va0 = pmb->addr & ~PAGE_MASK; + + if (pmb->num == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); 
+ if (prot == UVM_PROT_NONE) + return EINVAL; + + maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP); + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = false; + obj->npages = pmb->num; + obj->u.pc.domid = pmb->dom; + + for(i = 0; i < pmb->num; ++i) { + error = copyin(&pmb->arr[i], &mfn, sizeof(mfn)); + if (error != 0) { + kmem_free(maddr, sizeof(paddr_t) * pmb->num); + return error; + } + maddr[i] = ((paddr_t)mfn) << PGSHIFT; + } + error = privcmd_map_obj(vmm, va0, obj, prot); + if (error) + return error; + + /* + * map the range in user process now. + * If Xenr return -ENOENT, retry (paging in progress) + */ + for(i = 0; i < pmb->num; i++, va0 += PAGE_SIZE) { + int err, cerr; + for (int j = 0 ; j < 10; j++) { + err = pmap_enter_ma(vmm->pmap, va0, maddr[i], 0, + prot, PMAP_CANFAIL | prot, + pmb->dom); + if (err != -2) /* Xen ENOENT */ + break; + if (kpause("xnoent", 1, mstohz(100), NULL)) + break; + } + if (err) { + maddr[i] = INVALID_PAGE; + } + cerr = copyout(&err, &pmb->err[i], sizeof(pmb->err[i])); + if (cerr) { + privpgop_detach(&obj->uobj); + return cerr; + } + } + return 0; +} + +static int +privcmd_mmap_resource(struct vop_ioctl_args *ap) +{ + int i; + privcmd_mmap_resource_t* pmr = ap->a_data; + vaddr_t va0; + struct vm_map *vmm; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + struct xen_mem_acquire_resource op; + xen_pfn_t *pfns; + paddr_t *maddr; + + KASSERT(!xen_feature(XENFEAT_auto_translated_physmap)); + + vmm = &curlwp->l_proc->p_vmspace->vm_map; + va0 = pmr->addr & ~PAGE_MASK; + + if (pmr->num == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmr->num) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + pfns = kmem_alloc(sizeof(xen_pfn_t) * pmr->num, KM_SLEEP); + memset(&op, 0, sizeof(op)); + op.domid = pmr->dom; + 
op.type = pmr->type; + op.id = pmr->id; + op.frame = pmr->idx; + op.nr_frames = pmr->num; + set_xen_guest_handle(op.frame_list, pfns); + + error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &op); + if (error) { + printf("%s: XENMEM_acquire_resource failed: %d\n", + __func__, error); + return privcmd_xen2bsd_errno(error); + } + maddr = kmem_alloc(sizeof(paddr_t) * pmr->num, KM_SLEEP); + for (i = 0; i < pmr->num; i++) { + maddr[i] = pfns[i] << PGSHIFT; + } + kmem_free(pfns, sizeof(xen_pfn_t) * pmr->num); + + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = true; + obj->npages = pmr->num; + obj->u.pc.domid = (op.flags & XENMEM_rsrc_acq_caller_owned) ? + DOMID_SELF : pmr->dom; + + error = privcmd_map_obj(vmm, va0, obj, prot); + return error; +} + +static int +privcmd_map_gref(struct vop_ioctl_args *ap) +{ + struct ioctl_gntdev_mmap_grant_ref *mgr = ap->a_data; + struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + struct privcmd_object *obj; + vaddr_t va0 = (vaddr_t)mgr->va & ~PAGE_MASK; + vm_prot_t prot; + int error; + + if (mgr->count == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mgr->count) + return EINVAL; + if (mgr->notify.offset < 0 || mgr->notify.offset > mgr->count) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + obj = kmem_alloc(PGO_GNTREF_LEN(mgr->count), KM_SLEEP); + + obj->type = PTYPE_GNTDEV_REF; + obj->npages = mgr->count; + memcpy(&obj->u.gr.notify, &mgr->notify, + sizeof(obj->u.gr.notify)); + + for (int i = 0; i < obj->npages; ++i) { + struct ioctl_gntdev_grant_ref gref; + error = copyin(&mgr->refs[i], &gref, sizeof(gref)); + if (error != 0) { + goto err1; + } + obj->u.gr.ops[i].host_addr = 0; + obj->u.gr.ops[i].dev_bus_addr = 0; + obj->u.gr.ops[i].ref = gref.ref; + obj->u.gr.ops[i].dom = gref.domid; + obj->u.gr.ops[i].handle 
= -1; + obj->u.gr.ops[i].flags = GNTMAP_host_map | + GNTMAP_application_map | GNTMAP_contains_pte; + if (prot == UVM_PROT_READ) + obj->u.gr.ops[i].flags |= GNTMAP_readonly; + } + error = privcmd_map_obj(vmm, va0, obj, prot); + return error; + +err1: + kmem_free(obj, PGO_GNTREF_LEN(obj->npages)); + return error; +} + +static int +privcmd_alloc_gref(struct vop_ioctl_args *ap) +{ + struct ioctl_gntdev_alloc_grant_ref *mga = ap->a_data; + struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + struct privcmd_object *obj; + vaddr_t va0 = (vaddr_t)mga->va & ~PAGE_MASK; + vm_prot_t prot; + int error, ret; + + if (mga->count == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mga->count) + return EINVAL; + if (mga->notify.offset < 0 || mga->notify.offset > mga->count) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + obj = kmem_alloc(PGO_GNTA_LEN(mga->count), KM_SLEEP); + + obj->type = PTYPE_GNTDEV_ALLOC; + obj->npages = mga->count; + obj->u.ga.domid = mga->domid; + memcpy(&obj->u.ga.notify, &mga->notify, + sizeof(obj->u.ga.notify)); + obj->u.ga.gntva = uvm_km_alloc(kernel_map, + PAGE_SIZE * obj->npages, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO); + if (obj->u.ga.gntva == 0) { + error = ENOMEM; + goto err1; + } + + for (int i = 0; i < obj->npages; ++i) { + paddr_t ma; + vaddr_t va = obj->u.ga.gntva + i * PAGE_SIZE; + grant_ref_t id; + bool ro = ((mga->flags & GNTDEV_ALLOC_FLAG_WRITABLE) == 0); + (void)pmap_extract_ma(pmap_kernel(), va, &ma); + if ((ret = xengnt_grant_access(mga->domid, ma, ro, &id)) != 0) { + printf("%s: xengnt_grant_access failed: %d\n", + __func__, ret); + for (int j = 0; j < i; j++) { + xengnt_revoke_access(obj->u.ga.gref_ids[j]); + error = ret; + goto err2; + } + } + obj->u.ga.gref_ids[i] = id; + } + + error = copyout(&obj->u.ga.gref_ids[0], mga->gref_ids, + sizeof(uint32_t) * obj->npages); + if (error) { + for 
(int i = 0; i < obj->npages; ++i) { + xengnt_revoke_access(obj->u.ga.gref_ids[i]); + } + goto err2; + } + + error = privcmd_map_obj(vmm, va0, obj, prot); + return error; + +err2: + uvm_km_free(kernel_map, obj->u.ga.gntva, + PAGE_SIZE * obj->npages, UVM_KMF_WIRED); +err1: + kmem_free(obj, PGO_GNTA_LEN(obj->npages)); + return error; +} + static int privcmd_ioctl(void *v) { @@ -264,7 +711,6 @@ privcmd_ioctl(void *v) kauth_cred_t a_cred; } */ *ap = v; int error = 0; - paddr_t *maddr; switch (ap->a_command) { case IOCTL_PRIVCMD_HYPERCALL: @@ -328,113 +774,23 @@ privcmd_ioctl(void *v) break; } case IOCTL_PRIVCMD_MMAP: - { - int i, j; - privcmd_mmap_t *mcmd = ap->a_data; - privcmd_mmap_entry_t mentry; - vaddr_t va; - paddr_t ma; - struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + return privcmd_mmap(ap); - for (i = 0; i < mcmd->num; i++) { - error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry)); - if (error) - return error; - if (mentry.npages == 0) - return EINVAL; - if (mentry.va > VM_MAXUSER_ADDRESS) - return EINVAL; -#if 0 - if (mentry.va + (mentry.npages << PGSHIFT) > - mrentry->vm_end) - return EINVAL; -#endif - maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages, - KM_SLEEP); - va = mentry.va & ~PAGE_MASK; - ma = ((paddr_t)mentry.mfn) << PGSHIFT; /* XXX ??? 
*/ - for (j = 0; j < mentry.npages; j++) { - maddr[j] = ma; - ma += PAGE_SIZE; - } - error = privcmd_map_obj(vmm, va, maddr, - mentry.npages, mcmd->dom); - if (error) - return error; - } - break; - } case IOCTL_PRIVCMD_MMAPBATCH: - { - int i; - privcmd_mmapbatch_t* pmb = ap->a_data; - vaddr_t va0; - u_long mfn; - paddr_t ma; - struct vm_map *vmm; - struct vm_map_entry *entry; - vm_prot_t prot; - vaddr_t trymap; + return privcmd_mmapbatch(ap); - vmm = &curlwp->l_proc->p_vmspace->vm_map; - va0 = pmb->addr & ~PAGE_MASK; - - if (pmb->num == 0) - return EINVAL; - if (va0 > VM_MAXUSER_ADDRESS) - return EINVAL; - if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num) - return EINVAL; + case IOCTL_PRIVCMD_MMAPBATCH_V2: + return privcmd_mmapbatch_v2(ap); - vm_map_lock_read(vmm); - if (!uvm_map_lookup_entry(vmm, va0, &entry)) { - vm_map_unlock_read(vmm); - return EINVAL; - } - prot = entry->protection; - vm_map_unlock_read(vmm); - - maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP); - /* get a page of KVA to check mappins */ - trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE, - UVM_KMF_VAONLY); - if (trymap == 0) { - kmem_free(maddr, sizeof(paddr_t) * pmb->num); - return ENOMEM; - } - - for(i = 0; i < pmb->num; ++i) { - error = copyin(&pmb->arr[i], &mfn, sizeof(mfn)); - if (error != 0) { - /* XXX: mappings */ - pmap_update(pmap_kernel()); - kmem_free(maddr, sizeof(paddr_t) * pmb->num); - uvm_km_free(kernel_map, trymap, PAGE_SIZE, - UVM_KMF_VAONLY); - return error; - } - ma = ((paddr_t)mfn) << PGSHIFT; - if (pmap_enter_ma(pmap_kernel(), trymap, ma, 0, - prot, PMAP_CANFAIL, pmb->dom)) { - mfn |= 0xF0000000; - copyout(&mfn, &pmb->arr[i], sizeof(mfn)); - maddr[i] = INVALID_PAGE; - } else { - pmap_remove(pmap_kernel(), trymap, - trymap + PAGE_SIZE); - maddr[i] = ma; - } - } - pmap_update(pmap_kernel()); - error = privcmd_map_obj(vmm, va0, maddr, pmb->num, pmb->dom); - uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY); + case IOCTL_PRIVCMD_MMAP_RESOURCE: + 
return privcmd_mmap_resource(ap); - if (error != 0) - return error; + case IOCTL_GNTDEV_MMAP_GRANT_REF: + return privcmd_map_gref(ap); - break; - } + case IOCTL_GNTDEV_ALLOC_GRANT_REF: + return privcmd_alloc_gref(ap); default: error = EINVAL; } @@ -457,20 +813,85 @@ privpgop_reference(struct uvm_object *uo } static void +privcmd_notify(struct ioctl_gntdev_grant_notify *notify, vaddr_t va, + struct gnttab_map_grant_ref *gmops) +{ + if (notify->action & UNMAP_NOTIFY_SEND_EVENT) { + hypervisor_notify_via_evtchn(notify->event_channel_port); + } + if ((notify->action & UNMAP_NOTIFY_CLEAR_BYTE) == 0) { + notify->action = 0; + return; + } + if (va == 0) { + struct gnttab_map_grant_ref op; + struct gnttab_unmap_grant_ref uop; + int i = notify->offset / PAGE_SIZE; + int o = notify->offset % PAGE_SIZE; + int err; + + KASSERT(gmops != NULL); + va = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE, + UVM_KMF_VAONLY | UVM_KMF_WAITVA); + op.host_addr = va; + op.dev_bus_addr = 0; + op.ref = gmops[i].ref; + op.dom = gmops[i].dom; + op.handle = -1; + op.flags = GNTMAP_host_map; + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (err == 0 && op.status == GNTST_okay) { + char *n = (void *)(va + o); + *n = 0; + uop.host_addr = va; + uop.handle = op.handle; + uop.dev_bus_addr = 0; + (void)HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, &uop, 1); + } + uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY); + } else { + KASSERT(gmops == NULL); + char *n = (void *)(va + notify->offset); + *n = 0; + } + notify->action = 0; +} + +static void privpgop_detach(struct uvm_object *uobj) { struct privcmd_object *pobj = (struct privcmd_object *)uobj; rw_enter(uobj->vmobjlock, RW_WRITER); + KASSERT(uobj->uo_refs > 0); if (uobj->uo_refs > 1) { uobj->uo_refs--; rw_exit(uobj->vmobjlock); return; } rw_exit(uobj->vmobjlock); - kmem_free(pobj->maddr, sizeof(paddr_t) * pobj->npages); - uvm_obj_destroy(uobj, true); - kmem_free(pobj, sizeof(struct privcmd_object)); + switch 
(pobj->type) { + case PTYPE_PRIVCMD: + kmem_free(pobj->u.pc.maddr, sizeof(paddr_t) * pobj->npages); + uvm_obj_destroy(uobj, true); + kmem_free(pobj, sizeof(struct privcmd_object)); + break; + case PTYPE_GNTDEV_REF: + { + privcmd_notify(&pobj->u.gr.notify, 0, pobj->u.gr.ops); + kmem_free(pobj, PGO_GNTREF_LEN(pobj->npages)); + break; + } + case PTYPE_GNTDEV_ALLOC: + privcmd_notify(&pobj->u.ga.notify, pobj->u.ga.gntva, NULL); + for (int i = 0; i < pobj->npages; ++i) { + xengnt_revoke_access(pobj->u.ga.gref_ids[i]); + } + uvm_km_free(kernel_map, pobj->u.ga.gntva, + PAGE_SIZE * pobj->npages, UVM_KMF_WIRED); + kmem_free(pobj, PGO_GNTA_LEN(pobj->npages)); + } privcmd_nobjects--; } @@ -493,62 +914,76 @@ privpgop_fault(struct uvm_faultinfo *ufi continue; if (pps[i] == PGO_DONTCARE) continue; - if (pobj->maddr[maddr_i] == INVALID_PAGE) { - /* This has already been flagged as error. */ - error = EFAULT; + switch(pobj->type) { + case PTYPE_PRIVCMD: + if (pobj->u.pc.maddr[maddr_i] == INVALID_PAGE) { + /* This has already been flagged as error. */ + error = EFAULT; + goto out; + } + error = pmap_enter_ma(ufi->orig_map->pmap, vaddr, + pobj->u.pc.maddr[maddr_i], 0, + ufi->entry->protection, + PMAP_CANFAIL | ufi->entry->protection | + (pobj->u.pc.no_translate ? 
PMAP_MD_XEN_NOTR : 0), + pobj->u.pc.domid); + if (error == ENOMEM) { + goto out; + } + if (error) { + pobj->u.pc.maddr[maddr_i] = INVALID_PAGE; + error = EFAULT; + } + break; + case PTYPE_GNTDEV_REF: + { + struct pmap *pmap = ufi->orig_map->pmap; + if (pmap_enter_gnt(pmap, vaddr, entry->start, pobj->npages, &pobj->u.gr.ops[0]) != GNTST_okay) { + error = EFAULT; + goto out; + } break; } - error = pmap_enter_ma(ufi->orig_map->pmap, vaddr, - pobj->maddr[maddr_i], 0, ufi->entry->protection, - PMAP_CANFAIL | ufi->entry->protection, - pobj->domid); - if (error == ENOMEM) { + case PTYPE_GNTDEV_ALLOC: + { + paddr_t pa; + if (!pmap_extract(pmap_kernel(), + pobj->u.ga.gntva + maddr_i * PAGE_SIZE, &pa)) { + error = EFAULT; + goto out; + } + error = pmap_enter(ufi->orig_map->pmap, vaddr, pa, + ufi->entry->protection, + PMAP_CANFAIL | ufi->entry->protection); + if (error == ENOMEM) { + goto out; + } break; } + } if (error) { /* XXX for proper ptp accountings */ - pmap_remove(ufi->orig_map->pmap, vaddr, + pmap_remove(ufi->orig_map->pmap, vaddr, vaddr + PAGE_SIZE); } } +out: pmap_update(ufi->orig_map->pmap); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); return error; } static int -privcmd_map_obj(struct vm_map *map, vaddr_t start, paddr_t *maddr, - int npages, int domid) +privcmd_map_obj(struct vm_map *map, vaddr_t start, struct privcmd_object *obj, + vm_prot_t prot) { - struct privcmd_object *obj; int error; uvm_flag_t uvmflag; vaddr_t newstart = start; - vm_prot_t prot; - off_t size = ((off_t)npages << PGSHIFT); + off_t size = ((off_t)obj->npages << PGSHIFT); - vm_map_lock_read(map); - /* get protections. 
This also check for validity of mapping */ - if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE)) - prot = VM_PROT_READ | VM_PROT_WRITE; - else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ)) - prot = VM_PROT_READ; - else { - printf("uvm_map_checkprot 0x%lx -> 0x%lx " - "failed\n", - start, (unsigned long)(start + size - 1)); - vm_map_unlock_read(map); - kmem_free(maddr, sizeof(paddr_t) * npages); - return EINVAL; - } - vm_map_unlock_read(map); - - obj = kmem_alloc(sizeof(*obj), KM_SLEEP); privcmd_nobjects++; uvm_obj_init(&obj->uobj, &privpgops, true, 1); - obj->maddr = maddr; - obj->npages = npages; - obj->domid = domid; uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL, UVM_FLAG_FIXED | UVM_FLAG_UNMAP | UVM_FLAG_NOMERGE); error = uvm_map(map, &newstart, size, &obj->uobj, 0, 0, uvmflag);