From: Dusan Stojkovic <[email protected]> The vhost-user specification reserves protocol feature bit 17 and documents an extended memory region description for backends that map guest memory through Xen rather than mapping a file descriptor. Each region carries two extra fields, "xen mmap flags" and "domid" (see docs/interop/vhost-user.rst, "Memory region description"). The layout is implemented by rust-vmm's vhost and vm-memory crates and used by Xen vhost-user device backends.
Implement the front-end side for foreign mappings: - negotiate VHOST_USER_PROTOCOL_F_XEN_MMAP - when negotiated, build SET_MEM_TABLE payloads from the extended region layout, with xen_mmap_flags = FOREIGN and xen_mmap_data set to the guest's domain id. - under Xen, do not call vhost_user_get_mr_data(): guest RAM has no fd and its userspace_addr does not correspond to a valid mapping in the address space. Backends map regions through privcmd using the guest physical address and domid; the fd accompanying each region only satisfies the protocol's one-fd-per-region requirement. Pass a /dev/xen/privcmd fd and close it once the message has been sent. Tracepoints for opening and closing Xen fds are added as well. - suppress VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS under Xen: the ADD/REM_MEM_REG message path has not been adapted to the Xen region format, so the front-end falls back to SET_MEM_TABLE. Postcopy is likewise refused. The userspace_addr field is carried unchanged; Xen backends derive mappings from guest_phys_addr and domid and do not interpret it. Signed-off-by: Dusan Stojkovic <[email protected]> Signed-off-by: Nikola Jelic <[email protected]> --- hw/virtio/trace-events | 2 + hw/virtio/vhost-user.c | 120 +++++++++++++++++++++++++++++++++++++++-- include/hw/virtio/vhost-user.h | 2 +- 3 files changed, 120 insertions(+), 4 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 2a57edc21e..0f3c58fd78 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -30,6 +30,8 @@ vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t vhost_user_postcopy_listen(void) "" vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64 +vhost_user_open_region_fd(int index, int fd) "region:%d fd:%d" 
+vhost_user_put_region_fds(int index, int fd) "region:%d fd:%d" vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64 vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index d627351f45..932ead4eeb 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -30,6 +30,8 @@ #include "migration/postcopy-ram.h" #include "trace.h" #include "system/ramblock.h" +#include "system/xen.h" +#include "hw/xen/xen.h" #include <sys/ioctl.h> #include <sys/socket.h> @@ -181,12 +183,36 @@ typedef struct VhostUserMemoryRegion { uint64_t mmap_offset; } VhostUserMemoryRegion; +/* + * Memory region flags for VHOST_USER_PROTOCOL_F_XEN_MMAP, matching the + * values used by rust-vmm's vm-memory (MmapXenFlags). + */ +#define VHOST_USER_XEN_MMAP_FLAG_FOREIGN 0x1 +#define VHOST_USER_XEN_MMAP_FLAG_GRANT 0x2 + +/* + * Extended memory region description, used when + * VHOST_USER_PROTOCOL_F_XEN_MMAP has been negotiated. 
+ */ +typedef struct VhostUserMemoryRegionXen { + VhostUserMemoryRegion region; + uint32_t xen_mmap_flags; + uint32_t xen_mmap_data; /* domain id for FOREIGN/GRANT mappings */ +} VhostUserMemoryRegionXen; + + typedef struct VhostUserMemory { uint32_t nregions; uint32_t padding; VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserMemoryXen { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegionXen regions[VHOST_MEMORY_BASELINE_NREGIONS]; +} VhostUserMemoryXen; + typedef struct VhostUserMemRegMsg { uint64_t padding; VhostUserMemoryRegion region; @@ -294,6 +320,7 @@ typedef union { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserMemoryXen memory_xen; VhostUserMemRegMsg mem_reg; VhostUserLog log; struct vhost_iotlb_msg iotlb; @@ -594,6 +621,8 @@ static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, static bool vhost_user_gpa_addresses(struct vhost_dev *dev) { return vhost_user_has_protocol_feature( + dev, VHOST_USER_PROTOCOL_F_XEN_MMAP) || + vhost_user_has_protocol_feature( dev, VHOST_USER_PROTOCOL_F_GPA_ADDRESSES); } @@ -612,6 +641,23 @@ static void vhost_user_fill_msg_region(struct vhost_dev *dev, dst->mmap_offset = mmap_offset; } +/* + * With VHOST_USER_PROTOCOL_F_XEN_MMAP the region fds are opened by us + * rather than owned by the RAMBlocks, so they must be closed once the + * message carrying them has been sent (or on error). 
+ */ +static void vhost_user_put_region_fds(struct vhost_dev *dev, int *fds, + size_t fd_num) +{ + if (!vhost_user_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_XEN_MMAP)) { + return; + } + for (size_t i = 0; i < fd_num; i++) { + trace_vhost_user_put_region_fds(i, fds[i]); + close(fds[i]); + } +} + static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, struct vhost_dev *dev, VhostUserMsg *msg, @@ -623,13 +669,41 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, MemoryRegion *mr; struct vhost_memory_region *reg; VhostUserMemoryRegion region_buffer; + bool xen_mmap = vhost_user_has_protocol_feature(dev, + VHOST_USER_PROTOCOL_F_XEN_MMAP); + + if (track_ramblocks && xen_mmap) { + error_report("vhost-user: postcopy is not supported under Xen"); + return -ENOTSUP; + } msg->hdr.request = VHOST_USER_SET_MEM_TABLE; for (i = 0; i < dev->mem->nregions; ++i) { reg = dev->mem->regions + i; - mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + if (xen_mmap) { + /* + * Under Xen the guest RAM is not mapped into our address + * space; the backend maps it through the Xen foreign + * mapping interface using the guest physical address and + * domain id carried in the region descriptor. The file + * descriptor only satisfies the one-fd-per-region + * requirement of the protocol: pass /dev/xen/privcmd and + * close it once the message has been sent. 
+ */ + mr = NULL; + offset = 0; + fd = open("/dev/xen/privcmd", O_RDWR | O_CLOEXEC); + if (fd < 0) { + error_report("vhost-user: failed to open /dev/xen/privcmd:" + " %s", strerror(errno)); + return -errno; + } + trace_vhost_user_open_region_fd(i, fd); + } else { + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + } if (fd > 0) { if (track_ramblocks) { assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); @@ -642,10 +716,21 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, u->region_rb[i] = mr->ram_block; } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { error_report("Failed preparing vhost-user memory table msg"); + if (xen_mmap) { + close(fd); + } return -ENOBUFS; } vhost_user_fill_msg_region(dev, &region_buffer, reg, offset); - msg->payload.memory.regions[*fd_num] = region_buffer; + if (xen_mmap) { + msg->payload.memory_xen.regions[*fd_num].region = region_buffer; + msg->payload.memory_xen.regions[*fd_num].xen_mmap_flags = + VHOST_USER_XEN_MMAP_FLAG_FOREIGN; + msg->payload.memory_xen.regions[*fd_num].xen_mmap_data = + xen_domid; + } else { + msg->payload.memory.regions[*fd_num] = region_buffer; + } fds[(*fd_num)++] = fd; } else if (track_ramblocks) { u->region_rb_offset[i] = 0; @@ -663,7 +748,11 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, msg->hdr.size = sizeof(msg->payload.memory.nregions); msg->hdr.size += sizeof(msg->payload.memory.padding); - msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); + if (xen_mmap) { + msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegionXen); + } else { + msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); + } return 0; } @@ -1149,10 +1238,12 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, false); if (ret < 0) { + vhost_user_put_region_fds(dev, fds, fd_num); return ret; } ret = vhost_user_write(dev, &msg, fds, fd_num); + vhost_user_put_region_fds(dev, fds, fd_num); if (ret < 
0) { return ret; } @@ -2551,6 +2642,29 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, VHOST_USER_PROTOCOL_F_GET_VRING_BASE_INFLIGHT); } + if (!xen_enabled()) { + /* + * Xen memory mappings only make sense when QEMU itself runs + * as a Xen device model. + */ + protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_XEN_MMAP); + } else { + if (!virtio_has_feature(protocol_features, + VHOST_USER_PROTOCOL_F_XEN_MMAP)) { + error_setg(errp, "vhost-user backend does not support " + "VHOST_USER_PROTOCOL_F_XEN_MMAP, which is " + "required when running under Xen"); + return -EPROTO; + } + /* + * The ADD/REM_MEM_REG message path has not been adapted to + * the Xen region format. Xen guests expose a single RAM + * region, so fall back to SET_MEM_TABLE. + */ + protocol_features &= + ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); + } + /* final set of protocol features */ u->protocol_features = protocol_features; err = vhost_user_set_protocol_features(dev, u->protocol_features); diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 06c360af18..46be9cd57c 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -30,7 +30,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, VHOST_USER_PROTOCOL_F_STATUS = 16, - /* Feature 17 reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */ + VHOST_USER_PROTOCOL_F_XEN_MMAP = 17, VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 18, VHOST_USER_PROTOCOL_F_DEVICE_STATE = 19, VHOST_USER_PROTOCOL_F_GET_VRING_BASE_INFLIGHT = 20, -- 2.43.0
