Dave Voutila <[email protected]> writes:
> While working back and forth with dlg@ on his idea of adding in async
> task queues to vmd devices, he started to introduce a cleaner way to
> handle virtqueues. In short, we can just track the host virtual address
> and read/write to that location.
>
> This has the benefit of cleaning up a good chunk of code, mostly in the
> vioscsi device. If my count is correct, it's close to 100 lines
> removed.
>
> Testing on a variety of systems, I see anywhere from a small (5%) to a
> large (50%) gain in things like vionet performance depending on the host
> hardware and the guest OS. (Linux guests push packets faster, on
> average.)
>
> If/when this lands, it'll be easier to introduce the async taskq diff
> and also clean up more of the virtio codebase. Lots of duplicate logic
> in basic virtio-pci config handling, like updating queue address or
> size.
>
> Testing is appreciated, but also looking for OKs.
>
Have had a few positive test reports off-list.
OK?
>
> diff refs/heads/master refs/heads/vmd-vq
> commit - 9f38b5b2272f7ca113158ec7943c959ae9b22683
> commit + 509bbee70e5a11cfb69b93202a24fb032c72ac43
> blob - aea2fc1d5e8903c97cb7076bfb9e5f265f416378
> blob + 3a9af7790b24914fb6e7716a049fd0428803af9b
> --- usr.sbin/vmd/vioscsi.c
> +++ usr.sbin/vmd/vioscsi.c
> @@ -81,6 +81,7 @@ vioscsi_next_ring_item(struct vioscsi_dev *dev, struct
> {
> used->ring[used->idx & VIOSCSI_QUEUE_MASK].id = idx;
> used->ring[used->idx & VIOSCSI_QUEUE_MASK].len = desc->len;
> + __sync_synchronize();
> used->idx++;
>
> dev->vq[dev->cfg.queue_notify].last_avail =
> @@ -157,7 +158,7 @@ vioscsi_reg_name(uint8_t reg)
> switch (reg) {
> case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
> case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
> - case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
> + case VIRTIO_CONFIG_QUEUE_PFN: return "queue pfn";
> case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
> case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
> case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
> @@ -1672,8 +1673,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
> DPRINTF("%s: guest feature set to %u",
> __func__, dev->cfg.guest_feature);
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - dev->cfg.queue_address = *data;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + dev->cfg.queue_pfn = *data;
> vioscsi_update_qa(dev);
> break;
> case VIRTIO_CONFIG_QUEUE_SELECT:
> @@ -1692,7 +1693,7 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
> if (dev->cfg.device_status == 0) {
> log_debug("%s: device reset", __func__);
> dev->cfg.guest_feature = 0;
> - dev->cfg.queue_address = 0;
> + dev->cfg.queue_pfn = 0;
> vioscsi_update_qa(dev);
> dev->cfg.queue_size = 0;
> vioscsi_update_qs(dev);
> @@ -1988,8 +1989,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
> case VIRTIO_CONFIG_GUEST_FEATURES:
> *data = dev->cfg.guest_feature;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - *data = dev->cfg.queue_address;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + *data = dev->cfg.queue_pfn;
> break;
> case VIRTIO_CONFIG_QUEUE_SIZE:
> if (sz == 4)
> @@ -2033,25 +2034,38 @@ vioscsi_update_qs(struct vioscsi_dev *dev)
> void
> vioscsi_update_qs(struct vioscsi_dev *dev)
> {
> + struct virtio_vq_info *vq_info;
> +
> /* Invalid queue? */
> if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) {
> dev->cfg.queue_size = 0;
> return;
> }
>
> - /* Update queue address/size based on queue select */
> - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
> - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> +
> + /* Update queue pfn/size based on queue select */
> + dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
> + dev->cfg.queue_size = vq_info->qs;
> }
>
> void
> vioscsi_update_qa(struct vioscsi_dev *dev)
> {
> + struct virtio_vq_info *vq_info;
> + void *hva = NULL;
> +
> /* Invalid queue? */
> if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES)
> return;
>
> - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
> +
> + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOSCSI_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("vioscsi_update_qa");
> + vq_info->q_hva = hva;
> }
>
> /*
> @@ -2067,13 +2081,12 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
> int
> vioscsi_notifyq(struct vioscsi_dev *dev)
> {
> - uint64_t q_gpa;
> - uint32_t vr_sz;
> - int cnt, ret;
> + int cnt, ret = 0;
> char *vr;
> struct virtio_scsi_req_hdr req;
> struct virtio_scsi_res_hdr resp;
> struct virtio_vq_acct acct;
> + struct virtio_vq_info *vq_info;
>
> ret = 0;
>
> @@ -2081,34 +2094,21 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
> if (dev->cfg.queue_notify >= VIRTIO_MAX_QUEUES)
> return (ret);
>
> - vr_sz = vring_size(VIOSCSI_QUEUE_SIZE);
> - q_gpa = dev->vq[dev->cfg.queue_notify].qa;
> - q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> + vq_info = &dev->vq[dev->cfg.queue_notify];
> + vr = vq_info->q_hva;
> + if (vr == NULL)
> + fatalx("%s: null vring", __func__);
>
> - vr = calloc(1, vr_sz);
> - if (vr == NULL) {
> - log_warn("%s: calloc error getting vioscsi ring", __func__);
> - return (ret);
> - }
> -
> - if (read_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error reading gpa 0x%llx", __func__, q_gpa);
> - goto out;
> - }
> -
> /* Compute offsets in ring of descriptors, avail ring, and used ring */
> acct.desc = (struct vring_desc *)(vr);
> - acct.avail = (struct vring_avail *)(vr +
> - dev->vq[dev->cfg.queue_notify].vq_availoffset);
> - acct.used = (struct vring_used *)(vr +
> - dev->vq[dev->cfg.queue_notify].vq_usedoffset);
> + acct.avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
> + acct.used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
>
> - acct.idx =
> - dev->vq[dev->cfg.queue_notify].last_avail & VIOSCSI_QUEUE_MASK;
> + acct.idx = vq_info->last_avail & VIOSCSI_QUEUE_MASK;
>
> if ((acct.avail->idx & VIOSCSI_QUEUE_MASK) == acct.idx) {
> - log_warnx("%s:nothing to do?", __func__);
> - goto out;
> + log_debug("%s - nothing to do?", __func__);
> + return (0);
> }
>
> cnt = 0;
> @@ -2180,14 +2180,10 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
>
> ret = 1;
> dev->cfg.isr_status = 1;
> - /* Move ring indexes */
> +
> + /* Move ring indexes (updates the used ring index) */
> vioscsi_next_ring_item(dev, acct.avail, acct.used,
> acct.req_desc, acct.req_idx);
> -
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> goto next_msg;
> }
>
> @@ -2202,138 +2198,48 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
> case TEST_UNIT_READY:
> case START_STOP:
> ret = vioscsi_handle_tur(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case PREVENT_ALLOW:
> ret = vioscsi_handle_prevent_allow(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case READ_TOC:
> ret = vioscsi_handle_read_toc(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case READ_CAPACITY:
> ret = vioscsi_handle_read_capacity(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case READ_CAPACITY_16:
> ret = vioscsi_handle_read_capacity_16(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case READ_COMMAND:
> ret = vioscsi_handle_read_6(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case READ_10:
> ret = vioscsi_handle_read_10(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case INQUIRY:
> ret = vioscsi_handle_inquiry(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case MODE_SENSE:
> ret = vioscsi_handle_mode_sense(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case MODE_SENSE_BIG:
> ret = vioscsi_handle_mode_sense_big(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case GET_EVENT_STATUS_NOTIFICATION:
> ret = vioscsi_handle_gesn(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case READ_DISC_INFORMATION:
> ret = vioscsi_handle_read_disc_info(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case GET_CONFIGURATION:
> ret = vioscsi_handle_get_config(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case MECHANISM_STATUS:
> ret = vioscsi_handle_mechanism_status(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> case REPORT_LUNS:
> ret = vioscsi_handle_report_luns(dev, &req, &acct);
> - if (ret) {
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("%s: error writing vioring",
> - __func__);
> - }
> - }
> break;
> default:
> log_warnx("%s: unsupported opcode 0x%02x,%s",
> @@ -2348,6 +2254,5 @@ out:
> acct.idx = (acct.idx + 1) & VIOSCSI_QUEUE_MASK;
> }
> out:
> - free(vr);
> return (ret);
> }
> blob - c1739b9bbbf35804c4e85fa470497fea3a2fff36
> blob + 5d91da231b855b670970ed040657497086461636
> --- usr.sbin/vmd/virtio.c
> +++ usr.sbin/vmd/virtio.c
> @@ -89,7 +89,7 @@ virtio_reg_name(uint8_t reg)
> switch (reg) {
> case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
> case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
> - case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
> + case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
> case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
> case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
> case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
> @@ -123,40 +123,52 @@ viornd_update_qs(void)
> void
> viornd_update_qs(void)
> {
> + struct virtio_vq_info *vq_info;
> +
> /* Invalid queue? */
> if (viornd.cfg.queue_select > 0) {
> viornd.cfg.queue_size = 0;
> return;
> }
>
> - /* Update queue address/size based on queue select */
> - viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa;
> - viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs;
> + vq_info = &viornd.vq[viornd.cfg.queue_select];
> +
> + /* Update queue pfn/size based on queue select */
> + viornd.cfg.queue_pfn = vq_info->q_gpa >> 12;
> + viornd.cfg.queue_size = vq_info->qs;
> }
>
> /* Update queue address */
> void
> viornd_update_qa(void)
> {
> + struct virtio_vq_info *vq_info;
> + void *hva = NULL;
> +
> /* Invalid queue? */
> if (viornd.cfg.queue_select > 0)
> return;
>
> - viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address;
> + vq_info = &viornd.vq[viornd.cfg.queue_select];
> + vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE;
> +
> + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("viornd_update_qa");
> + vq_info->q_hva = hva;
> }
>
> int
> viornd_notifyq(void)
> {
> - uint64_t q_gpa;
> - uint32_t vr_sz;
> size_t sz;
> int dxx, ret;
> uint16_t aidx, uidx;
> - char *buf, *rnd_data;
> + char *vr, *rnd_data;
> struct vring_desc *desc;
> struct vring_avail *avail;
> struct vring_used *used;
> + struct virtio_vq_info *vq_info;
>
> ret = 0;
>
> @@ -164,27 +176,15 @@ viornd_notifyq(void)
> if (viornd.cfg.queue_notify > 0)
> return (0);
>
> - vr_sz = vring_size(VIORND_QUEUE_SIZE);
> - q_gpa = viornd.vq[viornd.cfg.queue_notify].qa;
> - q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> + vq_info = &viornd.vq[viornd.cfg.queue_notify];
> + vr = vq_info->q_hva;
> + if (vr == NULL)
> + fatalx("%s: null vring", __func__);
>
> - buf = calloc(1, vr_sz);
> - if (buf == NULL) {
> - log_warn("calloc error getting viornd ring");
> - return (0);
> - }
> + desc = (struct vring_desc *)(vr);
> + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
> + used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
>
> - if (read_mem(q_gpa, buf, vr_sz)) {
> - free(buf);
> - return (0);
> - }
> -
> - desc = (struct vring_desc *)(buf);
> - avail = (struct vring_avail *)(buf +
> - viornd.vq[viornd.cfg.queue_notify].vq_availoffset);
> - used = (struct vring_used *)(buf +
> - viornd.vq[viornd.cfg.queue_notify].vq_usedoffset);
> -
> aidx = avail->idx & VIORND_QUEUE_MASK;
> uidx = used->idx & VIORND_QUEUE_MASK;
>
> @@ -209,18 +209,13 @@ viornd_notifyq(void)
> viornd.cfg.isr_status = 1;
> used->ring[uidx].id = dxx;
> used->ring[uidx].len = sz;
> + __sync_synchronize();
> used->idx++;
> -
> - if (write_mem(q_gpa, buf, vr_sz)) {
> - log_warnx("viornd: error writing vio ring");
> - }
> }
> free(rnd_data);
> } else
> fatal("memory allocation error for viornd data");
>
> - free(buf);
> -
> return (ret);
> }
>
> @@ -241,8 +236,8 @@ virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, u
> case VIRTIO_CONFIG_GUEST_FEATURES:
> viornd.cfg.guest_feature = *data;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - viornd.cfg.queue_address = *data;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + viornd.cfg.queue_pfn = *data;
> viornd_update_qa();
> break;
> case VIRTIO_CONFIG_QUEUE_SELECT:
> @@ -266,8 +261,8 @@ virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, u
> case VIRTIO_CONFIG_GUEST_FEATURES:
> *data = viornd.cfg.guest_feature;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - *data = viornd.cfg.queue_address;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + *data = viornd.cfg.queue_pfn;
> break;
> case VIRTIO_CONFIG_QUEUE_SIZE:
> *data = viornd.cfg.queue_size;
> @@ -294,25 +289,38 @@ vioblk_update_qa(struct vioblk_dev *dev)
> void
> vioblk_update_qa(struct vioblk_dev *dev)
> {
> + struct virtio_vq_info *vq_info;
> + void *hva = NULL;
> +
> /* Invalid queue? */
> if (dev->cfg.queue_select > 0)
> return;
>
> - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
> +
> + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("vioblk_update_qa");
> + vq_info->q_hva = hva;
> }
>
> void
> vioblk_update_qs(struct vioblk_dev *dev)
> {
> + struct virtio_vq_info *vq_info;
> +
> /* Invalid queue? */
> if (dev->cfg.queue_select > 0) {
> dev->cfg.queue_size = 0;
> return;
> }
>
> - /* Update queue address/size based on queue select */
> - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
> - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> +
> + /* Update queue pfn/size based on queue select */
> + dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
> + dev->cfg.queue_size = vq_info->qs;
> }
>
> static void
> @@ -424,55 +432,41 @@ vioblk_notifyq(struct vioblk_dev *dev)
> int
> vioblk_notifyq(struct vioblk_dev *dev)
> {
> - uint64_t q_gpa;
> - uint32_t vr_sz;
> uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
> uint8_t ds;
> - int cnt, ret;
> + int cnt;
> off_t secbias;
> char *vr;
> struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
> struct vring_avail *avail;
> struct vring_used *used;
> struct virtio_blk_req_hdr cmd;
> + struct virtio_vq_info *vq_info;
>
> - ret = 0;
> -
> /* Invalid queue? */
> if (dev->cfg.queue_notify > 0)
> return (0);
>
> - vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
> - q_gpa = dev->vq[dev->cfg.queue_notify].qa;
> - q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> + vq_info = &dev->vq[dev->cfg.queue_notify];
> + vr = vq_info->q_hva;
> + if (vr == NULL)
> + fatalx("%s: null vring", __func__);
>
> - vr = calloc(1, vr_sz);
> - if (vr == NULL) {
> - log_warn("calloc error getting vioblk ring");
> - return (0);
> - }
> -
> - if (read_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("error reading gpa 0x%llx", q_gpa);
> - goto out;
> - }
> -
> /* Compute offsets in ring of descriptors, avail ring, and used ring */
> desc = (struct vring_desc *)(vr);
> - avail = (struct vring_avail *)(vr +
> - dev->vq[dev->cfg.queue_notify].vq_availoffset);
> - used = (struct vring_used *)(vr +
> - dev->vq[dev->cfg.queue_notify].vq_usedoffset);
> + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
> + used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
>
> - idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
> + idx = vq_info->last_avail & VIOBLK_QUEUE_MASK;
>
> if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
> - log_warnx("vioblk queue notify - nothing to do?");
> - goto out;
> + log_debug("%s - nothing to do?", __func__);
> + return (0);
> }
>
> while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
>
> + ds = VIRTIO_BLK_S_IOERR;
> cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
> cmd_desc = &desc[cmd_desc_idx];
>
> @@ -691,22 +685,17 @@ vioblk_notifyq(struct vioblk_dev *dev)
> goto out;
> }
>
> - ret = 1;
> dev->cfg.isr_status = 1;
> used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
> used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
> + __sync_synchronize();
> used->idx++;
>
> - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
> - VIOBLK_QUEUE_MASK;
> - if (write_mem(q_gpa, vr, vr_sz))
> - log_warnx("%s: error writing vio ring", __func__);
> -
> + vq_info->last_avail = avail->idx & VIOBLK_QUEUE_MASK;
> idx = (idx + 1) & VIOBLK_QUEUE_MASK;
> }
> out:
> - free(vr);
> - return (ret);
> + return (1);
> }
>
> int
> @@ -729,8 +718,8 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u
> case VIRTIO_CONFIG_GUEST_FEATURES:
> dev->cfg.guest_feature = *data;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - dev->cfg.queue_address = *data;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + dev->cfg.queue_pfn = *data;
> vioblk_update_qa(dev);
> break;
> case VIRTIO_CONFIG_QUEUE_SELECT:
> @@ -747,7 +736,7 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u
> if (dev->cfg.device_status == 0) {
> log_debug("%s: device reset", __func__);
> dev->cfg.guest_feature = 0;
> - dev->cfg.queue_address = 0;
> + dev->cfg.queue_pfn = 0;
> vioblk_update_qa(dev);
> dev->cfg.queue_size = 0;
> vioblk_update_qs(dev);
> @@ -890,8 +879,8 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u
> case VIRTIO_CONFIG_GUEST_FEATURES:
> *data = dev->cfg.guest_feature;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - *data = dev->cfg.queue_address;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + *data = dev->cfg.queue_pfn;
> break;
> case VIRTIO_CONFIG_QUEUE_SIZE:
> if (sz == 4)
> @@ -951,8 +940,8 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u
> case VIRTIO_CONFIG_GUEST_FEATURES:
> dev->cfg.guest_feature = *data;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - dev->cfg.queue_address = *data;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + dev->cfg.queue_pfn = *data;
> vionet_update_qa(dev);
> break;
> case VIRTIO_CONFIG_QUEUE_SELECT:
> @@ -969,7 +958,7 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u
> if (dev->cfg.device_status == 0) {
> log_debug("%s: device reset", __func__);
> dev->cfg.guest_feature = 0;
> - dev->cfg.queue_address = 0;
> + dev->cfg.queue_pfn = 0;
> vionet_update_qa(dev);
> dev->cfg.queue_size = 0;
> vionet_update_qs(dev);
> @@ -1003,8 +992,8 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u
> case VIRTIO_CONFIG_GUEST_FEATURES:
> *data = dev->cfg.guest_feature;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - *data = dev->cfg.queue_address;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + *data = dev->cfg.queue_pfn;
> break;
> case VIRTIO_CONFIG_QUEUE_SIZE:
> *data = dev->cfg.queue_size;
> @@ -1036,11 +1025,20 @@ vionet_update_qa(struct vionet_dev *dev)
> void
> vionet_update_qa(struct vionet_dev *dev)
> {
> + struct virtio_vq_info *vq_info;
> + void *hva = NULL;
> +
> /* Invalid queue? */
> if (dev->cfg.queue_select > 1)
> return;
>
> - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
> +
> + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("vionet_update_qa");
> + vq_info->q_hva = hva;
> }
>
> /*
> @@ -1049,15 +1047,19 @@ vionet_update_qs(struct vionet_dev *dev)
> void
> vionet_update_qs(struct vionet_dev *dev)
> {
> + struct virtio_vq_info *vq_info;
> +
> /* Invalid queue? */
> if (dev->cfg.queue_select > 1) {
> dev->cfg.queue_size = 0;
> return;
> }
>
> - /* Update queue address/size based on queue select */
> - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
> - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> +
> + /* Update queue pfn/size based on queue select */
> + dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
> + dev->cfg.queue_size = vq_info->qs;
> }
>
> /*
> @@ -1073,17 +1075,14 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
> int
> vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc)
> {
> - uint64_t q_gpa;
> - uint32_t vr_sz;
> uint16_t dxx, idx, hdr_desc_idx, chain_hdr_idx;
> - int ret = 0;
> char *vr = NULL;
> size_t bufsz = 0, off = 0, pkt_offset = 0, chunk_size = 0;
> size_t chain_len = 0;
> struct vring_desc *desc, *pkt_desc, *hdr_desc;
> struct vring_avail *avail;
> struct vring_used *used;
> - struct vring_used_elem *ue;
> + struct virtio_vq_info *vq_info;
> struct virtio_net_hdr hdr;
> size_t hdr_sz;
>
> @@ -1095,33 +1094,23 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
> hdr_sz = sizeof(hdr);
>
> if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK))
> - return ret;
> -
> - vr_sz = vring_size(VIONET_QUEUE_SIZE);
> - q_gpa = dev->vq[RXQ].qa;
> - q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> -
> - vr = calloc(1, vr_sz);
> - if (vr == NULL) {
> - log_warn("rx enq: calloc error getting vionet ring");
> return (0);
> - }
>
> - if (read_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("rx enq: error reading gpa 0x%llx", q_gpa);
> - goto out;
> - }
> + vq_info = &dev->vq[RXQ];
> + vr = vq_info->q_hva;
> + if (vr == NULL)
> + fatalx("%s: null vring", __func__);
>
> /* Compute offsets in ring of descriptors, avail ring, and used ring */
> desc = (struct vring_desc *)(vr);
> - avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset);
> - used = (struct vring_used *)(vr + dev->vq[RXQ].vq_usedoffset);
> + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
> + used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
>
> - idx = dev->vq[RXQ].last_avail & VIONET_QUEUE_MASK;
> - if ((dev->vq[RXQ].notified_avail & VIONET_QUEUE_MASK) == idx) {
> + idx = vq_info->last_avail & VIONET_QUEUE_MASK;
> + if ((vq_info->notified_avail & VIONET_QUEUE_MASK) == idx) {
> log_debug("%s: insufficient available buffer capacity, "
> "dropping packet.", __func__);
> - goto out;
> + return (0);
> }
>
> hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
> @@ -1138,7 +1127,7 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
> if (!(pkt_desc->flags & VRING_DESC_F_WRITE)) {
> log_warnx("%s: invalid descriptor, not writable",
> __func__);
> - goto out;
> + return (0);
> }
>
> /* How much data do we get to write? */
> @@ -1158,7 +1147,7 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
> pkt + pkt_offset, chunk_size)) {
> log_warnx("%s: failed to write to buffer 0x%llx",
> __func__, pkt_desc->addr);
> - goto out;
> + return (0);
> }
>
> chain_len += chunk_size + off;
> @@ -1168,19 +1157,8 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
> dxx = pkt_desc->next & VIONET_QUEUE_MASK;
> } while (bufsz < sz && pkt_desc->flags & VRING_DESC_F_NEXT);
>
> - /* Update the list of used buffers. */
> - ue = &used->ring[(used->idx) & VIONET_QUEUE_MASK];
> - ue->id = chain_hdr_idx;
> - ue->len = chain_len;
> - off = ((char *)ue - vr);
> - if (write_mem(q_gpa + off, ue, sizeof(*ue))) {
> - log_warnx("%s: error updating rx used ring", __func__);
> - goto out;
> - }
> -
> /* Move our marker in the ring...*/
> - used->idx++;
> - dev->vq[RXQ].last_avail = (dev->vq[RXQ].last_avail + 1) &
> + vq_info->last_avail = (vq_info->last_avail + 1) &
> VIONET_QUEUE_MASK;
>
> /* Prepend the virtio net header in the first buffer. */
> @@ -1189,23 +1167,21 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
> if (write_mem(hdr_desc->addr, &hdr, hdr_sz)) {
> log_warnx("vionet: rx enq header write_mem error @ 0x%llx",
> hdr_desc->addr);
> - goto out;
> + return (0);
> }
>
> /* Update the index field in the used ring. This must be done last. */
> dev->cfg.isr_status = 1;
> - off = (char *)&used->idx - vr;
> - *spc = (dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail) &
> - VIONET_QUEUE_MASK;
> + *spc = (vq_info->notified_avail - vq_info->last_avail)
> + & VIONET_QUEUE_MASK;
>
> - if (write_mem(q_gpa + off, &used->idx, sizeof(used->idx)))
> - log_warnx("vionet: error writing vio ring");
> + /* Update the list of used buffers. */
> + used->ring[used->idx & VIONET_QUEUE_MASK].id = chain_hdr_idx;
> + used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len;
> + __sync_synchronize();
> + used->idx++;
>
> - ret = 1;
> -
> -out:
> - free(vr);
> - return (ret);
> + return (1);
> }
>
> /*
> @@ -1277,33 +1253,18 @@ vionet_notify_rx(struct vionet_dev *dev)
> void
> vionet_notify_rx(struct vionet_dev *dev)
> {
> - uint64_t q_gpa;
> - uint32_t vr_sz;
> char *vr;
> struct vring_avail *avail;
> + struct virtio_vq_info *vq_info;
>
> - vr_sz = vring_size(VIONET_QUEUE_SIZE);
> - q_gpa = dev->vq[RXQ].qa;
> - q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> + vq_info = &dev->vq[RXQ];
> + vr = vq_info->q_hva;
> + if (vr == NULL)
> + fatalx("%s: null vring", __func__);
>
> - vr = malloc(vr_sz);
> - if (vr == NULL) {
> - log_warn("malloc error getting vionet ring");
> - return;
> - }
> -
> - if (read_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("error reading gpa 0x%llx", q_gpa);
> - free(vr);
> - return;
> - }
> -
> /* Compute offset into avail ring */
> - avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset);
> -
> - dev->vq[RXQ].notified_avail = avail->idx - 1;
> -
> - free(vr);
> + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
> + vq_info->notified_avail = avail->idx - 1;
> }
>
> /*
> @@ -1312,12 +1273,11 @@ vionet_notifyq(struct vionet_dev *dev)
> int
> vionet_notifyq(struct vionet_dev *dev)
> {
> - int ret;
> + int ret = 0;
>
> switch (dev->cfg.queue_notify) {
> case RXQ:
> vionet_notify_rx(dev);
> - ret = 0;
> break;
> case TXQ:
> ret = vionet_notify_tx(dev);
> @@ -1329,7 +1289,6 @@ vionet_notifyq(struct vionet_dev *dev)
> */
> log_debug("%s: notify for unimplemented queue ID %d",
> __func__, dev->cfg.queue_notify);
> - ret = 0;
> break;
> }
>
> @@ -1342,49 +1301,34 @@ vionet_notify_tx(struct vionet_dev *dev)
> int
> vionet_notify_tx(struct vionet_dev *dev)
> {
> - uint64_t q_gpa;
> - uint32_t vr_sz;
> uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx, cnt;
> size_t pktsz, chunk_size = 0;
> - ssize_t dhcpsz;
> - int ret, num_enq, ofs, spc;
> - char *vr, *pkt, *dhcppkt;
> + ssize_t dhcpsz = 0;
> + int num_enq, ofs, spc = 0;
> + char *vr = NULL, *pkt = NULL, *dhcppkt = NULL;
> struct vring_desc *desc, *pkt_desc, *hdr_desc;
> struct vring_avail *avail;
> struct vring_used *used;
> + struct virtio_vq_info *vq_info;
> struct ether_header *eh;
>
> - dhcpsz = 0;
> - vr = pkt = dhcppkt = NULL;
> - ret = spc = 0;
> + vq_info = &dev->vq[TXQ];
> + vr = vq_info->q_hva;
> + if (vr == NULL)
> + fatalx("%s: null vring", __func__);
>
> - vr_sz = vring_size(VIONET_QUEUE_SIZE);
> - q_gpa = dev->vq[TXQ].qa;
> - q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
> -
> - vr = calloc(1, vr_sz);
> - if (vr == NULL) {
> - log_warn("calloc error getting vionet ring");
> - goto out;
> - }
> -
> - if (read_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("error reading gpa 0x%llx", q_gpa);
> - goto out;
> - }
> -
> /* Compute offsets in ring of descriptors, avail ring, and used ring */
> desc = (struct vring_desc *)(vr);
> - avail = (struct vring_avail *)(vr + dev->vq[TXQ].vq_availoffset);
> - used = (struct vring_used *)(vr + dev->vq[TXQ].vq_usedoffset);
> + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
> + used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
>
> num_enq = 0;
>
> - idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK;
> + idx = vq_info->last_avail & VIONET_QUEUE_MASK;
>
> if ((avail->idx & VIONET_QUEUE_MASK) == idx) {
> - log_warnx("vionet tx queue notify - nothing to do?");
> - goto out;
> + log_debug("%s - nothing to do?", __func__);
> + return (0);
> }
>
> while ((avail->idx & VIONET_QUEUE_MASK) != idx) {
> @@ -1502,36 +1446,29 @@ vionet_notify_tx(struct vionet_dev *dev)
> }
>
> drop_packet:
> - ret = 1;
> dev->cfg.isr_status = 1;
> used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx;
> used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len;
> + __sync_synchronize();
> used->idx++;
>
> - dev->vq[TXQ].last_avail++;
> + vq_info->last_avail = avail->idx & VIONET_QUEUE_MASK;
> + idx = (idx + 1) & VIONET_QUEUE_MASK;
> +
> num_enq++;
>
> - idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK;
> -
> free(pkt);
> pkt = NULL;
> }
>
> - if (write_mem(q_gpa, vr, vr_sz)) {
> - log_warnx("vionet: tx error writing vio ring");
> - }
> + if (dhcpsz > 0)
> + vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc);
>
> - if (dhcpsz > 0) {
> - if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc))
> - ret = 1;
> - }
> -
> out:
> - free(vr);
> free(pkt);
> free(dhcppkt);
>
> - return (ret);
> + return (1);
> }
>
> int
> @@ -1662,8 +1599,8 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_
> case VIRTIO_CONFIG_GUEST_FEATURES:
> vmmci.cfg.guest_feature = *data;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - vmmci.cfg.queue_address = *data;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + vmmci.cfg.queue_pfn = *data;
> break;
> case VIRTIO_CONFIG_QUEUE_SELECT:
> vmmci.cfg.queue_select = *data;
> @@ -1703,8 +1640,8 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_
> case VIRTIO_CONFIG_GUEST_FEATURES:
> *data = vmmci.cfg.guest_feature;
> break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - *data = vmmci.cfg.queue_address;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + *data = vmmci.cfg.queue_pfn;
> break;
> case VIRTIO_CONFIG_QUEUE_SIZE:
> *data = vmmci.cfg.queue_size;
> blob - 128b469018aefa35f451c1f92be986fc691b26c9
> blob + c51f80b6d188a1eef7fd22976dd3073ce2de2d24
> --- usr.sbin/vmd/virtio.h
> +++ usr.sbin/vmd/virtio.h
> @@ -64,13 +64,18 @@
> #define VIRTIO_MAX_QUEUES 3
>
> /*
> + * Rename the address config register to be more descriptive.
> + */
> +#define VIRTIO_CONFIG_QUEUE_PFN VIRTIO_CONFIG_QUEUE_ADDRESS
> +
> +/*
> * This struct stores notifications from a virtio driver. There is
> * one such struct per virtio device.
> */
> struct virtio_io_cfg {
> uint32_t device_feature;
> uint32_t guest_feature;
> - uint32_t queue_address;
> + uint32_t queue_pfn;
> uint16_t queue_size;
> uint16_t queue_select;
> uint16_t queue_notify;
> @@ -93,8 +98,11 @@ struct virtio_vq_info {
> */
> struct virtio_vq_info {
> /* Guest physical address of virtq */
> - uint32_t qa;
> + uint64_t q_gpa;
>
> + /* Host virtual address of virtq */
> + void *q_hva;
> +
> /* Queue size: number of queue entries in virtq */
> uint32_t qs;
>
> blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29
> blob + 383bd66220116cee194d4c7147fdb6b72410b8d2
> --- usr.sbin/vmd/vm.c
> +++ usr.sbin/vmd/vm.c
> @@ -1991,6 +1991,47 @@ read_mem(paddr_t src, void *buf, size_t len)
> }
>
> /*
> + * hvaddr_mem
> + *
> + * Translate a guest physical address to a host virtual address, checking the
> + * provided memory range length to confirm it's contiguous within the same
> + * guest memory range (vm_mem_range).
> + *
> + * Parameters:
> + * gpa: guest physical address to translate
> + * len: number of bytes in the intended range
> + *
> + * Return values:
> + * void* to host virtual memory on success
> + * NULL on error, setting errno to:
> + * EFAULT: gpa falls outside guest memory ranges
> + * EINVAL: requested len extends beyond memory range
> + */
> +void *
> +hvaddr_mem(paddr_t gpa, size_t len)
> +{
> + struct vm_mem_range *vmr;
> + size_t off;
> +
> + vmr = find_gpa_range(¤t_vm->vm_params.vmc_params, gpa, len);
> + if (vmr == NULL) {
> + log_warnx("%s: failed - invalid gpa: 0x%lx\n", __func__, gpa);
> + errno = EFAULT;
> + return (NULL);
> + }
> +
> + off = gpa - vmr->vmr_gpa;
> + if (len > (vmr->vmr_size - off)) {
> + log_warnx("%s: failed - invalid memory range: gpa=0x%lx, "
> + "len=%zu", __func__, gpa, len);
> + errno = EINVAL;
> + return (NULL);
> + }
> +
> + return ((char *)vmr->vmr_va + off);
> +}
> +
> +/*
> * vcpu_assert_pic_irq
> *
> * Injects the specified IRQ on the supplied vcpu/vm
> blob - c27d03df7336a2c8ede22ec0d83b74d327fb3244
> blob + 17750811887dc4ae88c8919986d0708228f81169
> --- usr.sbin/vmd/vmd.h
> +++ usr.sbin/vmd/vmd.h
> @@ -460,6 +460,7 @@ int write_mem(paddr_t, const void *buf, size_t);
> void vm_pipe_send(struct vm_dev_pipe *, enum pipe_msg_type);
> enum pipe_msg_type vm_pipe_recv(struct vm_dev_pipe *);
> int write_mem(paddr_t, const void *buf, size_t);
> +void* hvaddr_mem(paddr_t, size_t);
>
> /* config.c */
> int config_init(struct vmd *);