While working back and forth with dlg@ on his idea of adding in async
task queues to vmd devices, he started to introduce a cleaner way to
handle virtqueues. In short, we can just track the host virtual address
and read/write to that location.
This has the benefit of cleaning up a good chunk of code, mostly in the
vioscsi device. If my count is correct, it's close to 100 lines
removed.
Testing on a variety of systems, I see anywhere from a small (5%) to a
large (50%) gain in things like vionet performance depending on the host
hardware and the guest OS. (Linux guests push packets faster, on
average.)
If/when this lands, it'll be easier to introduce the async taskq diff
and also clean up more of the virtio codebase. Lots of duplicate logic
in basic virtio-pci config handling, like updating queue address or
size.
Testing is appreciated, but also looking for OKs.
diff refs/heads/master refs/heads/vmd-vq
commit - 9f38b5b2272f7ca113158ec7943c959ae9b22683
commit + 509bbee70e5a11cfb69b93202a24fb032c72ac43
blob - aea2fc1d5e8903c97cb7076bfb9e5f265f416378
blob + 3a9af7790b24914fb6e7716a049fd0428803af9b
--- usr.sbin/vmd/vioscsi.c
+++ usr.sbin/vmd/vioscsi.c
@@ -81,6 +81,7 @@ vioscsi_next_ring_item(struct vioscsi_dev *dev, struct
{
used->ring[used->idx & VIOSCSI_QUEUE_MASK].id = idx;
used->ring[used->idx & VIOSCSI_QUEUE_MASK].len = desc->len;
+ __sync_synchronize();
used->idx++;
dev->vq[dev->cfg.queue_notify].last_avail =
@@ -157,7 +158,7 @@ vioscsi_reg_name(uint8_t reg)
switch (reg) {
case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
- case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
+ case VIRTIO_CONFIG_QUEUE_PFN: return "queue pfn";
case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
@@ -1672,8 +1673,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
DPRINTF("%s: guest feature set to %u",
__func__, dev->cfg.guest_feature);
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- dev->cfg.queue_address = *data;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ dev->cfg.queue_pfn = *data;
vioscsi_update_qa(dev);
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
@@ -1692,7 +1693,7 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
if (dev->cfg.device_status == 0) {
log_debug("%s: device reset", __func__);
dev->cfg.guest_feature = 0;
- dev->cfg.queue_address = 0;
+ dev->cfg.queue_pfn = 0;
vioscsi_update_qa(dev);
dev->cfg.queue_size = 0;
vioscsi_update_qs(dev);
@@ -1988,8 +1989,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
case VIRTIO_CONFIG_GUEST_FEATURES:
*data = dev->cfg.guest_feature;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- *data = dev->cfg.queue_address;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ *data = dev->cfg.queue_pfn;
break;
case VIRTIO_CONFIG_QUEUE_SIZE:
if (sz == 4)
@@ -2033,25 +2034,38 @@ vioscsi_update_qs(struct vioscsi_dev *dev)
void
vioscsi_update_qs(struct vioscsi_dev *dev)
{
+ struct virtio_vq_info *vq_info;
+
/* Invalid queue? */
if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) {
dev->cfg.queue_size = 0;
return;
}
- /* Update queue address/size based on queue select */
- dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
- dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
+ vq_info = &dev->vq[dev->cfg.queue_select];
+
+ /* Update queue pfn/size based on queue select */
+ dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
+ dev->cfg.queue_size = vq_info->qs;
}
void
vioscsi_update_qa(struct vioscsi_dev *dev)
{
+ struct virtio_vq_info *vq_info;
+ void *hva = NULL;
+
/* Invalid queue? */
if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES)
return;
- dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
+ vq_info = &dev->vq[dev->cfg.queue_select];
+ vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
+
+ hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOSCSI_QUEUE_SIZE));
+ if (hva == NULL)
+ fatal("vioscsi_update_qa");
+ vq_info->q_hva = hva;
}
/*
@@ -2067,13 +2081,12 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
int
vioscsi_notifyq(struct vioscsi_dev *dev)
{
- uint64_t q_gpa;
- uint32_t vr_sz;
- int cnt, ret;
+ int cnt, ret = 0;
char *vr;
struct virtio_scsi_req_hdr req;
struct virtio_scsi_res_hdr resp;
struct virtio_vq_acct acct;
+ struct virtio_vq_info *vq_info;
ret = 0;
@@ -2081,34 +2094,21 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
if (dev->cfg.queue_notify >= VIRTIO_MAX_QUEUES)
return (ret);
- vr_sz = vring_size(VIOSCSI_QUEUE_SIZE);
- q_gpa = dev->vq[dev->cfg.queue_notify].qa;
- q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
+ vq_info = &dev->vq[dev->cfg.queue_notify];
+ vr = vq_info->q_hva;
+ if (vr == NULL)
+ fatalx("%s: null vring", __func__);
- vr = calloc(1, vr_sz);
- if (vr == NULL) {
- log_warn("%s: calloc error getting vioscsi ring", __func__);
- return (ret);
- }
-
- if (read_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error reading gpa 0x%llx", __func__, q_gpa);
- goto out;
- }
-
/* Compute offsets in ring of descriptors, avail ring, and used ring */
acct.desc = (struct vring_desc *)(vr);
- acct.avail = (struct vring_avail *)(vr +
- dev->vq[dev->cfg.queue_notify].vq_availoffset);
- acct.used = (struct vring_used *)(vr +
- dev->vq[dev->cfg.queue_notify].vq_usedoffset);
+ acct.avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
+ acct.used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
- acct.idx =
- dev->vq[dev->cfg.queue_notify].last_avail & VIOSCSI_QUEUE_MASK;
+ acct.idx = vq_info->last_avail & VIOSCSI_QUEUE_MASK;
if ((acct.avail->idx & VIOSCSI_QUEUE_MASK) == acct.idx) {
- log_warnx("%s:nothing to do?", __func__);
- goto out;
+ log_debug("%s - nothing to do?", __func__);
+ return (0);
}
cnt = 0;
@@ -2180,14 +2180,10 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
ret = 1;
dev->cfg.isr_status = 1;
- /* Move ring indexes */
+
+ /* Move ring indexes (updates the used ring index) */
vioscsi_next_ring_item(dev, acct.avail, acct.used,
acct.req_desc, acct.req_idx);
-
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
goto next_msg;
}
@@ -2202,138 +2198,48 @@ vioscsi_notifyq(struct vioscsi_dev *dev)
case TEST_UNIT_READY:
case START_STOP:
ret = vioscsi_handle_tur(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case PREVENT_ALLOW:
ret = vioscsi_handle_prevent_allow(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case READ_TOC:
ret = vioscsi_handle_read_toc(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case READ_CAPACITY:
ret = vioscsi_handle_read_capacity(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case READ_CAPACITY_16:
ret = vioscsi_handle_read_capacity_16(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case READ_COMMAND:
ret = vioscsi_handle_read_6(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case READ_10:
ret = vioscsi_handle_read_10(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case INQUIRY:
ret = vioscsi_handle_inquiry(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case MODE_SENSE:
ret = vioscsi_handle_mode_sense(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case MODE_SENSE_BIG:
ret = vioscsi_handle_mode_sense_big(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case GET_EVENT_STATUS_NOTIFICATION:
ret = vioscsi_handle_gesn(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case READ_DISC_INFORMATION:
ret = vioscsi_handle_read_disc_info(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case GET_CONFIGURATION:
ret = vioscsi_handle_get_config(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case MECHANISM_STATUS:
ret = vioscsi_handle_mechanism_status(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
case REPORT_LUNS:
ret = vioscsi_handle_report_luns(dev, &req, &acct);
- if (ret) {
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("%s: error writing vioring",
- __func__);
- }
- }
break;
default:
log_warnx("%s: unsupported opcode 0x%02x,%s",
@@ -2348,6 +2254,5 @@ out:
acct.idx = (acct.idx + 1) & VIOSCSI_QUEUE_MASK;
}
out:
- free(vr);
return (ret);
}
blob - c1739b9bbbf35804c4e85fa470497fea3a2fff36
blob + 5d91da231b855b670970ed040657497086461636
--- usr.sbin/vmd/virtio.c
+++ usr.sbin/vmd/virtio.c
@@ -89,7 +89,7 @@ virtio_reg_name(uint8_t reg)
switch (reg) {
case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
- case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
+ case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
@@ -123,40 +123,52 @@ viornd_update_qs(void)
void
viornd_update_qs(void)
{
+ struct virtio_vq_info *vq_info;
+
/* Invalid queue? */
if (viornd.cfg.queue_select > 0) {
viornd.cfg.queue_size = 0;
return;
}
- /* Update queue address/size based on queue select */
- viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa;
- viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs;
+ vq_info = &viornd.vq[viornd.cfg.queue_select];
+
+ /* Update queue pfn/size based on queue select */
+ viornd.cfg.queue_pfn = vq_info->q_gpa >> 12;
+ viornd.cfg.queue_size = vq_info->qs;
}
/* Update queue address */
void
viornd_update_qa(void)
{
+ struct virtio_vq_info *vq_info;
+ void *hva = NULL;
+
/* Invalid queue? */
if (viornd.cfg.queue_select > 0)
return;
- viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address;
+ vq_info = &viornd.vq[viornd.cfg.queue_select];
+ vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE;
+
+ hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE));
+ if (hva == NULL)
+ fatal("viornd_update_qa");
+ vq_info->q_hva = hva;
}
int
viornd_notifyq(void)
{
- uint64_t q_gpa;
- uint32_t vr_sz;
size_t sz;
int dxx, ret;
uint16_t aidx, uidx;
- char *buf, *rnd_data;
+ char *vr, *rnd_data;
struct vring_desc *desc;
struct vring_avail *avail;
struct vring_used *used;
+ struct virtio_vq_info *vq_info;
ret = 0;
@@ -164,27 +176,15 @@ viornd_notifyq(void)
if (viornd.cfg.queue_notify > 0)
return (0);
- vr_sz = vring_size(VIORND_QUEUE_SIZE);
- q_gpa = viornd.vq[viornd.cfg.queue_notify].qa;
- q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
+ vq_info = &viornd.vq[viornd.cfg.queue_notify];
+ vr = vq_info->q_hva;
+ if (vr == NULL)
+ fatalx("%s: null vring", __func__);
- buf = calloc(1, vr_sz);
- if (buf == NULL) {
- log_warn("calloc error getting viornd ring");
- return (0);
- }
+ desc = (struct vring_desc *)(vr);
+ avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
+ used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
- if (read_mem(q_gpa, buf, vr_sz)) {
- free(buf);
- return (0);
- }
-
- desc = (struct vring_desc *)(buf);
- avail = (struct vring_avail *)(buf +
- viornd.vq[viornd.cfg.queue_notify].vq_availoffset);
- used = (struct vring_used *)(buf +
- viornd.vq[viornd.cfg.queue_notify].vq_usedoffset);
-
aidx = avail->idx & VIORND_QUEUE_MASK;
uidx = used->idx & VIORND_QUEUE_MASK;
@@ -209,18 +209,13 @@ viornd_notifyq(void)
viornd.cfg.isr_status = 1;
used->ring[uidx].id = dxx;
used->ring[uidx].len = sz;
+ __sync_synchronize();
used->idx++;
-
- if (write_mem(q_gpa, buf, vr_sz)) {
- log_warnx("viornd: error writing vio ring");
- }
}
free(rnd_data);
} else
fatal("memory allocation error for viornd data");
- free(buf);
-
return (ret);
}
@@ -241,8 +236,8 @@ virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, u
case VIRTIO_CONFIG_GUEST_FEATURES:
viornd.cfg.guest_feature = *data;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- viornd.cfg.queue_address = *data;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ viornd.cfg.queue_pfn = *data;
viornd_update_qa();
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
@@ -266,8 +261,8 @@ virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, u
case VIRTIO_CONFIG_GUEST_FEATURES:
*data = viornd.cfg.guest_feature;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- *data = viornd.cfg.queue_address;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ *data = viornd.cfg.queue_pfn;
break;
case VIRTIO_CONFIG_QUEUE_SIZE:
*data = viornd.cfg.queue_size;
@@ -294,25 +289,38 @@ vioblk_update_qa(struct vioblk_dev *dev)
void
vioblk_update_qa(struct vioblk_dev *dev)
{
+ struct virtio_vq_info *vq_info;
+ void *hva = NULL;
+
/* Invalid queue? */
if (dev->cfg.queue_select > 0)
return;
- dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
+ vq_info = &dev->vq[dev->cfg.queue_select];
+ vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
+
+ hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE));
+ if (hva == NULL)
+ fatal("vioblk_update_qa");
+ vq_info->q_hva = hva;
}
void
vioblk_update_qs(struct vioblk_dev *dev)
{
+ struct virtio_vq_info *vq_info;
+
/* Invalid queue? */
if (dev->cfg.queue_select > 0) {
dev->cfg.queue_size = 0;
return;
}
- /* Update queue address/size based on queue select */
- dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
- dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
+ vq_info = &dev->vq[dev->cfg.queue_select];
+
+ /* Update queue pfn/size based on queue select */
+ dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
+ dev->cfg.queue_size = vq_info->qs;
}
static void
@@ -424,55 +432,41 @@ vioblk_notifyq(struct vioblk_dev *dev)
int
vioblk_notifyq(struct vioblk_dev *dev)
{
- uint64_t q_gpa;
- uint32_t vr_sz;
uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
uint8_t ds;
- int cnt, ret;
+ int cnt;
off_t secbias;
char *vr;
struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
struct vring_avail *avail;
struct vring_used *used;
struct virtio_blk_req_hdr cmd;
+ struct virtio_vq_info *vq_info;
- ret = 0;
-
/* Invalid queue? */
if (dev->cfg.queue_notify > 0)
return (0);
- vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
- q_gpa = dev->vq[dev->cfg.queue_notify].qa;
- q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
+ vq_info = &dev->vq[dev->cfg.queue_notify];
+ vr = vq_info->q_hva;
+ if (vr == NULL)
+ fatalx("%s: null vring", __func__);
- vr = calloc(1, vr_sz);
- if (vr == NULL) {
- log_warn("calloc error getting vioblk ring");
- return (0);
- }
-
- if (read_mem(q_gpa, vr, vr_sz)) {
- log_warnx("error reading gpa 0x%llx", q_gpa);
- goto out;
- }
-
/* Compute offsets in ring of descriptors, avail ring, and used ring */
desc = (struct vring_desc *)(vr);
- avail = (struct vring_avail *)(vr +
- dev->vq[dev->cfg.queue_notify].vq_availoffset);
- used = (struct vring_used *)(vr +
- dev->vq[dev->cfg.queue_notify].vq_usedoffset);
+ avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
+ used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
- idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
+ idx = vq_info->last_avail & VIOBLK_QUEUE_MASK;
if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
- log_warnx("vioblk queue notify - nothing to do?");
- goto out;
+ log_debug("%s - nothing to do?", __func__);
+ return (0);
}
while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
+ ds = VIRTIO_BLK_S_IOERR;
cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
cmd_desc = &desc[cmd_desc_idx];
@@ -691,22 +685,17 @@ vioblk_notifyq(struct vioblk_dev *dev)
goto out;
}
- ret = 1;
dev->cfg.isr_status = 1;
used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
+ __sync_synchronize();
used->idx++;
- dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
- VIOBLK_QUEUE_MASK;
- if (write_mem(q_gpa, vr, vr_sz))
- log_warnx("%s: error writing vio ring", __func__);
-
+ vq_info->last_avail = avail->idx & VIOBLK_QUEUE_MASK;
idx = (idx + 1) & VIOBLK_QUEUE_MASK;
}
out:
- free(vr);
- return (ret);
+ return (1);
}
int
@@ -729,8 +718,8 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u
case VIRTIO_CONFIG_GUEST_FEATURES:
dev->cfg.guest_feature = *data;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- dev->cfg.queue_address = *data;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ dev->cfg.queue_pfn = *data;
vioblk_update_qa(dev);
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
@@ -747,7 +736,7 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u
if (dev->cfg.device_status == 0) {
log_debug("%s: device reset", __func__);
dev->cfg.guest_feature = 0;
- dev->cfg.queue_address = 0;
+ dev->cfg.queue_pfn = 0;
vioblk_update_qa(dev);
dev->cfg.queue_size = 0;
vioblk_update_qs(dev);
@@ -890,8 +879,8 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u
case VIRTIO_CONFIG_GUEST_FEATURES:
*data = dev->cfg.guest_feature;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- *data = dev->cfg.queue_address;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ *data = dev->cfg.queue_pfn;
break;
case VIRTIO_CONFIG_QUEUE_SIZE:
if (sz == 4)
@@ -951,8 +940,8 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u
case VIRTIO_CONFIG_GUEST_FEATURES:
dev->cfg.guest_feature = *data;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- dev->cfg.queue_address = *data;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ dev->cfg.queue_pfn = *data;
vionet_update_qa(dev);
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
@@ -969,7 +958,7 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u
if (dev->cfg.device_status == 0) {
log_debug("%s: device reset", __func__);
dev->cfg.guest_feature = 0;
- dev->cfg.queue_address = 0;
+ dev->cfg.queue_pfn = 0;
vionet_update_qa(dev);
dev->cfg.queue_size = 0;
vionet_update_qs(dev);
@@ -1003,8 +992,8 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u
case VIRTIO_CONFIG_GUEST_FEATURES:
*data = dev->cfg.guest_feature;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- *data = dev->cfg.queue_address;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ *data = dev->cfg.queue_pfn;
break;
case VIRTIO_CONFIG_QUEUE_SIZE:
*data = dev->cfg.queue_size;
@@ -1036,11 +1025,20 @@ vionet_update_qa(struct vionet_dev *dev)
void
vionet_update_qa(struct vionet_dev *dev)
{
+ struct virtio_vq_info *vq_info;
+ void *hva = NULL;
+
/* Invalid queue? */
if (dev->cfg.queue_select > 1)
return;
- dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
+ vq_info = &dev->vq[dev->cfg.queue_select];
+ vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
+
+ hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE));
+ if (hva == NULL)
+ fatal("vionet_update_qa");
+ vq_info->q_hva = hva;
}
/*
@@ -1049,15 +1047,19 @@ vionet_update_qs(struct vionet_dev *dev)
void
vionet_update_qs(struct vionet_dev *dev)
{
+ struct virtio_vq_info *vq_info;
+
/* Invalid queue? */
if (dev->cfg.queue_select > 1) {
dev->cfg.queue_size = 0;
return;
}
- /* Update queue address/size based on queue select */
- dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
- dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
+ vq_info = &dev->vq[dev->cfg.queue_select];
+
+ /* Update queue pfn/size based on queue select */
+ dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
+ dev->cfg.queue_size = vq_info->qs;
}
/*
@@ -1073,17 +1075,14 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
int
vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc)
{
- uint64_t q_gpa;
- uint32_t vr_sz;
uint16_t dxx, idx, hdr_desc_idx, chain_hdr_idx;
- int ret = 0;
char *vr = NULL;
size_t bufsz = 0, off = 0, pkt_offset = 0, chunk_size = 0;
size_t chain_len = 0;
struct vring_desc *desc, *pkt_desc, *hdr_desc;
struct vring_avail *avail;
struct vring_used *used;
- struct vring_used_elem *ue;
+ struct virtio_vq_info *vq_info;
struct virtio_net_hdr hdr;
size_t hdr_sz;
@@ -1095,33 +1094,23 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
hdr_sz = sizeof(hdr);
if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK))
- return ret;
-
- vr_sz = vring_size(VIONET_QUEUE_SIZE);
- q_gpa = dev->vq[RXQ].qa;
- q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
-
- vr = calloc(1, vr_sz);
- if (vr == NULL) {
- log_warn("rx enq: calloc error getting vionet ring");
return (0);
- }
- if (read_mem(q_gpa, vr, vr_sz)) {
- log_warnx("rx enq: error reading gpa 0x%llx", q_gpa);
- goto out;
- }
+ vq_info = &dev->vq[RXQ];
+ vr = vq_info->q_hva;
+ if (vr == NULL)
+ fatalx("%s: null vring", __func__);
/* Compute offsets in ring of descriptors, avail ring, and used ring */
desc = (struct vring_desc *)(vr);
- avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset);
- used = (struct vring_used *)(vr + dev->vq[RXQ].vq_usedoffset);
+ avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
+ used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
- idx = dev->vq[RXQ].last_avail & VIONET_QUEUE_MASK;
- if ((dev->vq[RXQ].notified_avail & VIONET_QUEUE_MASK) == idx) {
+ idx = vq_info->last_avail & VIONET_QUEUE_MASK;
+ if ((vq_info->notified_avail & VIONET_QUEUE_MASK) == idx) {
log_debug("%s: insufficient available buffer capacity, "
"dropping packet.", __func__);
- goto out;
+ return (0);
}
hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK;
@@ -1138,7 +1127,7 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
if (!(pkt_desc->flags & VRING_DESC_F_WRITE)) {
log_warnx("%s: invalid descriptor, not writable",
__func__);
- goto out;
+ return (0);
}
/* How much data do we get to write? */
@@ -1158,7 +1147,7 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
pkt + pkt_offset, chunk_size)) {
log_warnx("%s: failed to write to buffer 0x%llx",
__func__, pkt_desc->addr);
- goto out;
+ return (0);
}
chain_len += chunk_size + off;
@@ -1168,19 +1157,8 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
dxx = pkt_desc->next & VIONET_QUEUE_MASK;
} while (bufsz < sz && pkt_desc->flags & VRING_DESC_F_NEXT);
- /* Update the list of used buffers. */
- ue = &used->ring[(used->idx) & VIONET_QUEUE_MASK];
- ue->id = chain_hdr_idx;
- ue->len = chain_len;
- off = ((char *)ue - vr);
- if (write_mem(q_gpa + off, ue, sizeof(*ue))) {
- log_warnx("%s: error updating rx used ring", __func__);
- goto out;
- }
-
/* Move our marker in the ring...*/
- used->idx++;
- dev->vq[RXQ].last_avail = (dev->vq[RXQ].last_avail + 1) &
+ vq_info->last_avail = (vq_info->last_avail + 1) &
VIONET_QUEUE_MASK;
/* Prepend the virtio net header in the first buffer. */
@@ -1189,23 +1167,21 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_
if (write_mem(hdr_desc->addr, &hdr, hdr_sz)) {
log_warnx("vionet: rx enq header write_mem error @ 0x%llx",
hdr_desc->addr);
- goto out;
+ return (0);
}
/* Update the index field in the used ring. This must be done last. */
dev->cfg.isr_status = 1;
- off = (char *)&used->idx - vr;
- *spc = (dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail) &
- VIONET_QUEUE_MASK;
+ *spc = (vq_info->notified_avail - vq_info->last_avail)
+ & VIONET_QUEUE_MASK;
- if (write_mem(q_gpa + off, &used->idx, sizeof(used->idx)))
- log_warnx("vionet: error writing vio ring");
+ /* Update the list of used buffers. */
+ used->ring[used->idx & VIONET_QUEUE_MASK].id = chain_hdr_idx;
+ used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len;
+ __sync_synchronize();
+ used->idx++;
- ret = 1;
-
-out:
- free(vr);
- return (ret);
+ return (1);
}
/*
@@ -1277,33 +1253,18 @@ vionet_notify_rx(struct vionet_dev *dev)
void
vionet_notify_rx(struct vionet_dev *dev)
{
- uint64_t q_gpa;
- uint32_t vr_sz;
char *vr;
struct vring_avail *avail;
+ struct virtio_vq_info *vq_info;
- vr_sz = vring_size(VIONET_QUEUE_SIZE);
- q_gpa = dev->vq[RXQ].qa;
- q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
+ vq_info = &dev->vq[RXQ];
+ vr = vq_info->q_hva;
+ if (vr == NULL)
+ fatalx("%s: null vring", __func__);
- vr = malloc(vr_sz);
- if (vr == NULL) {
- log_warn("malloc error getting vionet ring");
- return;
- }
-
- if (read_mem(q_gpa, vr, vr_sz)) {
- log_warnx("error reading gpa 0x%llx", q_gpa);
- free(vr);
- return;
- }
-
/* Compute offset into avail ring */
- avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset);
-
- dev->vq[RXQ].notified_avail = avail->idx - 1;
-
- free(vr);
+ avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
+ vq_info->notified_avail = avail->idx - 1;
}
/*
@@ -1312,12 +1273,11 @@ vionet_notifyq(struct vionet_dev *dev)
int
vionet_notifyq(struct vionet_dev *dev)
{
- int ret;
+ int ret = 0;
switch (dev->cfg.queue_notify) {
case RXQ:
vionet_notify_rx(dev);
- ret = 0;
break;
case TXQ:
ret = vionet_notify_tx(dev);
@@ -1329,7 +1289,6 @@ vionet_notifyq(struct vionet_dev *dev)
*/
log_debug("%s: notify for unimplemented queue ID %d",
__func__, dev->cfg.queue_notify);
- ret = 0;
break;
}
@@ -1342,49 +1301,34 @@ vionet_notify_tx(struct vionet_dev *dev)
int
vionet_notify_tx(struct vionet_dev *dev)
{
- uint64_t q_gpa;
- uint32_t vr_sz;
uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx, cnt;
size_t pktsz, chunk_size = 0;
- ssize_t dhcpsz;
- int ret, num_enq, ofs, spc;
- char *vr, *pkt, *dhcppkt;
+ ssize_t dhcpsz = 0;
+ int num_enq, ofs, spc = 0;
+ char *vr = NULL, *pkt = NULL, *dhcppkt = NULL;
struct vring_desc *desc, *pkt_desc, *hdr_desc;
struct vring_avail *avail;
struct vring_used *used;
+ struct virtio_vq_info *vq_info;
struct ether_header *eh;
- dhcpsz = 0;
- vr = pkt = dhcppkt = NULL;
- ret = spc = 0;
+ vq_info = &dev->vq[TXQ];
+ vr = vq_info->q_hva;
+ if (vr == NULL)
+ fatalx("%s: null vring", __func__);
- vr_sz = vring_size(VIONET_QUEUE_SIZE);
- q_gpa = dev->vq[TXQ].qa;
- q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
-
- vr = calloc(1, vr_sz);
- if (vr == NULL) {
- log_warn("calloc error getting vionet ring");
- goto out;
- }
-
- if (read_mem(q_gpa, vr, vr_sz)) {
- log_warnx("error reading gpa 0x%llx", q_gpa);
- goto out;
- }
-
/* Compute offsets in ring of descriptors, avail ring, and used ring */
desc = (struct vring_desc *)(vr);
- avail = (struct vring_avail *)(vr + dev->vq[TXQ].vq_availoffset);
- used = (struct vring_used *)(vr + dev->vq[TXQ].vq_usedoffset);
+ avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
+ used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
num_enq = 0;
- idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK;
+ idx = vq_info->last_avail & VIONET_QUEUE_MASK;
if ((avail->idx & VIONET_QUEUE_MASK) == idx) {
- log_warnx("vionet tx queue notify - nothing to do?");
- goto out;
+ log_debug("%s - nothing to do?", __func__);
+ return (0);
}
while ((avail->idx & VIONET_QUEUE_MASK) != idx) {
@@ -1502,36 +1446,29 @@ vionet_notify_tx(struct vionet_dev *dev)
}
drop_packet:
- ret = 1;
dev->cfg.isr_status = 1;
used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx;
used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len;
+ __sync_synchronize();
used->idx++;
- dev->vq[TXQ].last_avail++;
+ vq_info->last_avail = avail->idx & VIONET_QUEUE_MASK;
+ idx = (idx + 1) & VIONET_QUEUE_MASK;
+
num_enq++;
- idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK;
-
free(pkt);
pkt = NULL;
}
- if (write_mem(q_gpa, vr, vr_sz)) {
- log_warnx("vionet: tx error writing vio ring");
- }
+ if (dhcpsz > 0)
+ vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc);
- if (dhcpsz > 0) {
- if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc))
- ret = 1;
- }
-
out:
- free(vr);
free(pkt);
free(dhcppkt);
- return (ret);
+ return (1);
}
int
@@ -1662,8 +1599,8 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_
case VIRTIO_CONFIG_GUEST_FEATURES:
vmmci.cfg.guest_feature = *data;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- vmmci.cfg.queue_address = *data;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ vmmci.cfg.queue_pfn = *data;
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
vmmci.cfg.queue_select = *data;
@@ -1703,8 +1640,8 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_
case VIRTIO_CONFIG_GUEST_FEATURES:
*data = vmmci.cfg.guest_feature;
break;
- case VIRTIO_CONFIG_QUEUE_ADDRESS:
- *data = vmmci.cfg.queue_address;
+ case VIRTIO_CONFIG_QUEUE_PFN:
+ *data = vmmci.cfg.queue_pfn;
break;
case VIRTIO_CONFIG_QUEUE_SIZE:
*data = vmmci.cfg.queue_size;
blob - 128b469018aefa35f451c1f92be986fc691b26c9
blob + c51f80b6d188a1eef7fd22976dd3073ce2de2d24
--- usr.sbin/vmd/virtio.h
+++ usr.sbin/vmd/virtio.h
@@ -64,13 +64,18 @@
#define VIRTIO_MAX_QUEUES 3
/*
+ * Rename the address config register to be more descriptive.
+ */
+#define VIRTIO_CONFIG_QUEUE_PFN VIRTIO_CONFIG_QUEUE_ADDRESS
+
+/*
* This struct stores notifications from a virtio driver. There is
* one such struct per virtio device.
*/
struct virtio_io_cfg {
uint32_t device_feature;
uint32_t guest_feature;
- uint32_t queue_address;
+ uint32_t queue_pfn;
uint16_t queue_size;
uint16_t queue_select;
uint16_t queue_notify;
@@ -93,8 +98,11 @@ struct virtio_vq_info {
*/
struct virtio_vq_info {
/* Guest physical address of virtq */
- uint32_t qa;
+ uint64_t q_gpa;
+ /* Host virtual address of virtq */
+ void *q_hva;
+
/* Queue size: number of queue entries in virtq */
uint32_t qs;
blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29
blob + 383bd66220116cee194d4c7147fdb6b72410b8d2
--- usr.sbin/vmd/vm.c
+++ usr.sbin/vmd/vm.c
@@ -1991,6 +1991,47 @@ read_mem(paddr_t src, void *buf, size_t len)
}
/*
+ * hvaddr_mem
+ *
+ * Translate a guest physical address to a host virtual address, checking the
+ * provided memory range length to confirm it's contiguous within the same
+ * guest memory range (vm_mem_range).
+ *
+ * Parameters:
+ * gpa: guest physical address to translate
+ * len: number of bytes in the intended range
+ *
+ * Return values:
+ * void* to host virtual memory on success
+ * NULL on error, setting errno to:
+ * EFAULT: gpa falls outside guest memory ranges
+ * EINVAL: requested len extends beyond memory range
+ */
+void *
+hvaddr_mem(paddr_t gpa, size_t len)
+{
+ struct vm_mem_range *vmr;
+ size_t off;
+
+ vmr = find_gpa_range(¤t_vm->vm_params.vmc_params, gpa, len);
+ if (vmr == NULL) {
+ log_warnx("%s: failed - invalid gpa: 0x%lx\n", __func__, gpa);
+ errno = EFAULT;
+ return (NULL);
+ }
+
+ off = gpa - vmr->vmr_gpa;
+ if (len > (vmr->vmr_size - off)) {
+ log_warnx("%s: failed - invalid memory range: gpa=0x%lx, "
+ "len=%zu", __func__, gpa, len);
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ return ((char *)vmr->vmr_va + off);
+}
+
+/*
* vcpu_assert_pic_irq
*
* Injects the specified IRQ on the supplied vcpu/vm
blob - c27d03df7336a2c8ede22ec0d83b74d327fb3244
blob + 17750811887dc4ae88c8919986d0708228f81169
--- usr.sbin/vmd/vmd.h
+++ usr.sbin/vmd/vmd.h
@@ -460,6 +460,7 @@ int write_mem(paddr_t, const void *buf, size_t);
void vm_pipe_send(struct vm_dev_pipe *, enum pipe_msg_type);
enum pipe_msg_type vm_pipe_recv(struct vm_dev_pipe *);
int write_mem(paddr_t, const void *buf, size_t);
+void* hvaddr_mem(paddr_t, size_t);
/* config.c */
int config_init(struct vmd *);