[RFC PATCH 12/12] vdpa: Delete CVQ migration blocker

2022-07-16 Thread Eugenio Pérez
We can now restore the device state at the destination via CVQ. Remove
the migration blocker.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 -
 hw/virtio/vhost-vdpa.c | 11 ---
 net/vhost-vdpa.c   |  2 --
 3 files changed, 14 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index b7d18b4e30..85b8a53052 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,7 +39,6 @@ typedef struct vhost_vdpa {
 bool shadow_vqs_enabled;
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
-Error *migration_blocker;
 GPtrArray *shadow_vqs;
 const VhostShadowVirtqueueOps *shadow_vq_ops;
 void *shadow_vq_ops_opaque;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 48f031b8c0..80bf461cf8 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1028,13 +1028,6 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 return true;
 }
 
-if (v->migration_blocker) {
-int r = migrate_add_blocker(v->migration_blocker, &err);
-if (unlikely(r < 0)) {
-goto err_migration_blocker;
-}
-}
-
 for (i = 0; i < v->shadow_vqs->len; ++i) {
 VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
@@ -1076,7 +1069,6 @@ err_svq_setup:
 vhost_svq_stop(svq);
 }
 
-err_migration_blocker:
 error_reportf_err(err, "Cannot setup SVQ %u: ", i);
 
 return false;
@@ -1098,9 +1090,6 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 }
 }
 
-if (v->migration_blocker) {
-migrate_del_blocker(v->migration_blocker);
-}
 return true;
 }
 
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2873be2ba4..80ea9a1412 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -573,8 +573,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 s->vhost_vdpa.start_op = vhost_vdpa_start_control_svq;
 s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
 s->vhost_vdpa.shadow_vq_ops_opaque = s;
-error_setg(&s->vhost_vdpa.migration_blocker,
-   "Migration disabled: vhost-vdpa uses CVQ.");
 }
 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
-- 
2.31.1




[RFC PATCH 10/12] vdpa: Make vhost_vdpa_net_cvq_map_elem accept any out sg

2022-07-16 Thread Eugenio Pérez
So it's generic enough to accept any out sg buffer, and we can inject
NIC state messages.
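
For example, a caller can now map a locally built command instead of a
guest element (a sketch; the command layout is illustrative):

    struct {
        struct virtio_net_ctrl_hdr hdr;
        uint8_t mac[ETH_ALEN];
    } QEMU_PACKED cmd = {
        .hdr.class = VIRTIO_NET_CTRL_MAC,
        .hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET,
    };
    struct iovec out = { .iov_base = &cmd, .iov_len = sizeof(cmd) };
    struct iovec dev_iov[2];
    bool ok = vhost_vdpa_net_cvq_map_sg(s, &out, 1, dev_iov);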

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index aaae51a778..0183fce353 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -302,35 +302,36 @@ dma_map_err:
 }
 
 /**
- * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ * Maps out sg and in buffer into dedicated buffers suitable to be sent to NIC
  *
- * @iov: [0] is the out buffer, [1] is the in one
+ * @dev_iov: [0] is the out buffer, [1] is the in one
  */
-static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
-VirtQueueElement *elem,
-struct iovec *iov)
+static bool vhost_vdpa_net_cvq_map_sg(VhostVDPAState *s,
+  const struct iovec *out, size_t out_num,
+  struct iovec *dev_iov)
 {
 size_t in_copied;
 bool ok;
 
-iov[0].iov_base = s->cvq_cmd_out_buffer;
-ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
-vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
-&iov[0].iov_len, false);
+dev_iov[0].iov_base = s->cvq_cmd_out_buffer;
+ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, out, out_num,
+vhost_vdpa_net_cvq_cmd_len(),
+dev_iov[0].iov_base, &dev_iov[0].iov_len,
+false);
 if (unlikely(!ok)) {
 return false;
 }
 
-iov[1].iov_base = s->cvq_cmd_in_buffer;
+dev_iov[1].iov_base = s->cvq_cmd_in_buffer;
 ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
-sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
-&in_copied, true);
+sizeof(virtio_net_ctrl_ack),
+dev_iov[1].iov_base, &in_copied, true);
 if (unlikely(!ok)) {
 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
 return false;
 }
 
-iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
+dev_iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
 return true;
 }
 
@@ -449,7 +450,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
 };
 bool ok;
 
-ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
+ok = vhost_vdpa_net_cvq_map_sg(s, elem->out_sg, elem->out_num, dev_buffers);
 if (unlikely(!ok)) {
 goto out;
 }
-- 
2.31.1




[RFC PATCH 06/12] vhost: Use opaque data in SVQDescState

2022-07-16 Thread Eugenio Pérez
Since we'll allow net/vhost-vdpa to add elements that don't come from
the guest, we need to store opaque data that makes sense to the caller.
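
A minimal sketch of the contract this enables (the caller-side struct is
hypothetical):

    typedef struct MyCtx { int id; } MyCtx;  /* anything meaningful to the caller */

    MyCtx *ctx = g_new0(MyCtx, 1);
    vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, ctx);
    /* when the device uses the buffers, SVQ hands the very same pointer
     * back (internally through vhost_svq_get_buf) */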

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  4 ++--
 hw/virtio/vhost-shadow-virtqueue.c | 20 +++-
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index d04c34a589..03eb7ff670 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -16,7 +16,7 @@
 #include "hw/virtio/vhost-iova-tree.h"
 
 typedef struct SVQDescState {
-VirtQueueElement *elem;
+void *data;
 
 /*
  * Number of descriptors exposed to the device. May or may not match
@@ -115,7 +115,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
  const VirtQueueElement *elem, uint32_t len);
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg, size_t in_num,
-  VirtQueueElement *elem);
+  void *data);
 size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 29633b7a29..88e290d94b 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -237,7 +237,7 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  */
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg, size_t in_num,
-  VirtQueueElement *elem)
+  void *data)
 {
 unsigned qemu_head;
 unsigned ndescs = in_num + out_num;
@@ -252,7 +252,7 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
 return -EINVAL;
 }
 
-svq->desc_state[qemu_head].elem = elem;
+svq->desc_state[qemu_head].data = data;
 svq->desc_state[qemu_head].ndescs = ndescs;
 vhost_svq_kick(svq);
 return 0;
@@ -389,8 +389,7 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
 return i;
 }
 
-static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
-   uint32_t *len)
+static void *vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len)
 {
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
@@ -413,7 +412,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return NULL;
 }
 
-if (unlikely(!svq->desc_state[used_elem.id].elem)) {
+if (unlikely(!svq->desc_state[used_elem.id].data)) {
 qemu_log_mask(LOG_GUEST_ERROR,
 "Device %s says index %u is used, but it was not available",
 svq->vdev->name, used_elem.id);
@@ -426,7 +425,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 svq->free_head = used_elem.id;
 
 *len = used_elem.len;
-return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
+return g_steal_pointer(&svq->desc_state[used_elem.id].data);
 }
 
 /**
@@ -498,8 +497,7 @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
 {
 do {
 uint32_t len;
-VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
-if (elem) {
+if (vhost_svq_get_buf(svq, &len)) {
 return len;
 }
 
@@ -658,8 +656,12 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 vhost_svq_flush(svq, false);
 
 for (unsigned i = 0; i < svq->vring.num; ++i) {
+/*
+ * We know .data is an element because external callers of
+ * vhost_svq_add use active polling, not SVQ
+ */
 g_autofree VirtQueueElement *elem = NULL;
-elem = g_steal_pointer(&svq->desc_state[i].elem);
+elem = g_steal_pointer(&svq->desc_state[i].data);
 if (elem) {
 virtqueue_detach_element(svq->vq, elem, 0);
 }
-- 
2.31.1




[RFC PATCH 04/12] vdpa: delay set_vring_ready after DRIVER_OK

2022-07-16 Thread Eugenio Pérez
To restore the device at the destination of a live migration we send the
commands through the control virtqueue. For a device to read CVQ it must
have received the DRIVER_OK status bit.

However, this opens a window where the device could start receiving
packets in rx queue 0 before it receives the RSS configuration. To avoid
that, do not send vring_enable until all the configuration has been
consumed by the device.

As a first step, reverse the DRIVER_OK and SET_VRING_ENABLE steps.
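
For context, the start ordering this series works toward looks roughly
like this (a sketch, not the code in this patch; the CVQ injection step
lands in later patches):

    vhost_vdpa_svqs_start(dev);
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); /* device may read CVQ */
    /* ... inject the guest-visible state through the shadow CVQ ... */
    vhost_vdpa_set_vring_ready(dev); /* only now enable the vrings */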

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 906c365036..1d8829c619 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -730,13 +730,18 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
 return idx;
 }
 
+/**
+ * Set ready all vring of the device
+ *
+ * @dev: Vhost device
+ */
 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
 {
 int i;
 trace_vhost_vdpa_set_vring_ready(dev);
-for (i = 0; i < dev->nvqs; ++i) {
+for (i = 0; i < dev->vq_index_end; ++i) {
 struct vhost_vring_state state = {
-.index = dev->vq_index + i,
+.index = i,
 .num = 1,
 };
 vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
@@ -1111,7 +1116,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 if (unlikely(!ok)) {
 return -1;
 }
-vhost_vdpa_set_vring_ready(dev);
 } else {
 ok = vhost_vdpa_svqs_stop(dev);
 if (unlikely(!ok)) {
@@ -1125,16 +1129,22 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 }
 
 if (started) {
+int r;
+
 memory_listener_register(&v->listener, &address_space_memory);
-return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+r = vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+if (unlikely(r)) {
+return r;
+}
+vhost_vdpa_set_vring_ready(dev);
 } else {
 vhost_vdpa_reset_device(dev);
 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
 memory_listener_unregister(&v->listener);
-
-return 0;
 }
+
+return 0;
 }
 
 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
-- 
2.31.1




[RFC PATCH 00/12] NIC vhost-vdpa state restore via Shadow CVQ

2022-07-16 Thread Eugenio Pérez
CVQ of net vhost-vdpa devices can be intercepted since the work of [1], and
the virtio-net device model is updated accordingly. Migration was still
blocked because, although the state could be migrated between VMMs, it was
not possible to restore it on the destination NIC.

This series adds support for SVQ to inject external messages without the
guest's knowledge, so all the guest-visible state is restored before the
guest is resumed. It is done using standard CVQ messages, so the vhost-vdpa
device does not need to learn how to restore it: as long as devices offer
the feature, they know how to handle it.

This series needs SVQ CVQ support [1] and fixes [2] to be applied.

Thanks!

[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02808.html
[2] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02726.html

Eugenio Pérez (12):
  vhost: Get vring base from vq, not svq
  vhost: Move SVQ queue rewind to the destination
  vdpa: Small rename of error labels
  vdpa: delay set_vring_ready after DRIVER_OK
  vhost: stop transfer elem ownership in vhost_handle_guest_kick
  vhost: Use opaque data in SVQDescState
  vhost: Add VhostVDPAStartOp operation
  vdpa: Add vhost_vdpa_start_control_svq
  vdpa: Extract vhost_vdpa_net_svq_add from
vhost_vdpa_net_handle_ctrl_avail
  vdpa: Make vhost_vdpa_net_cvq_map_elem accept any out sg
  vdpa: Add virtio-net mac address via CVQ at start
  vdpa: Delete CVQ migration blocker

 hw/virtio/vhost-shadow-virtqueue.h |   7 +-
 include/hw/virtio/vhost-vdpa.h |   6 +-
 hw/virtio/vhost-shadow-virtqueue.c |  30 ++---
 hw/virtio/vhost-vdpa.c |  70 ++-
 net/vhost-vdpa.c   | 184 -
 5 files changed, 193 insertions(+), 104 deletions(-)

-- 
2.31.1





[RFC PATCH 09/12] vdpa: Extract vhost_vdpa_net_svq_add from vhost_vdpa_net_handle_ctrl_avail

2022-07-16 Thread Eugenio Pérez
So we can reuse it to inject state messages.
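
A sketch of the intended reuse (buffer setup omitted; the real injection
lands later in the series):

    /* dev_buffers[0] is the out buffer, dev_buffers[1] the in one, both
     * already mapped to the device */
    virtio_net_ctrl_ack state = vhost_vdpa_net_svq_add(svq, dev_buffers);
    if (state != VIRTIO_NET_OK) {
        /* the device rejected the injected command */
    }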

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 89 +++-
 1 file changed, 51 insertions(+), 38 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 211bd0468b..aaae51a778 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -334,6 +334,54 @@ static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
 return true;
 }
 
+static virtio_net_ctrl_ack vhost_vdpa_net_svq_add(VhostShadowVirtqueue *svq,
+   const struct iovec *dev_buffers)
+{
+/* in buffer used for device model */
+virtio_net_ctrl_ack status;
+const struct iovec in = {
+.iov_base = &status,
+.iov_len = sizeof(status),
+};
+size_t dev_written;
+int r;
+void *unused = (void *)1;
+
+r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, unused);
+if (unlikely(r != 0)) {
+if (unlikely(r == -ENOSPC)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+  __func__);
+}
+return VIRTIO_NET_ERR;
+}
+
+/*
+ * We can poll here since we've had BQL from the time we sent the
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
+ * when BQL is released
+ */
+dev_written = vhost_svq_poll(svq);
+if (unlikely(dev_written < sizeof(status))) {
+error_report("Insufficient written data (%zu)", dev_written);
+return VIRTIO_NET_ERR;
+}
+
+memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
+if (status != VIRTIO_NET_OK) {
+return VIRTIO_NET_ERR;
+}
+
+status = VIRTIO_NET_ERR;
+virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
+if (status != VIRTIO_NET_OK) {
+error_report("Bad CVQ processing in model");
+return VIRTIO_NET_ERR;
+}
+
+return VIRTIO_NET_OK;
+}
+
 static int vhost_vdpa_start_control_svq(struct vhost_vdpa *v)
 {
 struct vhost_vring_state state = {
@@ -392,19 +440,13 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
 void *opaque)
 {
 VhostVDPAState *s = opaque;
-size_t in_len, dev_written;
+size_t in_len;
 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
 /* out and in buffers sent to the device */
 struct iovec dev_buffers[2] = {
 { .iov_base = s->cvq_cmd_out_buffer },
 { .iov_base = s->cvq_cmd_in_buffer },
 };
-/* in buffer used for device model */
-const struct iovec in = {
-.iov_base = &status,
-.iov_len = sizeof(status),
-};
-int r;
 bool ok;
 
 ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
@@ -417,36 +459,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
 goto out;
 }
 
-r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
-if (unlikely(r != 0)) {
-if (unlikely(r == -ENOSPC)) {
-qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
-  __func__);
-}
-goto out;
-}
-
-/*
- * We can poll here since we've had BQL from the time we sent the
- * descriptor. Also, we need to take the answer before SVQ pulls by itself,
- * when BQL is released
- */
-dev_written = vhost_svq_poll(svq);
-if (unlikely(dev_written < sizeof(status))) {
-error_report("Insufficient written data (%zu)", dev_written);
-goto out;
-}
-
-memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
-if (status != VIRTIO_NET_OK) {
-goto out;
-}
-
-status = VIRTIO_NET_ERR;
-virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
-if (status != VIRTIO_NET_OK) {
-error_report("Bad CVQ processing in model");
-}
+status = vhost_vdpa_net_svq_add(svq, dev_buffers);
 
 out:
 in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
@@ -462,7 +475,7 @@ out:
 if (dev_buffers[1].iov_base) {
 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
 }
-return r;
+return status == VIRTIO_NET_OK ? 0 : 1;
 }
 
 static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
-- 
2.31.1




[RFC PATCH 03/12] vdpa: Small rename of error labels

2022-07-16 Thread Eugenio Pérez
So later patches are cleaner.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 4458c8d23e..906c365036 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1039,7 +1039,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 int r;
 bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
 if (unlikely(!ok)) {
-goto err;
+goto err_svq_setup;
 }
 
 vhost_svq_start(svq, dev->vdev, vq);
@@ -1064,8 +1064,7 @@ err_set_addr:
 err_map:
 vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
 
-err:
-error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+err_svq_setup:
 for (unsigned j = 0; j < i; ++j) {
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
 vhost_vdpa_svq_unmap_rings(dev, svq);
-- 
2.31.1




[RFC PATCH 01/12] vhost: Get vring base from vq, not svq

2022-07-16 Thread Eugenio Pérez
The SVQ vring used idx usually matches the guest-visible one, as long
as each guest buffer (GPA) maps to exactly one buffer within qemu's VA.
However, as we can see in virtqueue_map_desc, a single guest buffer can
map to many buffers in the SVQ vring: for example, when its GPA range
crosses a memory region boundary, one descriptor is needed per
contiguous chunk, so the two indexes drift apart.

The solution is to stop using the device's used idx and check the last
avail idx instead. Since we cannot report in-flight descriptors with
vdpa, let's rewind all of them.

Fixes: 6d0b22266633 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ")
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 795ed5a049..18820498b3 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1194,11 +1194,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
 {
 struct vhost_vdpa *v = dev->opaque;
-int vdpa_idx = ring->index - dev->vq_index;
 int ret;
 
 if (v->shadow_vqs_enabled) {
-VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
 /*
  * Setting base as last used idx, so destination will see as available
@@ -1208,7 +1207,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
  * TODO: This is ok for networking, but other kinds of devices might
  * have problems with these retransmissions.
  */
-ring->num = svq->last_used_idx;
+while (virtqueue_rewind(vq, 1)) {
+continue;
+}
+ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
 return 0;
 }
 
-- 
2.31.1




[RFC PATCH 02/12] vhost: Move SVQ queue rewind to the destination

2022-07-16 Thread Eugenio Pérez
Migration with SVQ already migrates the in-flight descriptors, so the
destination can perform the work.

This makes it easier to migrate between backends, or to recover them in
vhost devices that support setting in-flight descriptors.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 18820498b3..4458c8d23e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1178,7 +1178,18 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
 {
 struct vhost_vdpa *v = dev->opaque;
+VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
+/*
+ * vhost-vdpa devices do not support in-flight requests. Set all of them
+ * as available.
+ *
+ * TODO: This is ok for networking, but other kinds of devices might
+ * have problems with these retransmissions.
+ */
+while (virtqueue_rewind(vq, 1)) {
+continue;
+}
 if (v->shadow_vqs_enabled) {
 /*
  * Device vring base was set at device start. SVQ base is handled by
@@ -1197,19 +1208,6 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
 int ret;
 
 if (v->shadow_vqs_enabled) {
-VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
-
-/*
- * Setting base as last used idx, so destination will see as available
- * all the entries that the device did not use, including the in-flight
- * processing ones.
- *
- * TODO: This is ok for networking, but other kinds of devices might
- * have problems with these retransmissions.
- */
-while (virtqueue_rewind(vq, 1)) {
-continue;
-}
 ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
 return 0;
 }
-- 
2.31.1




[PATCH v3 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-15 Thread Eugenio Pérez
Finally offering the possibility to enable SVQ from the command line.
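
For instance (the vhost-vdpa device node is just an example path):

    $ qemu-system-x86_64 ... \
        -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0,x-svq=on \
        -device virtio-net-pci,netdev=vdpa0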

Signed-off-by: Eugenio Pérez 
Acked-by: Markus Armbruster 
---
 qapi/net.json|  9 +-
 net/vhost-vdpa.c | 72 ++--
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/qapi/net.json b/qapi/net.json
index 9af11e9a3b..75ba2cb989 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -445,12 +445,19 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #  (default: 1)
 #
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
+# (default: false)
+#
+# Features:
+# @unstable: Member @x-svq is experimental.
+#
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
 '*vhostdev': 'str',
-'*queues':   'int' } }
+'*queues':   'int',
+'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
 # @NetdevVmnetHostOptions:
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 0afa60bb51..986e6414b4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = {
 VHOST_INVALID_FEATURE_BIT
 };
 
+/** Supported device specific feature bits with SVQ */
+static const uint64_t vdpa_svq_device_features =
+BIT_ULL(VIRTIO_NET_F_CSUM) |
+BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+BIT_ULL(VIRTIO_NET_F_MTU) |
+BIT_ULL(VIRTIO_NET_F_MAC) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
+BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
+BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
+BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
+BIT_ULL(VIRTIO_NET_F_STATUS) |
+BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
+BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
+BIT_ULL(VIRTIO_NET_F_STANDBY);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -133,9 +155,13 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_dev *dev = &s->vhost_net->dev;
 
 qemu_vfree(s->cvq_cmd_out_buffer);
 qemu_vfree(s->cvq_cmd_in_buffer);
+if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+}
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
int vdpa_device_fd,
int queue_pair_index,
int nvqs,
-   bool is_datapath)
+   bool is_datapath,
+   bool svq,
+   VhostIOVATree *iova_tree)
 {
 NetClientState *nc = NULL;
 VhostVDPAState *s;
@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
 s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
 vhost_vdpa_net_cvq_cmd_page_len());
@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 
 s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
 s->vhost_vdpa.shadow_vq_ops_opaque = s;
+error_setg(&s->vhost_vdpa.migration_blocker,
+   "Migration disabled: vhost-vdpa uses CVQ.");
 }
 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 return nc;
 }
 
+static int vhost_vdpa_get_iova_range(int fd,
+ struct vhost_vdpa_iova_range *iova_range)
+{
+int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
+
+return ret < 0 ? -errno : 0;
+}
+
 static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
 {
 int ret = ioctl(fd, VHOST_GET_FEATURES, features);
@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
 uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
+g_autoptr(

[PATCH v3 14/19] vdpa: Export vhost_vdpa_dma_map and unmap calls

2022-07-15 Thread Eugenio Pérez
Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from
the guest that could set a different state in qemu device model and vdpa
device.

To do so, it needs to be able to map these new buffers to the device.
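
A sketch of how a caller outside hw/virtio can use the exported calls
(the iova is assumed to come from the IOVA tree allocator):

    int r = vhost_vdpa_dma_map(v, iova, size, buf, true /* device read-only */);
    if (r == 0) {
        /* ... device can now read the qemu-VA buffer ... */
        vhost_vdpa_dma_unmap(v, iova, size);
    }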

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 include/hw/virtio/vhost-vdpa.h | 4 
 hw/virtio/vhost-vdpa.c | 7 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index a29dbb3f53..7214eb47dc 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,4 +39,8 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0b13e98471..96997210be 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -71,8 +71,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
 return false;
 }
 
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-  void *vaddr, bool readonly)
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
 return ret;
 }
 
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
-hwaddr size)
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
-- 
2.31.1




[PATCH v3 18/19] vdpa: Add device migration blocker

2022-07-15 Thread Eugenio Pérez
Since the vhost-vdpa device is exposing _F_LOG, add a migration blocker
if it uses CVQ.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index d85643..d10a89303e 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
 bool shadow_vqs_enabled;
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
+Error *migration_blocker;
 GPtrArray *shadow_vqs;
 const VhostShadowVirtqueueOps *shadow_vq_ops;
 void *shadow_vq_ops_opaque;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index beaaa7049a..795ed5a049 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -20,6 +20,7 @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
+#include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -1022,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 return true;
 }
 
+if (v->migration_blocker) {
+int r = migrate_add_blocker(v->migration_blocker, &err);
+if (unlikely(r < 0)) {
+goto err_migration_blocker;
+}
+}
+
 for (i = 0; i < v->shadow_vqs->len; ++i) {
 VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
@@ -1064,6 +1072,9 @@ err:
 vhost_svq_stop(svq);
 }
 
+err_migration_blocker:
+error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+
 return false;
 }
 
@@ -1083,6 +1094,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 }
 }
 
+if (v->migration_blocker) {
+migrate_del_blocker(v->migration_blocker);
+}
 return true;
 }
 
-- 
2.31.1




[PATCH v3 12/19] vhost: add vhost_svq_poll

2022-07-15 Thread Eugenio Pérez
It allows the Shadow Control VirtQueue to wait for the device to use the
available buffers.
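
A sketch of the add-and-wait pattern this enables (assuming the caller
holds the BQL, so the event loop cannot pull the buffer first):

    int r = vhost_svq_add(svq, &out_iov, 1, &in_iov, 1, elem);
    if (r == 0) {
        size_t dev_written = vhost_svq_poll(svq); /* busy-waits on used idx */
        /* dev_written bytes are now in the in buffer */
    }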

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  1 +
 hw/virtio/vhost-shadow-virtqueue.c | 22 ++
 2 files changed, 23 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index dd78f4bec2..cf442f7dea 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg, size_t in_num,
   VirtQueueElement *elem);
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 406a823c81..1c54a03e17 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -484,6 +484,28 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 } while (!vhost_svq_enable_notification(svq));
 }
 
+/**
+ * Poll the SVQ for one device used buffer.
+ *
+ * This function races with the main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the length written by the device.
+ */
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+do {
+uint32_t len;
+VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
+if (elem) {
+return len;
+}
+
+/* Make sure we read new used_idx */
+smp_rmb();
+} while (true);
+}
+
 /**
  * Forward used buffers.
  *
-- 
2.31.1




[PATCH v3 15/19] vdpa: manual forward CVQ buffers

2022-07-15 Thread Eugenio Pérez
Do a simple forwarding of CVQ buffers: the same work SVQ already does,
but through callbacks. No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  3 ++
 hw/virtio/vhost-vdpa.c |  3 +-
 net/vhost-vdpa.c   | 58 ++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include 
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
 GPtrArray *shadow_vqs;
+const VhostShadowVirtqueueOps *shadow_vq_ops;
+void *shadow_vq_ops_opaque;
 struct vhost_dev *dev;
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 96997210be..beaaa7049a 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
 g_autoptr(VhostShadowVirtqueue) svq;
 
-svq = vhost_svq_new(v->iova_tree, NULL, NULL);
+svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+v->shadow_vq_ops_opaque);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index df1e69ee72..2e3b6b10d8 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,14 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include 
@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Forward buffer for the moment.
+ */
+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+VirtQueueElement *elem,
+void *opaque)
+{
+unsigned int n = elem->out_num + elem->in_num;
+g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
+size_t in_len, dev_written;
+virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+int r;
+
+memcpy(dev_buffers, elem->out_sg, elem->out_num);
+memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
+
+r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
+  elem->in_num, elem);
+if (unlikely(r != 0)) {
+if (unlikely(r == -ENOSPC)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+  __func__);
+}
+goto out;
+}
+
+/*
+ * We can poll here since we've had BQL from the time we sent the
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
+ * when BQL is released
+ */
+dev_written = vhost_svq_poll(svq);
+if (unlikely(dev_written < sizeof(status))) {
+error_report("Insufficient written data (%zu)", dev_written);
+}
+
+out:
+in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
+  sizeof(status));
+if (unlikely(in_len < sizeof(status))) {
+error_report("Bad device CVQ written length");
+}
+vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
+g_free(elem);
+return r;
+}
+
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
+.avail_handler = vhost_vdpa_net_handle_ctrl_avail,
+};
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
const char *device,
const char *name,
@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+if (!is_datapath) {
+s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
+s->vhost_vdpa.shadow_vq_ops_opaque = s;
+}
 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
 qemu_del_net_client(nc);
-- 
2.31.1




[PATCH v3 16/19] vdpa: Buffer CVQ support on shadow virtqueue

2022-07-15 Thread Eugenio Pérez
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like rx filtering.

Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
TOCTOU races with the guest's or device's memory every time there is a
device model change.  Otherwise, the guest could change the memory
content between the time qemu and the device read it.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MAC_ADDR is
implemented.  If the virtio-net driver changes the MAC, the virtio-net
device model will be updated with the new one, and an rx filtering
change event will be raised.

More CVQ commands could be added here straightforwardly, but they have
not been tested.
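
The TOCTOU-avoidance flow boils down to copying first and parsing only
the copy (a condensed sketch of what the diff below does):

    /* copy the guest control command into a qemu-owned bounce buffer... */
    iov_to_buf(elem->out_sg, elem->out_num, 0, s->cvq_cmd_out_buffer, len);
    /* ...so both the vdpa device and the device model only ever see this
     * copy, never guest memory that could change under them */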

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 211 +--
 1 file changed, 204 insertions(+), 7 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2e3b6b10d8..3915b148c4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
 NetClientState nc;
 struct vhost_vdpa vhost_vdpa;
 VHostNetState *vhost_net;
+
+/* Control commands shadow buffers */
+void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
 bool started;
 } VhostVDPAState;
 
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
+qemu_vfree(s->cvq_cmd_out_buffer);
+qemu_vfree(s->cvq_cmd_in_buffer);
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+VhostIOVATree *tree = v->iova_tree;
+DMAMap needle = {
+/*
+ * No need to specify size or to look for more translations since
+ * this contiguous chunk was allocated by us.
+ */
+.translated_addr = (hwaddr)(uintptr_t)addr,
+};
+const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+int r;
+
+if (unlikely(!map)) {
+error_report("Cannot locate expected map");
+return;
+}
+
+r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+if (unlikely(r != 0)) {
+error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+}
+
+vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+/*
+ * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
+ * The in buffer is always 1 byte, so it should fit here
+ */
+return sizeof(struct virtio_net_ctrl_hdr) +
+   2 * sizeof(struct virtio_net_ctrl_mac) +
+   MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+   const struct iovec *out_data,
+   size_t out_num, size_t data_len, void *buf,
+   size_t *written, bool write)
+{
+DMAMap map = {};
+int r;
+
+if (unlikely(!data_len)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length of %s buffer\n",
+  __func__, write ? "in" : "out");
+return false;
+}
+
+*written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+map.translated_addr = (hwaddr)(uintptr_t)buf;
+map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+map.perm = write ? IOMMU_RW : IOMMU_RO,
+r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+if (unlikely(r != IOVA_OK)) {
+error_report("Cannot map injected element");
+return false;
+}
+
+r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+   !write);
+if (unlikely(r < 0)) {
+goto dma_map_err;
+}
+
+return true;
+
+dma_map_err:
+vhost_iova_tree_remove(v->iova_tree, &map);
+return false;
+}
+
 /**
- * Forward buffer for the moment.
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+VirtQueueElement *elem,
+struct iovec *iov)
+{
+size_t in_copied;
+bool ok;
+
+iov[0].iov_base = s->cvq_cmd_out_buffer;
+ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
+vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+&iov[0].iov_len, false)

[PATCH v3 10/19] vhost: add vhost_svq_push_elem

2022-07-15 Thread Eugenio Pérez
This function allows external SVQ users to return the guest's available
buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 16 
 2 files changed, 19 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 5c7e7cbab6..d9fc1f1799 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue {
 
 bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len);
+
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index ae5bd6efa8..b377e125e7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -427,6 +427,22 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
 }
 
+/**
+ * Push an element to SVQ, returning it to the guest.
+ */
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len)
+{
+virtqueue_push(svq->vq, elem, len);
+if (svq->next_guest_avail_elem) {
+/*
+ * Avail ring was full when vhost_svq_flush was called, so it's a
+ * good moment to make more descriptors available if possible.
+ */
+vhost_handle_guest_kick(svq);
+}
+}
+
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 bool check_for_avail_queue)
 {
-- 
2.31.1




[PATCH v3 17/19] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs

2022-07-15 Thread Eugenio Pérez
Knowing the device features is needed for CVQ SVQ, so SVQ knows whether
it can handle all commands. Extract this from
vhost_vdpa_get_max_queue_pairs so we can reuse it.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 net/vhost-vdpa.c | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 3915b148c4..0afa60bb51 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 return nc;
 }
 
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
+{
+int ret = ioctl(fd, VHOST_GET_FEATURES, features);
+if (unlikely(ret < 0)) {
+error_setg_errno(errp, errno,
+ "Fail to query features from vhost-vDPA device");
+}
+return ret;
+}
+
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
+  int *has_cvq, Error **errp)
 {
 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
 g_autofree struct vhost_vdpa_config *config = NULL;
 __virtio16 *max_queue_pairs;
-uint64_t features;
 int ret;
 
-ret = ioctl(fd, VHOST_GET_FEATURES, &features);
-if (ret) {
-error_setg(errp, "Fail to query features from vhost-vDPA device");
-return ret;
-}
-
 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
 *has_cvq = 1;
 } else {
@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
 NetClientState *peer, Error **errp)
 {
 const NetdevVhostVDPAOptions *opts;
+uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
 NetClientState *nc;
-int queue_pairs, i, has_cvq = 0;
+int queue_pairs, r, i, has_cvq = 0;
 
 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 opts = &netdev->u.vhost_vdpa;
@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
 return -errno;
 }
 
-queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
+r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
+if (unlikely(r < 0)) {
+return r;
+}
+
+queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
  &has_cvq, errp);
 if (queue_pairs < 0) {
 qemu_close(vdpa_device_fd);
-- 
2.31.1




[PATCH v3 03/19] virtio-net: Expose ctrl virtqueue logic

2022-07-15 Thread Eugenio Pérez
This allows external vhost-net devices to modify the state of the
VirtIO device model once the vhost-vdpa device has acknowledged the
control commands.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h |  4 ++
 hw/net/virtio-net.c| 84 --
 2 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index cce1c554f7..ef234ffe7e 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -221,6 +221,10 @@ struct VirtIONet {
 struct EBPFRSSContext ebpf_rss;
 };
 
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num);
 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
const char *type);
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f83e96e4ce..dd0d056fde 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
 return VIRTIO_NET_OK;
 }
 
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
 struct virtio_net_ctrl_hdr ctrl;
 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
-VirtQueueElement *elem;
 size_t s;
 struct iovec *iov, *iov2;
-unsigned int iov_cnt;
+
+if (iov_size(in_sg, in_num) < sizeof(status) ||
+iov_size(out_sg, out_num) < sizeof(ctrl)) {
+virtio_error(vdev, "virtio-net ctrl missing headers");
+return 0;
+}
+
+iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
+s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
+iov_discard_front(&iov, &out_num, sizeof(ctrl));
+if (s != sizeof(ctrl)) {
+status = VIRTIO_NET_ERR;
+} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
+status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
+status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
+status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
+status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+}
+
+s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
+assert(s == sizeof(status));
+
+g_free(iov2);
+return sizeof(status);
+}
+
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtQueueElement *elem;
 
 for (;;) {
+size_t written;
 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 if (!elem) {
 break;
 }
-if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
-iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
-virtio_error(vdev, "virtio-net ctrl missing headers");
+
+written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
+ elem->out_sg, elem->out_num);
+if (written > 0) {
+virtqueue_push(vq, elem, written);
+virtio_notify(vdev, vq);
+g_free(elem);
+} else {
 virtqueue_detach_element(vq, elem, 0);
 g_free(elem);
 break;
 }
-
-iov_cnt = elem->out_num;
-iov2 = iov = g_memdup2(elem->out_sg,
-   sizeof(struct iovec) * elem->out_num);
-s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
-iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
-if (s != sizeof(ctrl)) {
-status = VIRTIO_NET_ERR;
-} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
-status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
-status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
-status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
-  

[PATCH v3 11/19] vhost: Expose vhost_svq_add

2022-07-15 Thread Eugenio Pérez
This allows external parts of SVQ to forward custom buffers to the
device.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index d9fc1f1799..dd78f4bec2 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error 
**errp);
 
 void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
  const VirtQueueElement *elem, uint32_t len);
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg, size_t in_num,
+  VirtQueueElement *elem);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index b377e125e7..406a823c81 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  *
  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
-  size_t out_num, const struct iovec *in_sg,
-  size_t in_num, VirtQueueElement *elem)
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg, size_t in_num,
+  VirtQueueElement *elem)
 {
 unsigned qemu_head;
 unsigned ndescs = in_num + out_num;
-- 
2.31.1




[PATCH v3 08/19] vhost: Add SVQDescState

2022-07-15 Thread Eugenio Pérez
This will allow SVQ to add context to the different queue elements.

This patch only stores the actual element; no functional change intended.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  8 ++--
 hw/virtio/vhost-shadow-virtqueue.c | 16 
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index c132c994e9..d646c35054 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,10 @@
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/vhost-iova-tree.h"
 
+typedef struct SVQDescState {
+VirtQueueElement *elem;
+} SVQDescState;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue {
 /* IOVA mapping */
 VhostIOVATree *iova_tree;
 
-/* Map for use the guest's descriptors */
-VirtQueueElement **ring_id_maps;
+/* SVQ vring descriptors state */
+SVQDescState *desc_state;
 
 /* Next VirtQueue element that guest made available */
 VirtQueueElement *next_guest_avail_elem;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index b005a457c6..d12f5afffb 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -256,7 +256,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
 return -EINVAL;
 }
 
-svq->ring_id_maps[qemu_head] = elem;
+svq->desc_state[qemu_head].elem = elem;
 vhost_svq_kick(svq);
 return 0;
 }
@@ -410,21 +410,21 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return NULL;
 }
 
-if (unlikely(!svq->ring_id_maps[used_elem.id])) {
+if (unlikely(!svq->desc_state[used_elem.id].elem)) {
 qemu_log_mask(LOG_GUEST_ERROR,
 "Device %s says index %u is used, but it was not available",
 svq->vdev->name, used_elem.id);
 return NULL;
 }
 
-num = svq->ring_id_maps[used_elem.id]->in_num +
-  svq->ring_id_maps[used_elem.id]->out_num;
+num = svq->desc_state[used_elem.id].elem->in_num +
+  svq->desc_state[used_elem.id].elem->out_num;
 last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
 
 *len = used_elem.len;
-return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
+return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
 }
 
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
@@ -594,7 +594,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
 memset(svq->vring.desc, 0, driver_size);
 svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
 memset(svq->vring.used, 0, device_size);
-svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
+svq->desc_state = g_new0(SVQDescState, svq->vring.num);
 svq->desc_next = g_new0(uint16_t, svq->vring.num);
 for (unsigned i = 0; i < svq->vring.num - 1; i++) {
 svq->desc_next[i] = cpu_to_le16(i + 1);
@@ -619,7 +619,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 
 for (unsigned i = 0; i < svq->vring.num; ++i) {
 g_autofree VirtQueueElement *elem = NULL;
-elem = g_steal_pointer(&svq->ring_id_maps[i]);
+elem = g_steal_pointer(&svq->desc_state[i].elem);
 if (elem) {
 virtqueue_detach_element(svq->vq, elem, 0);
 }
@@ -631,7 +631,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 }
 svq->vq = NULL;
 g_free(svq->desc_next);
-g_free(svq->ring_id_maps);
+g_free(svq->desc_state);
 qemu_vfree(svq->vring.desc);
 qemu_vfree(svq->vring.used);
 }
-- 
2.31.1




[PATCH v3 09/19] vhost: Track number of descs in SVQDescState

2022-07-15 Thread Eugenio Pérez
A guest buffer that is contiguous in GPA may need multiple descriptors
in qemu's VA, so SVQ should track its length separately.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 6 ++
 hw/virtio/vhost-shadow-virtqueue.c | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index d646c35054..5c7e7cbab6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -17,6 +17,12 @@
 
 typedef struct SVQDescState {
 VirtQueueElement *elem;
+
+/*
+ * Number of descriptors exposed to the device. May or may not match
+ * guest's
+ */
+unsigned int ndescs;
 } SVQDescState;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index d12f5afffb..ae5bd6efa8 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -257,6 +257,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
 }
 
 svq->desc_state[qemu_head].elem = elem;
+svq->desc_state[qemu_head].ndescs = ndescs;
 vhost_svq_kick(svq);
 return 0;
 }
@@ -417,8 +418,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return NULL;
 }
 
-num = svq->desc_state[used_elem.id].elem->in_num +
-  svq->desc_state[used_elem.id].elem->out_num;
+num = svq->desc_state[used_elem.id].ndescs;
 last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
-- 
2.31.1




[PATCH v3 01/19] vhost: move descriptor translation to vhost_svq_vring_write_descs

2022-07-15 Thread Eugenio Pérez
It's done for both in and out descriptors so it's better placed here.

Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 +-
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 56c96ebd13..e2184a4481 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
 return true;
 }
 
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-const struct iovec *iovec, size_t num,
-bool more_descs, bool write)
+/**
+ * Write descriptors to SVQ vring
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Cache for hwaddr
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
+const struct iovec *iovec, size_t num,
+bool more_descs, bool write)
 {
 uint16_t i = svq->free_head, last = svq->free_head;
 unsigned n;
 uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
 vring_desc_t *descs = svq->vring.desc;
+bool ok;
 
 if (num == 0) {
-return;
+return true;
+}
+
+ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+if (unlikely(!ok)) {
+return false;
 }
 
 for (n = 0; n < num; n++) {
@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 }
 
 svq->free_head = le16_to_cpu(svq->desc_next[last]);
+return true;
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return false;
 }
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
+ elem->in_num > 0, false);
 if (unlikely(!ok)) {
 return false;
 }
-vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
-elem->in_num > 0, false);
-
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
+ true);
 if (unlikely(!ok)) {
 return false;
 }
 
-vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
-
 /*
  * Put the entry in the available array (but don't update avail->idx until
  * they do sync).
-- 
2.31.1
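
The more_descs/write parameters map directly to the split-ring descriptor
flags. A minimal standalone model of that flags logic (virtio constants as in
the spec; the real code additionally converts with cpu_to_le16):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define VRING_DESC_F_NEXT  1
#define VRING_DESC_F_WRITE 2

/* Descriptor i of num gets F_NEXT unless it is the last one of the whole
 * chain (more_descs extends the chain past this iovec), and F_WRITE iff
 * the buffer is device-writable. */
static uint16_t desc_flags(size_t i, size_t num, bool more_descs, bool write)
{
    uint16_t flags = write ? VRING_DESC_F_WRITE : 0;

    if (more_descs || i + 1 < num) {
        flags |= VRING_DESC_F_NEXT;
    }
    return flags;
}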




[PATCH v3 04/19] vhost: Reorder vhost_svq_kick

2022-07-15 Thread Eugenio Pérez
Future code needs to call it from vhost_svq_add.

No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index e2184a4481..fd1839cec5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return true;
 }
 
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+{
+/*
+ * We need to expose the available array entries before checking the used
+ * flags
+ */
+smp_mb();
+if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
+return;
+}
+
+event_notifier_set(&svq->hdev_kick);
+}
+
 /**
  * Add an element to a SVQ.
  *
@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 return true;
 }
 
-static void vhost_svq_kick(VhostShadowVirtqueue *svq)
-{
-/*
- * We need to expose the available array entries before checking the used
- * flags
- */
-smp_mb();
-if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
-return;
-}
-
-event_notifier_set(&svq->hdev_kick);
-}
-
 /**
  * Forward available buffers.
  *
-- 
2.31.1
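
The barrier inside vhost_svq_kick is the driver side of the virtio
notification-suppression protocol: the new avail entries must be globally
visible before the device's VRING_USED_F_NO_NOTIFY hint is read, or a
notification can be lost. A standalone sketch with C11 atomics standing in
for qemu's smp_mb():

#include <stdatomic.h>
#include <stdint.h>

#define VRING_USED_F_NO_NOTIFY 1

/* Driver side (sketch): publish the avail index, full fence, then decide
 * whether to ring the doorbell. */
static void kick(_Atomic uint16_t *avail_idx, uint16_t new_idx,
                 const _Atomic uint16_t *used_flags, void (*doorbell)(void))
{
    atomic_store_explicit(avail_idx, new_idx, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);      /* qemu: smp_mb() */
    if (atomic_load_explicit(used_flags, memory_order_relaxed)
        & VRING_USED_F_NO_NOTIFY) {
        return;                                     /* device asked not to */
    }
    doorbell();
}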




[PATCH v3 07/19] vhost: Decouple vhost_svq_add from VirtQueueElement

2022-07-15 Thread Eugenio Pérez
VirtQueueElement comes from the guest, but we are moving SVQ towards being
able to modify the element presented to the device without the guest's
knowledge.

To do so, make SVQ accept sg buffers directly, instead of using
VirtQueueElement.

Add vhost_svq_add_element as a convenience wrapper for plain elements.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 hw/virtio/vhost-shadow-virtqueue.c | 33 --
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index aee9891a67..b005a457c6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -172,30 +172,31 @@ static bool 
vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
-VirtQueueElement *elem, unsigned *head)
+const struct iovec *out_sg, size_t out_num,
+const struct iovec *in_sg, size_t in_num,
+unsigned *head)
 {
 unsigned avail_idx;
 vring_avail_t *avail = svq->vring.avail;
 bool ok;
-g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
+g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
 
 *head = svq->free_head;
 
 /* We need some descriptors here */
-if (unlikely(!elem->out_num && !elem->in_num)) {
+if (unlikely(!out_num && !in_num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "Guest provided element with no descriptors");
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
- elem->in_num > 0, false);
+ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
+ false);
 if (unlikely(!ok)) {
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
- true);
+ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
 if (unlikely(!ok)) {
 return false;
 }
@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  *
  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg,
+  size_t in_num, VirtQueueElement *elem)
 {
 unsigned qemu_head;
-unsigned ndescs = elem->in_num + elem->out_num;
+unsigned ndescs = in_num + out_num;
 bool ok;
 
 if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
 return -ENOSPC;
 }
 
-ok = vhost_svq_add_split(svq, elem, &qemu_head);
+ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
 if (unlikely(!ok)) {
 g_free(elem);
 return -EINVAL;
@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 return 0;
 }
 
+/* Convenience wrapper to add a guest's element to SVQ */
+static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
+ VirtQueueElement *elem)
+{
+return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
+ elem->in_num, elem);
+}
+
 /**
  * Forward available buffers.
  *
@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-r = vhost_svq_add(svq, elem);
+r = vhost_svq_add_element(svq, elem);
 if (unlikely(r != 0)) {
 if (r == -ENOSPC) {
 /*
-- 
2.31.1
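
With the sg-based signature, a later caller can expose buffers the guest
never provided. A hypothetical fragment (cmd, cmd_len and ack are
illustrative names; virtio_net_ctrl_ack is the standard one-byte ack type):

struct iovec out = { .iov_base = cmd, .iov_len = cmd_len };
struct iovec in  = { .iov_base = &ack, .iov_len = sizeof(ack) };
int r = vhost_svq_add(svq, &out, 1, &in, 1, elem);
if (unlikely(r != 0)) {
    /* -ENOSPC: elem not consumed; other errors: elem already freed */
}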




[PATCH v3 02/19] virtio-net: Expose MAC_TABLE_ENTRIES

2022-07-15 Thread Eugenio Pérez
The vhost-vdpa control virtqueue needs to know the maximum number of entries
supported by the virtio-net device, so we know whether it is possible to
apply the filter.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h | 3 +++
 hw/net/virtio-net.c| 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index eb87032627..cce1c554f7 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
  * and latency. */
 #define TX_BURST 256
 
+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
+#define MAC_TABLE_ENTRIES    64
+
 typedef struct virtio_net_conf
 {
 uint32_t txtimer;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7ad948ee7c..f83e96e4ce 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -49,7 +49,6 @@
 
 #define VIRTIO_NET_VM_VERSION    11
 
-#define MAC_TABLE_ENTRIES    64
 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
 
 /* previously fixed value */
-- 
2.31.1
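
Patch 16 of this series sizes the shadow CVQ buffer from this constant. A
standalone back-of-envelope of that worst case (simplified local struct
definitions; the real ones live in the standard virtio-net headers):

#include <stdio.h>
#include <stdint.h>

#define ETH_ALEN          6
#define MAC_TABLE_ENTRIES 64

struct virtio_net_ctrl_hdr { uint8_t class; uint8_t cmd; };
struct virtio_net_ctrl_mac { uint32_t entries; /* uint8_t macs[][ETH_ALEN]; */ };

int main(void)
{
    /* VIRTIO_NET_CTRL_MAC_TABLE_SET carries two tables (unicast and
     * multicast); the worst case is MAC_TABLE_ENTRIES addresses total. */
    size_t len = sizeof(struct virtio_net_ctrl_hdr) +
                 2 * sizeof(struct virtio_net_ctrl_mac) +
                 MAC_TABLE_ENTRIES * ETH_ALEN;
    printf("worst-case CVQ command length: %zu bytes\n", len); /* 394 */
    return 0;
}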




[PATCH v3 06/19] vhost: Check for queue full at vhost_svq_add

2022-07-15 Thread Eugenio Pérez
The series needs to expose vhost_svq_add with full functionality,
including checking for a full queue.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 59 +-
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index e5a4a62daa..aee9891a67 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -233,21 +233,29 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  * Add an element to a SVQ.
  *
  * The caller must check that there is enough slots for the new element. It
- * takes ownership of the element: In case of failure, it is free and the SVQ
- * is considered broken.
+ * takes ownership of the element: in case of a failure other than ENOSPC, it is freed.
+ *
+ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
 {
 unsigned qemu_head;
-bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+unsigned ndescs = elem->in_num + elem->out_num;
+bool ok;
+
+if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
+return -ENOSPC;
+}
+
+ok = vhost_svq_add_split(svq, elem, &qemu_head);
 if (unlikely(!ok)) {
 g_free(elem);
-return false;
+return -EINVAL;
 }
 
 svq->ring_id_maps[qemu_head] = elem;
 vhost_svq_kick(svq);
-return true;
+return 0;
 }
 
 /**
@@ -274,7 +282,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 
 while (true) {
 VirtQueueElement *elem;
-bool ok;
+int r;
 
 if (svq->next_guest_avail_elem) {
 elem = g_steal_pointer(&svq->next_guest_avail_elem);
@@ -286,25 +294,24 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) 
{
-/*
- * This condition is possible since a contiguous buffer in GPA
- * does not imply a contiguous buffer in qemu's VA
- * scatter-gather segments. If that happens, the buffer exposed
- * to the device needs to be a chain of descriptors at this
- * moment.
- *
- * SVQ cannot hold more available buffers if we are here:
- * queue the current guest descriptor and ignore further kicks
- * until some elements are used.
- */
-svq->next_guest_avail_elem = elem;
-return;
-}
-
-ok = vhost_svq_add(svq, elem);
-if (unlikely(!ok)) {
-/* VQ is broken, just return and ignore any other kicks */
+r = vhost_svq_add(svq, elem);
+if (unlikely(r != 0)) {
+if (r == -ENOSPC) {
+/*
+ * This condition is possible since a contiguous buffer in
+ * GPA does not imply a contiguous buffer in qemu's VA
+ * scatter-gather segments. If that happens, the buffer
+ * exposed to the device needs to be a chain of descriptors
+ * at this moment.
+ *
+ * SVQ cannot hold more available buffers if we are here:
+ * queue the current guest descriptor and ignore kicks
+ * until some elements are used.
+ */
+svq->next_guest_avail_elem = elem;
+}
+
+/* VQ is full or broken, just return and ignore kicks */
 return;
 }
 }
-- 
2.31.1
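
The resulting caller contract, as a fragment mirroring the hunk above
(sketch, not new code):

int r = vhost_svq_add(svq, elem);
if (r == -ENOSPC) {
    /* elem was NOT consumed: stash it and retry once buffers are used */
    svq->next_guest_avail_elem = elem;
} else if (unlikely(r != 0)) {
    /* elem already freed; the queue is considered broken */
    return;
}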




[PATCH v3 00/19] vdpa net devices Rx filter change notification with Shadow VQ

2022-07-15 Thread Eugenio Pérez
The control virtqueue is used by the networking device to accept various
commands from the driver. It is a must for supporting advanced configurations.

A rx filtering change event is issued by qemu when the device's MAC address
changes and the previous one has not been queried by external agents.

Shadow VirtQueue (SVQ) already makes it possible to track the state of
virtqueues, effectively intercepting them so qemu can track which regions of
memory are dirtied by device action and need migration. However, this does not
cover the networking device state seen by the driver, which is changed by CVQ
messages like MAC address updates.

This series uses the SVQ infrastructure to intercept the networking control
messages used by the device. This way, qemu is able to update the VirtIONet
device model and react to them. In particular, this series enables rx filter
change notification.

This is a prerequisite to achieve a net vdpa device with CVQ live migration.
It's a stripped down version of [1], with error paths checked and no migration
enabled.

The first nine patches reorder and clean the code base so it is easier to
apply the later ones. No functional change should be noticed from them.

Patches 11 to 14 expose the SVQ API so that other parts of qemu can interact
with it. In particular, they will be used by vhost-vdpa net to handle CVQ
messages.

Patches 15 to 17 enable the update of the virtio-net device model for each
CVQ message acknowledged by the device.

The last patches enable the x-svq parameter, forbidding device migration since
the state is not restored in the destination's vdpa device yet. That will be
added in later series, using this work.

Comments are welcome.
v3:
- Replace SVQElement with SVQDescState

v2:
- (Comments from series [1]).
- Actively poll for the CVQ answer instead of relying on the async used callback
- Do not offer a new buffer to the device but reuse qemu's
- Use vhost_svq_add instead of not needed vhost_svq_inject
- Delete used and detach callbacks, not needed anymore
- Embed members of SVQElement in VirtQueueElement
- Reuse the same buffers for all CVQ commands

[1] 
https://patchwork.kernel.org/project/qemu-devel/cover/20220706184008.1649478-1-epere...@redhat.com/

Eugenio Pérez (19):
  vhost: move descriptor translation to vhost_svq_vring_write_descs
  virtio-net: Expose MAC_TABLE_ENTRIES
  virtio-net: Expose ctrl virtqueue logic
  vhost: Reorder vhost_svq_kick
  vhost: Move vhost_svq_kick call to vhost_svq_add
  vhost: Check for queue full at vhost_svq_add
  vhost: Decouple vhost_svq_add from VirtQueueElement
  vhost: Add SVQDescState
  vhost: Track number of descs in SVQDescState
  vhost: add vhost_svq_push_elem
  vhost: Expose vhost_svq_add
  vhost: add vhost_svq_poll
  vhost: Add svq avail_handler callback
  vdpa: Export vhost_vdpa_dma_map and unmap calls
  vdpa: manual forward CVQ buffers
  vdpa: Buffer CVQ support on shadow virtqueue
  vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs
  vdpa: Add device migration blocker
  vdpa: Add x-svq to NetdevVhostVDPAOptions

 qapi/net.json  |   9 +-
 hw/virtio/vhost-shadow-virtqueue.h |  52 -
 include/hw/virtio/vhost-vdpa.h |   8 +
 include/hw/virtio/virtio-net.h |   7 +
 hw/net/virtio-net.c|  85 ---
 hw/virtio/vhost-shadow-virtqueue.c | 202 +++-
 hw/virtio/vhost-vdpa.c |  25 +-
 net/vhost-vdpa.c   | 357 +++--
 8 files changed, 627 insertions(+), 118 deletions(-)

-- 
2.31.1





[PATCH v3 05/19] vhost: Move vhost_svq_kick call to vhost_svq_add

2022-07-15 Thread Eugenio Pérez
The series needs to expose vhost_svq_add with full functionality,
including the kick.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index fd1839cec5..e5a4a62daa 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -246,6 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 }
 
 svq->ring_id_maps[qemu_head] = elem;
+vhost_svq_kick(svq);
 return true;
 }
 
@@ -306,7 +307,6 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 /* VQ is broken, just return and ignore any other kicks */
 return;
 }
-vhost_svq_kick(svq);
 }
 
 virtio_queue_set_notification(svq->vq, true);
-- 
2.31.1




[PATCH v3 13/19] vhost: Add svq avail_handler callback

2022-07-15 Thread Eugenio Pérez
This allows external handlers to be aware of new buffers that the guest
places in the virtqueue.

When this callback is defined, ownership of the guest's virtqueue
element is transferred to the callback. This means that if the user
wants to forward the descriptor it needs to inject it manually. The
callback is also free to process the command by itself and complete the
element with svq_push.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 31 +-
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 hw/virtio/vhost-vdpa.c |  3 ++-
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index cf442f7dea..d04c34a589 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -25,6 +25,27 @@ typedef struct SVQDescState {
 unsigned int ndescs;
 } SVQDescState;
 
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
+
+/**
+ * Callback to handle an avail buffer.
+ *
+ * @svq:  Shadow virtqueue
+ * @elem:  Element placed in the queue by the guest
+ * @vq_callback_opaque:  Opaque
+ *
+ * Returns 0 if the vq is running as expected.
+ *
+ * Note that ownership of elem is transferred to the callback.
+ */
+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem,
+  void *vq_callback_opaque);
+
+typedef struct VhostShadowVirtqueueOps {
+VirtQueueAvailCallback avail_handler;
+} VhostShadowVirtqueueOps;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue {
  */
 uint16_t *desc_next;
 
+/* Caller callbacks */
+const VhostShadowVirtqueueOps *ops;
+
+/* Caller callbacks opaque */
+void *ops_opaque;
+
 /* Next head to expose to the device */
 uint16_t shadow_avail_idx;
 
@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
  VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque);
 
 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 1c54a03e17..a21b0b1bf6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-r = vhost_svq_add_element(svq, elem);
+if (svq->ops) {
+r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
+} else {
+r = vhost_svq_add_element(svq, elem);
+}
 if (unlikely(r != 0)) {
 if (r == -ENOSPC) {
 /*
@@ -679,12 +683,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  * shadow methods and file descriptors.
  *
  * @iova_tree: Tree to perform descriptors translations
+ * @ops: SVQ owner callbacks
+ * @ops_opaque: ops opaque pointer
  *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque)
 {
 g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
 int r;
@@ -706,6 +714,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree 
*iova_tree)
 event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
 event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
 svq->iova_tree = iova_tree;
+svq->ops = ops;
+svq->ops_opaque = ops_opaque;
 return g_steal_pointer(&svq);
 
 err_init_hdev_call:
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 66f054a12c..0b13e98471 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -418,8 +418,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 
 shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
-g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
+g_autoptr(VhostShadowVirtqueue) svq;
 
+svq = vhost_svq_new(v->iova_tree, NULL, NULL);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
-- 
2.31.1
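
A hypothetical owner wiring the callback (sketch; the handler and state names
are illustrative, and the handler must dispose of elem itself, e.g. by
forwarding it with the exported vhost_svq_add or by pushing it back):

static int my_avail_handler(VhostShadowVirtqueue *svq,
                            VirtQueueElement *elem, void *opaque)
{
    MyState *s = opaque;    /* MyState is a made-up owner type */
    /* ... inspect/modify the command, then forward, or push + free ... */
    return 0;
}

static const VhostShadowVirtqueueOps my_svq_ops = {
    .avail_handler = my_avail_handler,
};

/* at setup time: */
svq = vhost_svq_new(iova_tree, &my_svq_ops, my_state);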




[PATCH 2/2] vhost: Move SVQ queue rewind to the destination

2022-07-15 Thread Eugenio Pérez
Migration with SVQ already migrates the in-flight descriptors, so the
destination can perform the work.

This makes it easier to migrate between backends or to recover them in
vhost devices that support setting in-flight descriptors.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 18820498b3..4458c8d23e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1178,7 +1178,18 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev 
*dev,
struct vhost_vring_state *ring)
 {
 struct vhost_vdpa *v = dev->opaque;
+VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
+/*
+ * vhost-vdpa devices do not support in-flight requests. Set all of them
+ * as available.
+ *
+ * TODO: This is ok for networking, but other kinds of devices might
+ * have problems with these retransmissions.
+ */
+while (virtqueue_rewind(vq, 1)) {
+continue;
+}
 if (v->shadow_vqs_enabled) {
 /*
  * Device vring base was set at device start. SVQ base is handled by
@@ -1197,19 +1208,6 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev 
*dev,
 int ret;
 
 if (v->shadow_vqs_enabled) {
-VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
-
-/*
- * Setting base as last used idx, so destination will see as available
- * all the entries that the device did not use, including the in-flight
- * processing ones.
- *
- * TODO: This is ok for networking, but other kinds of devices might
- * have problems with these retransmissions.
- */
-while (virtqueue_rewind(vq, 1)) {
-continue;
-}
 ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
 return 0;
 }
-- 
2.31.1




[PATCH 0/2] vhost: Get vring base from vq, not svq

2022-07-15 Thread Eugenio Pérez
The SVQ vring used idx usually matches the guest visible one, as long
as every guest buffer (GPA) maps to exactly one buffer within qemu's
VA. However, as we can see in virtqueue_map_desc, a single guest buffer
could map to many buffers in the SVQ vring.

The solution is to stop using the device's used idx and check for the
last avail idx instead. Since we cannot report in-flight descriptors with
vdpa, let's rewind all of them.

Also, move this rewind to the destination, so we keep migrating the in-flight
ones in case the destination backend supports them (vhost-kernel, emulated
virtio in qemu, etc.)

Eugenio Pérez (2):
  vhost: Get vring base from vq, not svq
  vhost: Move SVQ queue rewind to the destination

 hw/virtio/vhost-vdpa.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

-- 
2.31.1





[PATCH 1/2] vhost: Get vring base from vq, not svq

2022-07-15 Thread Eugenio Pérez
The SVQ vring used idx usually matches the guest visible one, as long
as every guest buffer (GPA) maps to exactly one buffer within qemu's
VA. However, as we can see in virtqueue_map_desc, a single guest buffer
could map to many buffers in the SVQ vring.

The solution is to stop using the device's used idx and check for the
last avail idx instead. Since we cannot report in-flight descriptors with
vdpa, let's rewind all of them.

Fixes: 6d0b22266633 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ")
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 795ed5a049..18820498b3 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1194,11 +1194,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev 
*dev,
struct vhost_vring_state *ring)
 {
 struct vhost_vdpa *v = dev->opaque;
-int vdpa_idx = ring->index - dev->vq_index;
 int ret;
 
 if (v->shadow_vqs_enabled) {
-VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
 /*
  * Setting base as last used idx, so destination will see as available
@@ -1208,7 +1207,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev 
*dev,
  * TODO: This is ok for networking, but other kinds of devices might
  * have problems with these retransmissions.
  */
-ring->num = svq->last_used_idx;
+while (virtqueue_rewind(vq, 1)) {
+continue;
+}
+ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
 return 0;
 }
 
-- 
2.31.1




[RFC] vhost: Move svq avail handler to virtio_net_handle_ctrl

2022-07-14 Thread Eugenio Pérez
DO NOT MERGE THIS RFC

... so we can evaluate whether it is worth moving.

Answering [1].

It basically makes SVQ work in two different modes:
* data virtqueues work with the SVQ loop, intercepting the guest's kicks
and the device's calls.
* CVQ does not move to that. Instead:
  - It "forbids" vhost-dev to register the guest notifier
  - VirtIONet needs to know about SVQ, making it possible to use the
external functions _add, _push...

The idea is good and it would avoid adding callbacks to SVQ, but
VirtIONet would have to use SVQ in a different way than routing through
vhost-vdpa. Also, we are playing (more) with the guest-to-host
notifiers, giving vhost devices a new status (started, but guest
notifications go to the queue handler directly).

SVQ would start working very differently depending on whether it is used
from the data vqs or the control one, so we need to make it easier to use
correctly. We could start doing that in the next development cycle, so we
keep VirtIONet unaware of SVQ for this one and have more time
to test.

Providing SVQ to VirtIONet should be easy, probably by adding a
VhostShadowVirtqueue cvq_svq member to VirtIONet.

Comments are welcome, thanks!

[1] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg02640.html
---
 include/hw/virtio/vhost.h |  2 ++
 hw/net/vhost_net.c|  6 +-
 hw/net/virtio-net.c   | 23 ---
 hw/virtio/vhost-vdpa.c|  8 +++-
 hw/virtio/vhost.c |  8 
 net/vhost-vdpa.c  | 22 +++---
 6 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index a346f23d13..634fd1bb25 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -85,6 +85,8 @@ struct vhost_dev {
 int vq_index_end;
 /* if non-zero, minimum required value for max_queues */
 int num_queues;
+/* Disable acquiring the host notifiers */
+bool not_enable_notifiers;
 uint64_t features;
 uint64_t acked_features;
 uint64_t backend_features;
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index ccac5b7a64..70cec99960 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -367,10 +367,14 @@ int vhost_net_start(VirtIODevice *dev, NetClientState 
*ncs,
 for (i = 0; i < nvhosts; i++) {
 if (i < data_queue_pairs) {
 peer = qemu_get_peer(ncs, i);
+net = get_vhost_net(peer);
+net->dev.not_enable_notifiers = false;
 } else {
 peer = qemu_get_peer(ncs, n->max_queue_pairs);
+net = get_vhost_net(peer);
+net->dev.not_enable_notifiers = true;
 }
-r = vhost_net_start_one(get_vhost_net(peer), dev);
+r = vhost_net_start_one(net, dev);
 
 if (r < 0) {
 goto err_start;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index dd0d056fde..52883b5f0e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -46,6 +46,7 @@
 #include "net_rx_pkt.h"
 #include "hw/virtio/vhost.h"
 #include "sysemu/qtest.h"
+#include "hw/virtio/vhost-vdpa.h"
 
 #define VIRTIO_NET_VM_VERSION    11
 
@@ -1476,17 +1477,33 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
 return sizeof(status);
 }
 
+typedef struct VhostVDPAState {
+NetClientState nc;
+struct vhost_vdpa vhost_vdpa;
+VHostNetState *vhost_net;
+
+/* Control commands shadow buffers */
+void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
+bool started;
+} VhostVDPAState;
+extern VhostVDPAState *cvq_s;
+int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+SVQElement *svq_elem, void 
*opaque);
 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
-VirtQueueElement *elem;
+SVQElement *svq_elem;
 
 for (;;) {
 size_t written;
-elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
-if (!elem) {
+svq_elem = virtqueue_pop(vq, sizeof(SVQElement));
+if (!svq_elem) {
 break;
 }
 
+VhostShadowVirtqueue *svq = 
g_ptr_array_index(cvq_s->vhost_vdpa.shadow_vqs, 0);
+vhost_vdpa_net_handle_ctrl_avail(svq, svq_elem, cvq_s);
+
+VirtQueueElement *elem = &svq_elem->elem;
 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
  elem->out_sg, elem->out_num);
 if (written > 0) {
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 795ed5a049..33aace6e7c 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1222,10 +1222,16 @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev 
*dev,
 {
 struct vhost_vdpa *v = dev->opaque;
 int vdpa_idx = file->index - dev->vq_index;
+struct vhost_vring_file file2 = *file;
+
+if (dev->not_enable_notifiers) {
+/* We don't want to route this ever */
+file2.fd = -1;
+}
 
 if (v->shadow_vqs_enabled) {
 VhostShadowVirtqu

[PATCH v2 04/19] vhost: Reorder vhost_svq_kick

2022-07-14 Thread Eugenio Pérez
Future code needs to call it from vhost_svq_add.

No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index e2184a4481..fd1839cec5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return true;
 }
 
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+{
+/*
+ * We need to expose the available array entries before checking the used
+ * flags
+ */
+smp_mb();
+if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
+return;
+}
+
+event_notifier_set(&svq->hdev_kick);
+}
+
 /**
  * Add an element to a SVQ.
  *
@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 return true;
 }
 
-static void vhost_svq_kick(VhostShadowVirtqueue *svq)
-{
-/*
- * We need to expose the available array entries before checking the used
- * flags
- */
-smp_mb();
-if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
-return;
-}
-
-event_notifier_set(&svq->hdev_kick);
-}
-
 /**
  * Forward available buffers.
  *
-- 
2.31.1




[PATCH v2 18/19] vdpa: Add device migration blocker

2022-07-14 Thread Eugenio Pérez
Since the vhost-vdpa device is exposing _F_LOG, add a migration blocker if
it uses CVQ.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index d85643..d10a89303e 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
 bool shadow_vqs_enabled;
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
+Error *migration_blocker;
 GPtrArray *shadow_vqs;
 const VhostShadowVirtqueueOps *shadow_vq_ops;
 void *shadow_vq_ops_opaque;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index beaaa7049a..795ed5a049 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -20,6 +20,7 @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
+#include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -1022,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 return true;
 }
 
+if (v->migration_blocker) {
+int r = migrate_add_blocker(v->migration_blocker, &err);
+if (unlikely(r < 0)) {
+goto err_migration_blocker;
+}
+}
+
 for (i = 0; i < v->shadow_vqs->len; ++i) {
 VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
@@ -1064,6 +1072,9 @@ err:
 vhost_svq_stop(svq);
 }
 
+err_migration_blocker:
+error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+
 return false;
 }
 
@@ -1083,6 +1094,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 }
 }
 
+if (v->migration_blocker) {
+migrate_del_blocker(v->migration_blocker);
+}
 return true;
 }
 
-- 
2.31.1
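
The blocker lifecycle used here follows the usual qemu migration API pattern,
as shown in the hunks above (sketch only):

Error *blocker = NULL;

error_setg(&blocker, "Migration disabled: vhost-vdpa uses CVQ.");
if (migrate_add_blocker(blocker, &local_err) < 0) {
    /* e.g. migration already in progress: undo the device start */
}
/* ... device runs; migration attempts now fail with the message ... */
migrate_del_blocker(blocker);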




[PATCH v2 13/19] vhost: Add svq avail_handler callback

2022-07-14 Thread Eugenio Pérez
This allows external handlers to be aware of new buffers that the guest
places in the virtqueue.

When this callback is defined, ownership of the guest's virtqueue
element is transferred to the callback. This means that if the user
wants to forward the descriptor it needs to inject it manually. The
callback is also free to process the command by itself and complete the
element with svq_push.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 31 +-
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 hw/virtio/vhost-vdpa.c |  3 ++-
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index b5c6e3b3b4..965ca88706 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -25,6 +25,27 @@ typedef struct SVQElement {
 unsigned int ndescs;
 } SVQElement;
 
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
+
+/**
+ * Callback to handle an avail buffer.
+ *
+ * @svq:  Shadow virtqueue
+ * @elem:  Element placed in the queue by the guest
+ * @vq_callback_opaque:  Opaque
+ *
+ * Returns 0 if the vq is running as expected.
+ *
+ * Note that ownership of elem is transferred to the callback.
+ */
+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
+  SVQElement *elem,
+  void *vq_callback_opaque);
+
+typedef struct VhostShadowVirtqueueOps {
+VirtQueueAvailCallback avail_handler;
+} VhostShadowVirtqueueOps;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue {
  */
 uint16_t *desc_next;
 
+/* Caller callbacks */
+const VhostShadowVirtqueueOps *ops;
+
+/* Caller callbacks opaque */
+void *ops_opaque;
+
 /* Next head to expose to the device */
 uint16_t shadow_avail_idx;
 
@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
  VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque);
 
 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 31a267f721..85b2d49326 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-r = vhost_svq_add_element(svq, elem);
+if (svq->ops) {
+r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
+} else {
+r = vhost_svq_add_element(svq, elem);
+}
 if (unlikely(r != 0)) {
 if (r == -ENOSPC) {
 /*
@@ -681,12 +685,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  * shadow methods and file descriptors.
  *
  * @iova_tree: Tree to perform descriptors translations
+ * @ops: SVQ owner callbacks
+ * @ops_opaque: ops opaque pointer
  *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque)
 {
 g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
 int r;
@@ -708,6 +716,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree 
*iova_tree)
 event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
 event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
 svq->iova_tree = iova_tree;
+svq->ops = ops;
+svq->ops_opaque = ops_opaque;
 return g_steal_pointer(&svq);
 
 err_init_hdev_call:
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 66f054a12c..0b13e98471 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -418,8 +418,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 
 shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
-g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
+g_autoptr(VhostShadowVirtqueue) svq;
 
+svq = vhost_svq_new(v->iova_tree, NULL, NULL);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
-- 
2.31.1




[PATCH v2 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-14 Thread Eugenio Pérez
Finally, offer the possibility to enable SVQ from the command line.

Signed-off-by: Eugenio Pérez 
Acked-by: Markus Armbruster 
---
 qapi/net.json|  9 +-
 net/vhost-vdpa.c | 72 ++--
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/qapi/net.json b/qapi/net.json
index 9af11e9a3b..75ba2cb989 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -445,12 +445,19 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #  (default: 1)
 #
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
+# (default: false)
+#
+# Features:
+# @unstable: Member @x-svq is experimental.
+#
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
 '*vhostdev': 'str',
-'*queues':   'int' } }
+'*queues':   'int',
+'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
 # @NetdevVmnetHostOptions:
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 7ccf9eaf4d..85148a5114 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = {
 VHOST_INVALID_FEATURE_BIT
 };
 
+/** Supported device specific feature bits with SVQ */
+static const uint64_t vdpa_svq_device_features =
+BIT_ULL(VIRTIO_NET_F_CSUM) |
+BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+BIT_ULL(VIRTIO_NET_F_MTU) |
+BIT_ULL(VIRTIO_NET_F_MAC) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
+BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
+BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
+BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
+BIT_ULL(VIRTIO_NET_F_STATUS) |
+BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
+BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
+BIT_ULL(VIRTIO_NET_F_STANDBY);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -133,9 +155,13 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_dev *dev = &s->vhost_net->dev;
 
 qemu_vfree(s->cvq_cmd_out_buffer);
 qemu_vfree(s->cvq_cmd_in_buffer);
+if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+}
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
int vdpa_device_fd,
int queue_pair_index,
int nvqs,
-   bool is_datapath)
+   bool is_datapath,
+   bool svq,
+   VhostIOVATree *iova_tree)
 {
 NetClientState *nc = NULL;
 VhostVDPAState *s;
@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
 s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
 vhost_vdpa_net_cvq_cmd_page_len());
@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
 s->vhost_vdpa.shadow_vq_ops_opaque = s;
+error_setg(&s->vhost_vdpa.migration_blocker,
+   "Migration disabled: vhost-vdpa uses CVQ.");
 }
 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
+static int vhost_vdpa_get_iova_range(int fd,
+ struct vhost_vdpa_iova_range *iova_range)
+{
+int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
+
+return ret < 0 ? -errno : 0;
+}
+
 static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
 {
 int ret = ioctl(fd, VHOST_GET_FEATURES, features);
@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
+g_autoptr(
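
A hypothetical invocation enabling the new option (the vdpa device path is
illustrative):

$ qemu-system-x86_64 ... \
    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0,x-svq=on \
    -device virtio-net-pci,netdev=vdpa0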

[PATCH v2 15/19] vdpa: manual forward CVQ buffers

2022-07-14 Thread Eugenio Pérez
Do a simple forwarding of CVQ buffers, the same work SVQ would do by
itself but through the callbacks. No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  3 ++
 hw/virtio/vhost-vdpa.c |  3 +-
 net/vhost-vdpa.c   | 58 ++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include 
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
 GPtrArray *shadow_vqs;
+const VhostShadowVirtqueueOps *shadow_vq_ops;
+void *shadow_vq_ops_opaque;
 struct vhost_dev *dev;
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 96997210be..beaaa7049a 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
 g_autoptr(VhostShadowVirtqueue) svq;
 
-svq = vhost_svq_new(v->iova_tree, NULL, NULL);
+svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+v->shadow_vq_ops_opaque);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index df1e69ee72..805c9dd6b6 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,14 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include 
@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Forward buffer for the moment.
+ */
+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+SVQElement *svq_elem, void *opaque)
+{
+VirtQueueElement *elem = &svq_elem->elem;
+unsigned int n = elem->out_num + elem->in_num;
+g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
+size_t in_len, dev_written;
+virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+int r;
+
+memcpy(dev_buffers, elem->out_sg, elem->out_num);
+memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
+
+r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
+  elem->in_num, svq_elem);
+if (unlikely(r != 0)) {
+if (unlikely(r == -ENOSPC)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+  __func__);
+}
+goto out;
+}
+
+/*
+ * We can poll here since we've had BQL from the time we sent the
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
+ * when BQL is released
+ */
+dev_written = vhost_svq_poll(svq);
+if (unlikely(dev_written < sizeof(status))) {
+error_report("Insufficient written data (%zu)", dev_written);
+}
+
+out:
+in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
+  sizeof(status));
+if (unlikely(in_len < sizeof(status))) {
+error_report("Bad device CVQ written length");
+}
+vhost_svq_push_elem(svq, svq_elem, MIN(in_len, sizeof(status)));
+g_free(svq_elem);
+return r;
+}
+
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
+.avail_handler = vhost_vdpa_net_handle_ctrl_avail,
+};
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
const char *device,
const char *name,
@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+if (!is_datapath) {
+s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
+s->vhost_vdpa.shadow_vq_ops_opaque = s;
+}
 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
 qemu_del_net_client(nc);
-- 
2.31.1




[PATCH v2 12/19] vhost: add vhost_svq_poll

2022-07-14 Thread Eugenio Pérez
It allows the Shadow Control VirtQueue to wait for the device to use the
available buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  1 +
 hw/virtio/vhost-shadow-virtqueue.c | 22 ++
 2 files changed, 23 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 1692541cbb..b5c6e3b3b4 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const 
SVQElement *elem,
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg, size_t in_num,
   SVQElement *elem);
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 5244896358..31a267f721 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -486,6 +486,28 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 } while (!vhost_svq_enable_notification(svq));
 }
 
+/**
+ * Poll the SVQ for one device used buffer.
+ *
+ * This function races with the main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the length written by the device.
+ */
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+do {
+uint32_t len;
+SVQElement *elem = vhost_svq_get_buf(svq, &len);
+if (elem) {
+return len;
+}
+
+/* Make sure we read new used_idx */
+smp_rmb();
+} while (true);
+}
+
 /**
  * Forward used buffers.
  *
-- 
2.31.1
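
The intended synchronous round-trip, as used by the CVQ handler in patch 15
(fragment; the caller holds the BQL for the whole sequence so the event loop
cannot steal the used buffer):

r = vhost_svq_add(svq, &out, 1, &in, 1, svq_elem);
if (r == 0) {
    size_t dev_written = vhost_svq_poll(svq);  /* busy-waits on used ring */
    /* dev_written is the in-buffer length reported by the device */
}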




[PATCH v2 16/19] vdpa: Buffer CVQ support on shadow virtqueue

2022-07-14 Thread Eugenio Pérez
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like rx filtering.

Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
TOCTOU with the guest's or device's memory every time there is a device
model change.  Otherwise, the guest could change the memory content in
the time between qemu and the device reading it.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
implemented.  If the virtio-net driver changes MAC the virtio-net device
model will be updated with the new one, and a rx filtering change event
will be raised.

More cvq commands could be added here straightforwardly but they have
not been tested.

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 211 +--
 1 file changed, 204 insertions(+), 7 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 805c9dd6b6..bc115a1455 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
 NetClientState nc;
 struct vhost_vdpa vhost_vdpa;
 VHostNetState *vhost_net;
+
+/* Control commands shadow buffers */
+void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
 bool started;
 } VhostVDPAState;
 
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
+qemu_vfree(s->cvq_cmd_out_buffer);
+qemu_vfree(s->cvq_cmd_in_buffer);
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+VhostIOVATree *tree = v->iova_tree;
+DMAMap needle = {
+/*
+ * No need to specify size or to look for more translations since
+ * this contiguous chunk was allocated by us.
+ */
+.translated_addr = (hwaddr)(uintptr_t)addr,
+};
+const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+int r;
+
+if (unlikely(!map)) {
+error_report("Cannot locate expected map");
+return;
+}
+
+r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+if (unlikely(r != 0)) {
+error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+}
+
+vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+/*
+ * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
+ * The in buffer is always 1 byte, so it should fit here
+ */
+return sizeof(struct virtio_net_ctrl_hdr) +
+   2 * sizeof(struct virtio_net_ctrl_mac) +
+   MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+   const struct iovec *out_data,
+   size_t out_num, size_t data_len, void *buf,
+   size_t *written, bool write)
+{
+DMAMap map = {};
+int r;
+
+if (unlikely(!data_len)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length of %s buffer\n",
+  __func__, write ? "in" : "out");
+return false;
+}
+
+*written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+map.translated_addr = (hwaddr)(uintptr_t)buf;
+map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+map.perm = write ? IOMMU_RW : IOMMU_RO,
+r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+if (unlikely(r != IOVA_OK)) {
+error_report("Cannot map injected element");
+return false;
+}
+
+r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+   !write);
+if (unlikely(r < 0)) {
+goto dma_map_err;
+}
+
+return true;
+
+dma_map_err:
+vhost_iova_tree_remove(v->iova_tree, &map);
+return false;
+}
+
 /**
- * Forward buffer for the moment.
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+VirtQueueElement *elem,
+struct iovec *iov)
+{
+size_t in_copied;
+bool ok;
+
+iov[0].iov_base = s->cvq_cmd_out_buffer;
+ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
+vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+&iov[0].iov_len, false)

[PATCH v2 17/19] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs

2022-07-14 Thread Eugenio Pérez
Knowing the device features is needed for CVQ SVQ, so SVQ knows whether it
can handle all commands or not. Extract this from
vhost_vdpa_get_max_queue_pairs so we can reuse it.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 net/vhost-vdpa.c | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index bc115a1455..7ccf9eaf4d 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
+{
+int ret = ioctl(fd, VHOST_GET_FEATURES, features);
+if (unlikely(ret < 0)) {
+error_setg_errno(errp, errno,
+ "Fail to query features from vhost-vDPA device");
+}
+return ret;
+}
+
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
+  int *has_cvq, Error **errp)
 {
 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
 g_autofree struct vhost_vdpa_config *config = NULL;
 __virtio16 *max_queue_pairs;
-uint64_t features;
 int ret;
 
-ret = ioctl(fd, VHOST_GET_FEATURES, &features);
-if (ret) {
-error_setg(errp, "Fail to query features from vhost-vDPA device");
-return ret;
-}
-
 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
 *has_cvq = 1;
 } else {
@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 NetClientState *peer, Error **errp)
 {
 const NetdevVhostVDPAOptions *opts;
+uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
 NetClientState *nc;
-int queue_pairs, i, has_cvq = 0;
+int queue_pairs, r, i, has_cvq = 0;
 
 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 opts = &netdev->u.vhost_vdpa;
@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return -errno;
 }
 
-queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
+r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
+if (unlikely(r < 0)) {
+return r;
+}
+
+queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
  &has_cvq, errp);
 if (queue_pairs < 0) {
 qemu_close(vdpa_device_fd);
-- 
2.31.1




[PATCH v2 09/19] vhost: Track number of descs in SVQElement

2022-07-14 Thread Eugenio Pérez
Since CVQ will be able to modify elements, the number of descriptors in
the guest may not match the number of descriptors exposed to the device.
Track them separately.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  6 ++
 hw/virtio/vhost-shadow-virtqueue.c | 10 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index f35d4b8f90..143c86a568 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -17,6 +17,12 @@
 
 typedef struct SVQElement {
 VirtQueueElement elem;
+
+/*
+ * Number of descriptors exposed to the device. May or may not match
+ * guest's
+ */
+unsigned int ndescs;
 } SVQElement;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 442ca3cbd3..3b112c4ec8 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -243,10 +243,10 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
   size_t in_num, SVQElement *svq_elem)
 {
 unsigned qemu_head;
-unsigned ndescs = in_num + out_num;
+svq_elem->ndescs = in_num + out_num;
 bool ok;
 
-if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
+if (unlikely(svq_elem->ndescs > vhost_svq_available_slots(svq))) {
 return -ENOSPC;
 }
 
@@ -393,7 +393,7 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue 
*svq,
 SVQElement *elem;
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
-uint16_t last_used, last_used_chain, num;
+uint16_t last_used, last_used_chain;
 
 if (!vhost_svq_more_used(svq)) {
 return NULL;
@@ -420,8 +420,8 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue 
*svq,
 }
 
 elem = svq->ring_id_maps[used_elem.id];
-num = elem->elem.in_num + elem->elem.out_num;
-last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
+last_used_chain = vhost_svq_last_desc_of_chain(svq, elem->ndescs,
+   used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
 
-- 
2.31.1




[PATCH v2 14/19] vdpa: Export vhost_vdpa_dma_map and unmap calls

2022-07-14 Thread Eugenio Pérez
Shadow CVQ will copy buffers into qemu's VA, so we avoid TOCTOU attacks
where the guest could set a different state in the qemu device model and
the vdpa device.

To do so, it needs to be able to map these new buffers to the device.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 include/hw/virtio/vhost-vdpa.h | 4 
 hw/virtio/vhost-vdpa.c | 7 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index a29dbb3f53..7214eb47dc 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,4 +39,8 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0b13e98471..96997210be 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -71,8 +71,8 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
 return false;
 }
 
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-  void *vaddr, bool readonly)
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr 
iova, hwaddr size,
 return ret;
 }
 
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
-hwaddr size)
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
-- 
2.31.1




[PATCH v2 10/19] vhost: add vhost_svq_push_elem

2022-07-14 Thread Eugenio Pérez
This function allows external SVQ users to return the guest's available
buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 16 
 2 files changed, 19 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 143c86a568..69b352c707 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue {
 
 bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const SVQElement *elem,
+ uint32_t len);
+
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 3b112c4ec8..95a8ab8477 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -429,6 +429,22 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue 
*svq,
 return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
 }
 
+/**
+ * Push an element to SVQ, returning it to the guest.
+ */
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const SVQElement *svq_elem,
+ uint32_t len)
+{
+virtqueue_push(svq->vq, &svq_elem->elem, len);
+if (svq->next_guest_avail_elem) {
+/*
+ * Avail ring was full when vhost_svq_flush was called, so it's a
+ * good moment to make more descriptors available if possible.
+ */
+vhost_handle_guest_kick(svq);
+}
+}
+
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 bool check_for_avail_queue)
 {
-- 
2.31.1




[PATCH v2 05/19] vhost: Move vhost_svq_kick call to vhost_svq_add

2022-07-14 Thread Eugenio Pérez
The series needs to expose vhost_svq_add with full functionality,
including the kick.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index fd1839cec5..e5a4a62daa 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -246,6 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 }
 
 svq->ring_id_maps[qemu_head] = elem;
+vhost_svq_kick(svq);
 return true;
 }
 
@@ -306,7 +307,6 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 /* VQ is broken, just return and ignore any other kicks */
 return;
 }
-vhost_svq_kick(svq);
 }
 
 virtio_queue_set_notification(svq->vq, true);
-- 
2.31.1




[PATCH v2 11/19] vhost: Expose vhost_svq_add

2022-07-14 Thread Eugenio Pérez
This allows code external to SVQ to forward custom buffers to the
device.
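A minimal sketch of an external caller, assuming qemu-owned iovecs and
following the ownership contract (the element is freed on -EINVAL, untouched
on -ENOSPC):

    static int example_forward(VhostShadowVirtqueue *svq,
                               const struct iovec *out, size_t out_num,
                               const struct iovec *in, size_t in_num)
    {
        SVQElement *elem = g_new0(SVQElement, 1);
        int r = vhost_svq_add(svq, out, out_num, in, in_num, elem);

        if (unlikely(r == -ENOSPC)) {
            g_free(elem); /* queue full: SVQ did not take ownership */
        }
        return r;
    }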

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 69b352c707..1692541cbb 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error 
**errp);
 
 void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const SVQElement *elem,
  uint32_t len);
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg, size_t in_num,
+  SVQElement *elem);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 95a8ab8477..5244896358 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  *
  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
-  size_t out_num, const struct iovec *in_sg,
-  size_t in_num, SVQElement *svq_elem)
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg, size_t in_num,
+  SVQElement *svq_elem)
 {
 unsigned qemu_head;
 svq_elem->ndescs = in_num + out_num;
-- 
2.31.1




[PATCH v2 08/19] vhost: Add SVQElement

2022-07-14 Thread Eugenio Pérez
This will allow SVQ to add context to the different queue elements.

This patch only stores the actual element; no functional change intended.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  8 ++--
 hw/virtio/vhost-shadow-virtqueue.c | 32 --
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index c132c994e9..f35d4b8f90 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,10 @@
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/vhost-iova-tree.h"
 
+typedef struct SVQElement {
+VirtQueueElement elem;
+} SVQElement;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -48,10 +52,10 @@ typedef struct VhostShadowVirtqueue {
 VhostIOVATree *iova_tree;
 
 /* Map for use the guest's descriptors */
-VirtQueueElement **ring_id_maps;
+SVQElement **ring_id_maps;
 
 /* Next VirtQueue element that guest made available */
-VirtQueueElement *next_guest_avail_elem;
+SVQElement *next_guest_avail_elem;
 
 /*
  * Backup next field for each descriptor so we can recover securely, not
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index b005a457c6..442ca3cbd3 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -240,7 +240,7 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  */
 static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg,
-  size_t in_num, VirtQueueElement *elem)
+  size_t in_num, SVQElement *svq_elem)
 {
 unsigned qemu_head;
 unsigned ndescs = in_num + out_num;
@@ -252,21 +252,22 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
 
 ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
 if (unlikely(!ok)) {
-g_free(elem);
+g_free(svq_elem);
 return -EINVAL;
 }
 
-svq->ring_id_maps[qemu_head] = elem;
+svq->ring_id_maps[qemu_head] = svq_elem;
 vhost_svq_kick(svq);
 return 0;
 }
 
 /* Convenience wrapper to add a guest's element to SVQ */
 static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
- VirtQueueElement *elem)
+ SVQElement *svq_elem)
 {
+VirtQueueElement *elem = &svq_elem->elem;
 return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
- elem->in_num, elem);
+ elem->in_num, svq_elem);
 }
 
 /**
@@ -292,7 +293,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 virtio_queue_set_notification(svq->vq, false);
 
 while (true) {
-VirtQueueElement *elem;
+SVQElement *elem;
 int r;
 
 if (svq->next_guest_avail_elem) {
@@ -386,9 +387,10 @@ static uint16_t vhost_svq_last_desc_of_chain(const 
VhostShadowVirtqueue *svq,
 return i;
 }
 
-static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
-   uint32_t *len)
+static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
+ uint32_t *len)
 {
+SVQElement *elem;
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
 uint16_t last_used, last_used_chain, num;
@@ -417,8 +419,8 @@ static VirtQueueElement 
*vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return NULL;
 }
 
-num = svq->ring_id_maps[used_elem.id]->in_num +
-  svq->ring_id_maps[used_elem.id]->out_num;
+elem = svq->ring_id_maps[used_elem.id];
+num = elem->elem.in_num + elem->elem.out_num;
 last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
@@ -439,8 +441,8 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_svq_disable_notification(svq);
 while (true) {
 uint32_t len;
-g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
-if (!elem) {
+g_autofree SVQElement *svq_elem = vhost_svq_get_buf(svq, &len);
+if (!svq_elem) {
 break;
 }
 
@@ -448,11 +450,11 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 qemu_log_mask(LOG_GUEST_ERROR,
  "More than %u used buffers obtained in a %u size SVQ",
  i, svq->vring.num);
-virtqueue_fill(vq, elem, len, i);
+   

[PATCH v2 02/19] virtio-net: Expose MAC_TABLE_ENTRIES

2022-07-14 Thread Eugenio Pérez
The vhost-vdpa control virtqueue needs to know the maximum number of entries
supported by the virtio-net device, so we know whether it is possible to
apply the filter.
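As an illustrative check (hypothetical code, not part of this patch), the
vhost-vdpa CVQ handler can now compare a requested MAC table size against
the device model's limit:

    /* n_unicast and n_multicast come from the parsed CVQ command */
    if (n_unicast + n_multicast > MAC_TABLE_ENTRIES) {
        /* too many entries for an exact filter */
        return VIRTIO_NET_ERR;
    }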

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h | 3 +++
 hw/net/virtio-net.c| 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index eb87032627..cce1c554f7 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
  * and latency. */
 #define TX_BURST 256
 
+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
+#define MAC_TABLE_ENTRIES    64
+
 typedef struct virtio_net_conf
 {
 uint32_t txtimer;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7ad948ee7c..f83e96e4ce 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -49,7 +49,6 @@
 
 #define VIRTIO_NET_VM_VERSION    11
 
-#define MAC_TABLE_ENTRIES    64
 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
 
 /* previously fixed value */
-- 
2.31.1




[PATCH v2 00/19] vdpa net devices Rx filter change notification with Shadow VQ

2022-07-14 Thread Eugenio Pérez
The control virtqueue is used by the networking device to accept various
commands from the driver. It is a must to support advanced configurations.

An rx filtering event is issued by qemu when the device's MAC address changes
and the previous one has not yet been queried by external agents.

Shadow VirtQueue (SVQ) already makes it possible to track the state of
virtqueues, effectively intercepting them so qemu can track which regions of
memory are dirtied by device action and need migration. However, this does
not solve the networking device state seen by the driver, which is modified
by CVQ messages such as MAC address changes from the driver.

This series uses the SVQ infrastructure to intercept the networking control
messages used by the device. This way, qemu is able to update the VirtIONet
device model and react to them. In particular, this series enables rx filter
change notification.

This is a prerequisite to achieve a net vdpa device with CVQ live migration.
It's a stripped-down version of [1], with error paths checked and no
migration enabled.

The first nine patches reorder and clean the code base so it is easier to
apply the later ones. No functional change should be noticed from these
changes.

Patches 11 to 14 extend the SVQ API so that other parts of qemu can interact
with it. In particular, they will be used by vhost-vdpa net to handle CVQ
messages.

Patches 15 to 17 enable the update of the virtio-net device model for each
CVQ message acknowledged by the device.

The last patches enable the x-svq parameter, forbidding device migration
since the state is not restored in the destination's vdpa device yet. This
will be added in a later series, using this work.

Comments are welcome.

v2:
- (Comments from series [1]).
- Actively poll for the CVQ answer instead of relying on the async used callback
- Do not offer a new buffer to device but reuse qemu's
- Use vhost_svq_add instead of not needed vhost_svq_inject
- Delete used and detach callbacks, not needed anymore
- Embed members of SVQElement in VirtQueueElement
- Reuse the same buffers for all CVQ commands

[1] 
https://patchwork.kernel.org/project/qemu-devel/cover/20220706184008.1649478-1-epere...@redhat.com/

Eugenio Pérez (19):
  vhost: move descriptor translation to vhost_svq_vring_write_descs
  virtio-net: Expose MAC_TABLE_ENTRIES
  virtio-net: Expose ctrl virtqueue logic
  vhost: Reorder vhost_svq_kick
  vhost: Move vhost_svq_kick call to vhost_svq_add
  vhost: Check for queue full at vhost_svq_add
  vhost: Decouple vhost_svq_add from VirtQueueElement
  vhost: Add SVQElement
  vhost: Track number of descs in SVQElement
  vhost: add vhost_svq_push_elem
  vhost: Expose vhost_svq_add
  vhost: add vhost_svq_poll
  vhost: Add svq avail_handler callback
  vdpa: Export vhost_vdpa_dma_map and unmap calls
  vdpa: manual forward CVQ buffers
  vdpa: Buffer CVQ support on shadow virtqueue
  vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs
  vdpa: Add device migration blocker
  vdpa: Add x-svq to NetdevVhostVDPAOptions

 qapi/net.json  |   9 +-
 hw/virtio/vhost-shadow-virtqueue.h |  52 -
 include/hw/virtio/vhost-vdpa.h |   8 +
 include/hw/virtio/virtio-net.h |   7 +
 hw/net/virtio-net.c|  85 ---
 hw/virtio/vhost-shadow-virtqueue.c | 216 +++--
 hw/virtio/vhost-vdpa.c |  25 +-
 net/vhost-vdpa.c   | 357 +++--
 8 files changed, 635 insertions(+), 124 deletions(-)

-- 
2.31.1





[PATCH v2 03/19] virtio-net: Expose ctrl virtqueue logic

2022-07-14 Thread Eugenio Pérez
This allows external vhost-net devices to modify the state of the
VirtIO device model once the vhost-vdpa device has acknowledged the
control commands.
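A rough sketch of the intended external caller: once the vdpa device acks a
command, replay it against the device model using qemu-owned iovec copies
(the variables here are illustrative):

    size_t written = virtio_net_handle_ctrl_iov(vdev, in_sg, in_num,
                                                out_sg, out_num);
    if (written != sizeof(virtio_net_ctrl_ack)) {
        /* the model rejected or could not parse the command */
    }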

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h |  4 ++
 hw/net/virtio-net.c| 84 --
 2 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index cce1c554f7..ef234ffe7e 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -221,6 +221,10 @@ struct VirtIONet {
 struct EBPFRSSContext ebpf_rss;
 };
 
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num);
 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
const char *type);
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f83e96e4ce..dd0d056fde 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t 
cmd,
 return VIRTIO_NET_OK;
 }
 
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
 struct virtio_net_ctrl_hdr ctrl;
 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
-VirtQueueElement *elem;
 size_t s;
 struct iovec *iov, *iov2;
-unsigned int iov_cnt;
+
+if (iov_size(in_sg, in_num) < sizeof(status) ||
+iov_size(out_sg, out_num) < sizeof(ctrl)) {
+virtio_error(vdev, "virtio-net ctrl missing headers");
+return 0;
+}
+
+iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
+s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
+iov_discard_front(&iov, &out_num, sizeof(ctrl));
+if (s != sizeof(ctrl)) {
+status = VIRTIO_NET_ERR;
+} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
+status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
+status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
+status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
+status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+}
+
+s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
+assert(s == sizeof(status));
+
+g_free(iov2);
+return sizeof(status);
+}
+
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtQueueElement *elem;
 
 for (;;) {
+size_t written;
 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 if (!elem) {
 break;
 }
-if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
-iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
-virtio_error(vdev, "virtio-net ctrl missing headers");
+
+written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
+ elem->out_sg, elem->out_num);
+if (written > 0) {
+virtqueue_push(vq, elem, written);
+virtio_notify(vdev, vq);
+g_free(elem);
+} else {
 virtqueue_detach_element(vq, elem, 0);
 g_free(elem);
 break;
 }
-
-iov_cnt = elem->out_num;
-iov2 = iov = g_memdup2(elem->out_sg,
-   sizeof(struct iovec) * elem->out_num);
-s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
-iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
-if (s != sizeof(ctrl)) {
-status = VIRTIO_NET_ERR;
-} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
-status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
-status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
-status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
-  

[PATCH v2 07/19] vhost: Decouple vhost_svq_add from VirtQueueElement

2022-07-14 Thread Eugenio Pérez
VirtQueueElement comes from the guest, but we're moving SVQ towards being
able to modify the element presented to the device without the guest's
knowledge.

To do so, make SVQ accept sg buffers directly, instead of using
VirtQueueElement.

Add vhost_svq_add_element to keep the convenience of adding a whole element.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 hw/virtio/vhost-shadow-virtqueue.c | 33 --
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index aee9891a67..b005a457c6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -172,30 +172,31 @@ static bool 
vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
-VirtQueueElement *elem, unsigned *head)
+const struct iovec *out_sg, size_t out_num,
+const struct iovec *in_sg, size_t in_num,
+unsigned *head)
 {
 unsigned avail_idx;
 vring_avail_t *avail = svq->vring.avail;
 bool ok;
-g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
+g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
 
 *head = svq->free_head;
 
 /* We need some descriptors here */
-if (unlikely(!elem->out_num && !elem->in_num)) {
+if (unlikely(!out_num && !in_num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "Guest provided element with no descriptors");
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
- elem->in_num > 0, false);
+ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
+ false);
 if (unlikely(!ok)) {
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
- true);
+ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
 if (unlikely(!ok)) {
 return false;
 }
@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  *
  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg,
+  size_t in_num, VirtQueueElement *elem)
 {
 unsigned qemu_head;
-unsigned ndescs = elem->in_num + elem->out_num;
+unsigned ndescs = in_num + out_num;
 bool ok;
 
 if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
 return -ENOSPC;
 }
 
-ok = vhost_svq_add_split(svq, elem, &qemu_head);
+ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
 if (unlikely(!ok)) {
 g_free(elem);
 return -EINVAL;
@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 return 0;
 }
 
+/* Convenience wrapper to add a guest's element to SVQ */
+static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
+ VirtQueueElement *elem)
+{
+return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
+ elem->in_num, elem);
+}
+
 /**
  * Forward available buffers.
  *
@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-r = vhost_svq_add(svq, elem);
+r = vhost_svq_add_element(svq, elem);
 if (unlikely(r != 0)) {
 if (r == -ENOSPC) {
 /*
-- 
2.31.1




[PATCH v2 01/19] vhost: move descriptor translation to vhost_svq_vring_write_descs

2022-07-14 Thread Eugenio Pérez
The translation is done for both in and out descriptors, so it's better
placed here.

Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 +-
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 56c96ebd13..e2184a4481 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const 
VhostShadowVirtqueue *svq,
 return true;
 }
 
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-const struct iovec *iovec, size_t num,
-bool more_descs, bool write)
+/**
+ * Write descriptors to SVQ vring
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Cache for hwaddr
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Returns true on success, false otherwise (printing an error).
+ */
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
+const struct iovec *iovec, size_t num,
+bool more_descs, bool write)
 {
 uint16_t i = svq->free_head, last = svq->free_head;
 unsigned n;
 uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
 vring_desc_t *descs = svq->vring.desc;
+bool ok;
 
 if (num == 0) {
-return;
+return true;
+}
+
+ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+if (unlikely(!ok)) {
+return false;
 }
 
 for (n = 0; n < num; n++) {
@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue 
*svq, hwaddr *sg,
 }
 
 svq->free_head = le16_to_cpu(svq->desc_next[last]);
+return true;
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return false;
 }
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
+ elem->in_num > 0, false);
 if (unlikely(!ok)) {
 return false;
 }
-vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
-elem->in_num > 0, false);
-
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
+ true);
 if (unlikely(!ok)) {
 return false;
 }
 
-vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
-
 /*
  * Put the entry in the available array (but don't update avail->idx until
  * they do sync).
-- 
2.31.1




[PATCH v2 06/19] vhost: Check for queue full at vhost_svq_add

2022-07-14 Thread Eugenio Pérez
The series needs to expose vhost_svq_add with full functionality,
including the check for a full queue.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 59 +-
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index e5a4a62daa..aee9891a67 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -233,21 +233,29 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  * Add an element to a SVQ.
  *
  * The caller must check that there is enough slots for the new element. It
- * takes ownership of the element: In case of failure, it is free and the SVQ
- * is considered broken.
+ * takes ownership of the element: on failure other than ENOSPC, it is freed.
+ *
+ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
 {
 unsigned qemu_head;
-bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+unsigned ndescs = elem->in_num + elem->out_num;
+bool ok;
+
+if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
+return -ENOSPC;
+}
+
+ok = vhost_svq_add_split(svq, elem, &qemu_head);
 if (unlikely(!ok)) {
 g_free(elem);
-return false;
+return -EINVAL;
 }
 
 svq->ring_id_maps[qemu_head] = elem;
 vhost_svq_kick(svq);
-return true;
+return 0;
 }
 
 /**
@@ -274,7 +282,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 
 while (true) {
 VirtQueueElement *elem;
-bool ok;
+int r;
 
 if (svq->next_guest_avail_elem) {
 elem = g_steal_pointer(&svq->next_guest_avail_elem);
@@ -286,25 +294,24 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) 
{
-/*
- * This condition is possible since a contiguous buffer in GPA
- * does not imply a contiguous buffer in qemu's VA
- * scatter-gather segments. If that happens, the buffer exposed
- * to the device needs to be a chain of descriptors at this
- * moment.
- *
- * SVQ cannot hold more available buffers if we are here:
- * queue the current guest descriptor and ignore further kicks
- * until some elements are used.
- */
-svq->next_guest_avail_elem = elem;
-return;
-}
-
-ok = vhost_svq_add(svq, elem);
-if (unlikely(!ok)) {
-/* VQ is broken, just return and ignore any other kicks */
+r = vhost_svq_add(svq, elem);
+if (unlikely(r != 0)) {
+if (r == -ENOSPC) {
+/*
+ * This condition is possible since a contiguous buffer in
+ * GPA does not imply a contiguous buffer in qemu's VA
+ * scatter-gather segments. If that happens, the buffer
+ * exposed to the device needs to be a chain of descriptors
+ * at this moment.
+ *
+ * SVQ cannot hold more available buffers if we are here:
+ * queue the current guest descriptor and ignore kicks
+ * until some elements are used.
+ */
+svq->next_guest_avail_elem = elem;
+}
+
+/* VQ is full or broken, just return and ignore kicks */
 return;
 }
 }
-- 
2.31.1




[PATCH] vdpa: Clean vhost_vdpa_dev_start(dev, false)

2022-07-12 Thread Eugenio Pérez
The return value is never checked, and this is a cleanup path, so assume
success.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 33 ++---
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 66f054a12c..d6ba4a492a 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -872,41 +872,35 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
 /**
  * Unmap a SVQ area in the device
  */
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
   const DMAMap *needle)
 {
 const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
 hwaddr size;
-int r;
 
 if (unlikely(!result)) {
 error_report("Unable to find SVQ address to unmap");
-return false;
+return;
 }
 
 size = ROUND_UP(result->size, qemu_real_host_page_size());
-r = vhost_vdpa_dma_unmap(v, result->iova, size);
-return r == 0;
+vhost_vdpa_dma_unmap(v, result->iova, size);
 }
 
-static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
+static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
const VhostShadowVirtqueue *svq)
 {
 DMAMap needle = {};
 struct vhost_vdpa *v = dev->opaque;
 struct vhost_vring_addr svq_addr;
-bool ok;
 
 vhost_svq_get_vring_addr(svq, &svq_addr);
 
 needle.translated_addr = svq_addr.desc_user_addr;
-ok = vhost_vdpa_svq_unmap_ring(v, &needle);
-if (unlikely(!ok)) {
-return false;
-}
+vhost_vdpa_svq_unmap_ring(v, &needle);
 
 needle.translated_addr = svq_addr.used_user_addr;
-return vhost_vdpa_svq_unmap_ring(v, &needle);
+vhost_vdpa_svq_unmap_ring(v, &needle);
 }
 
 /**
@@ -1066,23 +1060,19 @@ err:
 return false;
 }
 
-static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
+static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 {
 struct vhost_vdpa *v = dev->opaque;
 
 if (!v->shadow_vqs) {
-return true;
+return;
 }
 
 for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
-bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
-if (unlikely(!ok)) {
-return false;
-}
+vhost_vdpa_svq_unmap_rings(dev, svq);
 }
 
-return true;
 }
 
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
@@ -1099,10 +1089,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
bool started)
 }
 vhost_vdpa_set_vring_ready(dev);
 } else {
-ok = vhost_vdpa_svqs_stop(dev);
-if (unlikely(!ok)) {
-return -1;
-}
+vhost_vdpa_svqs_stop(dev);
 vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
 }
 
-- 
2.31.1




[PATCH] vhost: Return earlier if used buffers overrun SVQ flush

2022-07-12 Thread Eugenio Pérez
The previous code misses the just-picked avail buffer from the queue. This
version still blocks the used queue until queue reset, but it is cleaner to
check before calling vhost_svq_get_buf.

Fixes: 100890f7cad50 ("vhost: Shadow virtqueue buffers forwarding")
Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 56c96ebd13..9280285435 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -405,19 +405,21 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_svq_disable_notification(svq);
 while (true) {
 uint32_t len;
-g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
-if (!elem) {
-break;
-}
+g_autofree VirtQueueElement *elem = NULL;
 
 if (unlikely(i >= svq->vring.num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
  "More than %u used buffers obtained in a %u size SVQ",
  i, svq->vring.num);
-virtqueue_fill(vq, elem, len, i);
-virtqueue_flush(vq, i);
+virtqueue_flush(vq, svq->vring.num);
 return;
 }
+
+elem = vhost_svq_get_buf(svq, &len);
+if (!elem) {
+break;
+}
+
 virtqueue_fill(vq, elem, len, i++);
 }
 
-- 
2.31.1




[PATCH 22/22] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-08 Thread Eugenio Pérez
Finally, offer the possibility to enable SVQ from the command line.
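For example, a hypothetical invocation (the device path and ids are
illustrative):

    $ qemu-system-x86_64 ... \
        -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0,x-svq=on \
        -device virtio-net-pci,netdev=vdpa0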

Signed-off-by: Eugenio Pérez 
---
 qapi/net.json|  9 +-
 net/vhost-vdpa.c | 74 ++--
 2 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/qapi/net.json b/qapi/net.json
index 9af11e9a3b..75ba2cb989 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -445,12 +445,19 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #  (default: 1)
 #
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
+# (default: false)
+#
+# Features:
+# @unstable: Member @x-svq is experimental.
+#
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
 '*vhostdev': 'str',
-'*queues':   'int' } }
+'*queues':   'int',
+'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
 # @NetdevVmnetHostOptions:
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index b6ed30bec3..a6ebc234c0 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -92,6 +92,30 @@ const int vdpa_feature_bits[] = {
 VHOST_INVALID_FEATURE_BIT
 };
 
+/** Supported device specific feature bits with SVQ */
+static const uint64_t vdpa_svq_device_features =
+BIT_ULL(VIRTIO_NET_F_CSUM) |
+BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
+BIT_ULL(VIRTIO_NET_F_MTU) |
+BIT_ULL(VIRTIO_NET_F_MAC) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
+BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
+BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
+BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
+BIT_ULL(VIRTIO_NET_F_STATUS) |
+BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+BIT_ULL(VIRTIO_NET_F_MQ) |
+BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
+BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
+BIT_ULL(VIRTIO_NET_F_STANDBY);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -150,7 +174,11 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_dev *dev = &s->vhost_net->dev;
 
+if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+}
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -398,6 +426,14 @@ static uint64_t vhost_vdpa_net_iov_len(const struct iovec 
*iov,
 return len;
 }
 
+static int vhost_vdpa_get_iova_range(int fd,
+ struct vhost_vdpa_iova_range *iova_range)
+{
+int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
+
+return ret < 0 ? -errno : 0;
+}
+
 static CVQElement *vhost_vdpa_net_cvq_copy_elem(VhostVDPAState *s,
 VirtQueueElement *elem)
 {
@@ -558,7 +594,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
int vdpa_device_fd,
int queue_pair_index,
int nvqs,
-   bool is_datapath)
+   bool is_datapath,
+   bool svq,
+   VhostIOVATree *iova_tree)
 {
 NetClientState *nc = NULL;
 VhostVDPAState *s;
@@ -576,9 +614,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
 s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
 s->vhost_vdpa.shadow_vq_ops_opaque = s;
+error_setg(&s->vhost_vdpa.migration_blocker,
+   "Migration disabled: vhost-vdpa uses CVQ.");
 }
 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
@@ -638,6 +680,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
+g_autoptr(VhostIOVATree) iova_tree = NULL;
 NetClientState *nc;
 int queue_pairs, r, i, has_cvq = 0;
 
@@ -665,22 +708,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return queue_pairs;
 }
 
+if (opts->x_svq) {
+  

[PATCH 21/22] vdpa: Add device migration blocker

2022-07-08 Thread Eugenio Pérez
The device may need to add migration blockers: for example, if the vdpa
device uses features that are not compatible with migration.

Add the possibility here.
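A user of struct vhost_vdpa installs the blocker before the device starts;
vhost_vdpa_svqs_start() then registers it with the migration code. A minimal
sketch (the message is illustrative):

    /* Set before start; registered in vhost_vdpa_svqs_start(). */
    error_setg(&v->migration_blocker,
               "Migration disabled: device uses an incompatible feature");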

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index d85643..d10a89303e 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
 bool shadow_vqs_enabled;
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
+Error *migration_blocker;
 GPtrArray *shadow_vqs;
 const VhostShadowVirtqueueOps *shadow_vq_ops;
 void *shadow_vq_ops_opaque;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index c1162daecc..764a81b57f 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -20,6 +20,7 @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
+#include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -1016,6 +1017,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 return true;
 }
 
+if (v->migration_blocker) {
+int r = migrate_add_blocker(v->migration_blocker, &err);
+if (unlikely(r < 0)) {
+goto err_migration_blocker;
+}
+}
+
 for (i = 0; i < v->shadow_vqs->len; ++i) {
 VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
@@ -1057,6 +1065,9 @@ err_svq_setup:
 vhost_svq_stop(svq);
 }
 
+err_migration_blocker:
+error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+
 return false;
 }
 
@@ -1073,6 +1084,9 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 vhost_vdpa_svq_unmap_rings(dev, svq);
 }
 
+if (v->migration_blocker) {
+migrate_del_blocker(v->migration_blocker);
+}
 }
 
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
-- 
2.31.1




[PATCH 14/22] vhost: Add custom used buffer callback

2022-07-08 Thread Eugenio Pérez
The callback allows SVQ users to know the VirtQueue requests and
responses. QEMU can use this to synchronize the virtio device model state,
allowing it to be migrated with minimal changes to the migration code.

If callbacks are specified at svq creation, the buffers need to be injected
to the device using vhost_svq_inject. Opaque data must be given with them,
and it is returned to the callback in the used_handler call.

In the case of networking, this will be used to inspect the status of
control virtqueue messages from the device.
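A minimal sketch of an SVQ owner pairing vhost_svq_inject with a
used_handler follows; all names here are illustrative and not part of the
patch:

    static void my_used_handler(VhostShadowVirtqueue *svq, void *opaque,
                                uint32_t written)
    {
        /* "opaque" is whatever was passed to vhost_svq_inject() */
        MyCommand *cmd = opaque;

        /* inspect the device response, e.g. a virtio_net_ctrl_ack, here */
    }

    static const VhostShadowVirtqueueOps my_svq_ops = {
        .used_handler = my_used_handler,
    };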

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 15 ++-
 hw/virtio/vhost-shadow-virtqueue.c | 22 --
 hw/virtio/vhost-vdpa.c |  3 ++-
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 57ff97ce4f..96ce7aa62e 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -23,6 +23,15 @@ typedef struct SVQElement {
 uint32_t last_chain_id;
 } SVQElement;
 
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
+typedef void (*VirtQueueUsedCallback)(VhostShadowVirtqueue *svq,
+  void *used_elem_opaque,
+  uint32_t written);
+
+typedef struct VhostShadowVirtqueueOps {
+VirtQueueUsedCallback used_handler;
+} VhostShadowVirtqueueOps;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -67,6 +76,9 @@ typedef struct VhostShadowVirtqueue {
  */
 uint16_t *desc_next;
 
+/* Caller callbacks */
+const VhostShadowVirtqueueOps *ops;
+
 /* Next head to expose to the device */
 uint16_t shadow_avail_idx;
 
@@ -98,7 +110,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice 
*vdev,
  VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops);
 
 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index f4affa52ee..40183f8afd 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -306,6 +306,7 @@ int vhost_svq_inject(VhostShadowVirtqueue *svq, const 
struct iovec *iov,
  * assertions.
  */
 assert(out_num || in_num);
+assert(svq->ops);
 
 if (unlikely(num > vhost_svq_available_slots(svq))) {
 error_report("Injecting in a full queue");
@@ -508,7 +509,6 @@ static size_t vhost_svq_flush(VhostShadowVirtqueue *svq,
 while (true) {
 uint32_t len;
 SVQElement svq_elem;
-g_autofree VirtQueueElement *elem = NULL;
 
 if (unlikely(i >= svq->vring.num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
@@ -523,13 +523,20 @@ static size_t vhost_svq_flush(VhostShadowVirtqueue *svq,
 break;
 }
 
-elem = g_steal_pointer(&svq_elem.opaque);
-virtqueue_fill(vq, elem, len, i++);
+if (svq->ops) {
+svq->ops->used_handler(svq, svq_elem.opaque, len);
+} else {
+g_autofree VirtQueueElement *elem = NULL;
+elem = g_steal_pointer(&svq_elem.opaque);
+virtqueue_fill(vq, elem, len, i++);
+}
 ret++;
 }
 
-virtqueue_flush(vq, i);
-event_notifier_set(&svq->svq_call);
+if (i > 0) {
+virtqueue_flush(vq, i);
+event_notifier_set(&svq->svq_call);
+}
 
 if (check_for_avail_queue && svq->next_guest_avail_elem) {
 /*
@@ -758,12 +765,14 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  * shadow methods and file descriptors.
  *
  * @iova_tree: Tree to perform descriptors translations
+ * @ops: SVQ owner callbacks
  *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops)
 {
 g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
 int r;
@@ -785,6 +794,7 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree 
*iova_tree)
 event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
 event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
 svq->iova_tree = iova_tree;
+svq->ops = ops;
 return g_steal_pointer(&svq);
 
 err_init_hdev_call:
diff --git a/hw/virtio/vhost-vdpa.c b/hw/vi

[PATCH 20/22] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs

2022-07-08 Thread Eugenio Pérez
Knowing the device features is needed for CVQ SVQ, so SVQ knows whether it
can handle all commands or not. Extract the feature fetch from
vhost_vdpa_get_max_queue_pairs so we can reuse it.
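The intended reuse looks roughly like this (a sketch; error handling as in
the patch):

    uint64_t features;
    int r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
    if (unlikely(r < 0)) {
        return r;
    }
    /* SVQ can now check, for example, whether there is a CVQ to shadow */
    if (!(features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) {
        /* no control virtqueue: nothing for CVQ SVQ to handle */
    }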

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 3ae74f7fb5..b6ed30bec3 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -588,20 +588,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
+{
+int ret = ioctl(fd, VHOST_GET_FEATURES, features);
+if (unlikely(ret < 0)) {
+error_setg_errno(errp, errno,
+ "Fail to query features from vhost-vDPA device");
+}
+return ret;
+}
+
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
+  int *has_cvq, Error **errp)
 {
 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
 g_autofree struct vhost_vdpa_config *config = NULL;
 __virtio16 *max_queue_pairs;
-uint64_t features;
 int ret;
 
-ret = ioctl(fd, VHOST_GET_FEATURES, &features);
-if (ret) {
-error_setg(errp, "Fail to query features from vhost-vDPA device");
-return ret;
-}
-
 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
 *has_cvq = 1;
 } else {
@@ -631,10 +635,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 NetClientState *peer, Error **errp)
 {
 const NetdevVhostVDPAOptions *opts;
+uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
 NetClientState *nc;
-int queue_pairs, i, has_cvq = 0;
+int queue_pairs, r, i, has_cvq = 0;
 
 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 opts = &netdev->u.vhost_vdpa;
@@ -648,7 +653,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return -errno;
 }
 
-queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
+r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
+if (unlikely(r < 0)) {
+return r;
+}
+
+queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
  &has_cvq, errp);
 if (queue_pairs < 0) {
 qemu_close(vdpa_device_fd);
-- 
2.31.1




[PATCH 19/22] vdpa: Buffer CVQ support on shadow virtqueue

2022-07-08 Thread Eugenio Pérez
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like rx filtering.

Virtio-net control VQ now copies the descriptors to qemu's VA, so we avoid
TOCTOU with the guest's or the device's memory every time there is a device
model change. Otherwise, the guest could change the memory content between
the time qemu reads it and the device reads it.

Likewise, qemu does not share the memory of the command with the device: it
exposes another copy to it.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MAC_ADDR is implemented.
If the virtio-net driver changes the MAC, the virtio-net device model will
be updated with the new one, and an rx filtering change event will be
raised.

Other CVQ commands could be added here straightforwardly, but they have not
been tested.
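The core of the TOCTOU avoidance is a copy-before-expose pattern; a minimal
sketch, with the buffer names being illustrative:

    /* Read the guest command exactly once into a qemu-owned bounce buffer;
     * later guest writes cannot change what the device model validated. */
    size_t copied = iov_to_buf(elem->out_sg, elem->out_num, 0,
                               out_buf, out_len);

    /* validate out_buf here, then expose only this copy to the vdpa device */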

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 334 +--
 1 file changed, 322 insertions(+), 12 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 8558ad7a01..3ae74f7fb5 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -28,6 +28,26 @@
 #include "monitor/monitor.h"
 #include "hw/virtio/vhost.h"
 
+typedef struct CVQElement {
+/* Device's in and out buffer */
+void *in_buf, *out_buf;
+
+/* Optional guest element from where this cvqelement was created */
+VirtQueueElement *guest_elem;
+
+/* Control header sent by the guest. */
+struct virtio_net_ctrl_hdr ctrl;
+
+/* vhost-vdpa device, for cleanup reasons */
+struct vhost_vdpa *vdpa;
+
+/* Length of out data */
+size_t out_len;
+
+/* Copy of the out data sent by the guest excluding ctrl. */
+uint8_t out_data[];
+} CVQElement;
+
 /* Todo:need to add the multiqueue support here */
 typedef struct VhostVDPAState {
 NetClientState nc;
@@ -191,29 +211,277 @@ static NetClientInfo net_vhost_vdpa_info = {
 };
 
 /**
- * Forward buffer for the moment.
+ * Unmap a buffer of a CVQ element from the device IOVA space and free it
+ *
+ * @elem: CVQ element that owns the buffer
+ * @addr: qemu VA of the buffer to unmap
+ *
+ * Prints an error message in case of error.
+ */
+static void vhost_vdpa_cvq_unmap_buf(CVQElement *elem, void *addr)
+{
+struct vhost_vdpa *v = elem->vdpa;
+VhostIOVATree *tree = v->iova_tree;
+DMAMap needle = {
+/*
+ * No need to specify size or to look for more translations since
+ * this contiguous chunk was allocated by us.
+ */
+.translated_addr = (hwaddr)(uintptr_t)addr,
+};
+const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+int r;
+
+if (unlikely(!map)) {
+error_report("Cannot locate expected map");
+goto err;
+}
+
+r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+if (unlikely(r != 0)) {
+error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+}
+
+vhost_iova_tree_remove(tree, map);
+
+err:
+qemu_vfree(addr);
+}
+
+static void vhost_vdpa_cvq_delete_elem(CVQElement *elem)
+{
+if (elem->out_buf) {
+vhost_vdpa_cvq_unmap_buf(elem, g_steal_pointer(&elem->out_buf));
+}
+
+if (elem->in_buf) {
+vhost_vdpa_cvq_unmap_buf(elem, g_steal_pointer(&elem->in_buf));
+}
+
+/* Guest element must have been returned to the guest or freed otherwise */
+assert(!elem->guest_elem);
+
+g_free(elem);
+}
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(CVQElement, vhost_vdpa_cvq_delete_elem);
+
+static int vhost_vdpa_net_cvq_svq_inject(VhostShadowVirtqueue *svq,
+ CVQElement *cvq_elem,
+ size_t out_len)
+{
+const struct iovec iov[] = {
+{
+.iov_base = cvq_elem->out_buf,
+.iov_len = out_len,
+},{
+.iov_base = cvq_elem->in_buf,
+.iov_len = sizeof(virtio_net_ctrl_ack),
+}
+};
+
+return vhost_svq_inject(svq, iov, 1, 1, cvq_elem);
+}
+
+static void *vhost_vdpa_cvq_alloc_buf(struct vhost_vdpa *v,
+  const uint8_t *out_data, size_t data_len,
+  bool write)
+{
+DMAMap map = {};
+size_t buf_len = ROUND_UP(data_len, qemu_real_host_page_size());
+void *buf = qemu_memalign(qemu_real_host_page_size(), buf_len);
+int r;
+
+if (!write) {
+memcpy(buf, out_data, data_len);
+memset(buf + data_len, 0, buf_len - data_len);
+} else {
+memset(buf, 0, data_len);
+}
+
+map.translated_addr = (hwaddr)(uintptr_t)buf;
+map.size = buf_len - 1;
+map.perm = write ? IOMMU_RW : IOMMU_RO;
+r = vhost_iova_tree_map_alloc(v->iova_tree, &

[PATCH 18/22] vdpa: manual forward CVQ buffers

2022-07-08 Thread Eugenio Pérez
Do a simple forwarding of CVQ buffers, the same work SVQ would do by
itself, but through callbacks. No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  3 ++
 hw/virtio/vhost-vdpa.c |  3 +-
 net/vhost-vdpa.c   | 59 ++
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include 
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
 GPtrArray *shadow_vqs;
+const VhostShadowVirtqueueOps *shadow_vq_ops;
+void *shadow_vq_ops_opaque;
 struct vhost_dev *dev;
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 7d2922ccbf..c1162daecc 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
 g_autoptr(VhostShadowVirtqueue) svq;
 
-svq = vhost_svq_new(v->iova_tree, NULL, NULL);
+svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+v->shadow_vq_ops_opaque);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index df1e69ee72..8558ad7a01 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,14 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include 
@@ -187,6 +190,58 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Forward buffer for the moment.
+ */
+static bool vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+ VirtQueueElement *guest_elem,
+ void *opaque)
+{
+g_autofree VirtQueueElement *elem = guest_elem;
+unsigned int n = elem->out_num + elem->in_num;
+g_autofree struct iovec *iov = g_new(struct iovec, n);
+size_t in_len;
+virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+int r;
+
+memcpy(iov, elem->out_sg, elem->out_num * sizeof(*iov));
+memcpy(iov + elem->out_num, elem->in_sg, elem->in_num * sizeof(*iov));
+
+r = vhost_svq_inject(svq, iov, elem->out_num, elem->in_num, elem);
+if (unlikely(r != 0)) {
+goto err;
+}
+
+/* Now elem belongs to SVQ */
+g_steal_pointer(&elem);
+return true;
+
+err:
+in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
+  sizeof(status));
+vhost_svq_push_elem(svq, elem, in_len);
+return true;
+}
+
+static VirtQueueElement *vhost_vdpa_net_handle_ctrl_detach(void *elem_opaque)
+{
+return elem_opaque;
+}
+
+static void vhost_vdpa_net_handle_ctrl_used(VhostShadowVirtqueue *svq,
+void *vq_elem_opaque,
+uint32_t dev_written)
+{
+g_autofree VirtQueueElement *guest_elem = vq_elem_opaque;
+vhost_svq_push_elem(svq, guest_elem, sizeof(virtio_net_ctrl_ack));
+}
+
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
+.avail_handler = vhost_vdpa_net_handle_ctrl_avail,
+.used_handler = vhost_vdpa_net_handle_ctrl_used,
+.detach_handler = vhost_vdpa_net_handle_ctrl_detach,
+};
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
const char *device,
const char *name,
@@ -211,6 +266,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+if (!is_datapath) {
+s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
+s->vhost_vdpa.shadow_vq_ops_opaque = s;
+}
 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
 qemu_del_net_client(nc);
-- 
2.31.1




[PATCH 15/22] vhost: Add svq avail_handler callback

2022-07-08 Thread Eugenio Pérez
This allows external handlers to be aware of new buffers that the guest
places in the virtqueue.

When this callback is defined, ownership of the guest's virtqueue element is
transferred to the callback. This means that if the user wants to forward
the descriptor, it needs to manually inject it. The callback is also free to
process the command by itself and to return the element with svq_push.
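Wiring this up is a matter of passing the ops (and their opaque) at creation
time; a sketch reusing the hypothetical my_svq_ops from patch 14:

    VhostShadowVirtqueue *svq = vhost_svq_new(iova_tree, &my_svq_ops,
                                              my_opaque);
    if (unlikely(!svq)) {
        /* the reason was already reported through error_report() */
    }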

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 23 ++-
 hw/virtio/vhost-shadow-virtqueue.c | 13 +++--
 hw/virtio/vhost-vdpa.c |  2 +-
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 96ce7aa62e..cfc891e2e8 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -24,11 +24,28 @@ typedef struct SVQElement {
 } SVQElement;
 
 typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
+
+/**
+ * Callback to handle an avail buffer.
+ *
+ * @svq:  Shadow virtqueue
+ * @elem:  Element placed in the queue by the guest
+ * @vq_callback_opaque:  Opaque
+ *
+ * Returns true if the vq is running as expected, false otherwise.
+ *
+ * Note that ownership of elem is transferred to the callback.
+ */
+typedef bool (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
+   VirtQueueElement *elem,
+   void *vq_callback_opaque);
+
 typedef void (*VirtQueueUsedCallback)(VhostShadowVirtqueue *svq,
   void *used_elem_opaque,
   uint32_t written);
 
 typedef struct VhostShadowVirtqueueOps {
+VirtQueueAvailCallback avail_handler;
 VirtQueueUsedCallback used_handler;
 } VhostShadowVirtqueueOps;
 
@@ -79,6 +96,9 @@ typedef struct VhostShadowVirtqueue {
 /* Caller callbacks */
 const VhostShadowVirtqueueOps *ops;
 
+/* Caller callbacks opaque */
+void *ops_opaque;
+
 /* Next head to expose to the device */
 uint16_t shadow_avail_idx;
 
@@ -111,7 +131,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
 VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
-const VhostShadowVirtqueueOps *ops);
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque);
 
 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 40183f8afd..78579b9e0b 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -374,7 +374,13 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 return;
 }
 
-ok = vhost_svq_add_element(svq, g_steal_pointer(&elem));
+if (svq->ops) {
+ok = svq->ops->avail_handler(svq, g_steal_pointer(&elem),
+ svq->ops_opaque);
+} else {
+ok = vhost_svq_add_element(svq, g_steal_pointer(&elem));
+}
+
 if (unlikely(!ok)) {
 /* VQ is broken, just return and ignore any other kicks */
 return;
@@ -766,13 +772,15 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  *
  * @iova_tree: Tree to perform descriptors translations
  * @ops: SVQ owner callbacks
+ * @ops_opaque: ops opaque pointer
  *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
 VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
-const VhostShadowVirtqueueOps *ops)
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque)
 {
 g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
 int r;
@@ -795,6 +803,7 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree 
*iova_tree,
 event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
 svq->iova_tree = iova_tree;
 svq->ops = ops;
+svq->ops_opaque = ops_opaque;
 return g_steal_pointer(&svq);
 
 err_init_hdev_call:
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 25f7146fe4..9a4f00c114 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -420,7 +420,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
 g_autoptr(VhostShadowVirtqueue) svq;
 
-svq = vhost_svq_new(v->iova_tree, NULL);
+svq = vhost_svq_new(v->iova_tree, NULL, NULL);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
-- 
2.31.1




[PATCH 16/22] vhost: add detach SVQ operation

2022-07-08 Thread Eugenio Pérez
Add a detach operation to notify the caller that it needs to discard the
element.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 11 +++
 hw/virtio/vhost-shadow-virtqueue.c | 11 ++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index cfc891e2e8..dc0059adc6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -44,9 +44,20 @@ typedef void (*VirtQueueUsedCallback)(VhostShadowVirtqueue 
*svq,
   void *used_elem_opaque,
   uint32_t written);
 
+/**
+ * Detach the element from the shadow virtqueue.  SVQ needs to free it and it
+ * cannot be pushed or discarded.
+ *
+ * @elem_opaque: The element opaque
+ *
+ * Return the guest element to detach and free if any.
+ */
+typedef VirtQueueElement *(*VirtQueueDetachCallback)(void *elem_opaque);
+
 typedef struct VhostShadowVirtqueueOps {
 VirtQueueAvailCallback avail_handler;
 VirtQueueUsedCallback used_handler;
+VirtQueueDetachCallback detach_handler;
 } VhostShadowVirtqueueOps;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 78579b9e0b..626691ac4e 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -749,7 +749,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 
 for (unsigned i = 0; i < svq->vring.num; ++i) {
 g_autofree VirtQueueElement *elem = NULL;
-elem = g_steal_pointer(&svq->ring_id_maps[i].opaque);
+void *opaque = g_steal_pointer(&svq->ring_id_maps[i].opaque);
+
+if (!opaque) {
+continue;
+} else if (svq->ops) {
+elem = svq->ops->detach_handler(opaque);
+} else {
+elem = opaque;
+}
+
 if (elem) {
 virtqueue_detach_element(svq->vq, elem, 0);
 }
-- 
2.31.1
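
For illustration, a minimal detach_handler sketch (hypothetical names, not
part of the patch): an SVQ user that wraps the guest element in its own
context struct returns the inner VirtQueueElement so SVQ can detach it,
freeing the wrapper on the way.

/* Hypothetical wrapper an SVQ user stores as the element opaque */
typedef struct MyCVQElement {
    VirtQueueElement *guest_elem;   /* NULL for qemu-injected buffers */
} MyCVQElement;

static VirtQueueElement *my_cvq_detach_handler(void *elem_opaque)
{
    MyCVQElement *cvq_elem = elem_opaque;
    VirtQueueElement *elem = cvq_elem->guest_elem;

    /* Free our context; SVQ detaches and frees the returned element */
    g_free(cvq_elem);
    return elem;
}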




[PATCH 17/22] vdpa: Export vhost_vdpa_dma_map and unmap calls

2022-07-08 Thread Eugenio Pérez
Shadow CVQ will copy buffers into qemu's VA space, so we avoid TOCTOU attacks
that could set different states in the qemu device model and the vdpa device.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h | 4 
 hw/virtio/vhost-vdpa.c | 7 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index a29dbb3f53..7214eb47dc 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,4 +39,8 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 9a4f00c114..7d2922ccbf 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -71,8 +71,8 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
 return false;
 }
 
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-  void *vaddr, bool readonly)
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr 
iova, hwaddr size,
 return ret;
 }
 
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
-hwaddr size)
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
-- 
2.31.1
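
As a usage sketch (iova allocation via the IOVA tree is assumed to happen
elsewhere; names are illustrative), the exported calls let net/vhost-vdpa.c
map a qemu-allocated bounce buffer for the device:

/* Sketch: map a page-aligned qemu buffer read-only for the device */
size_t size = ROUND_UP(len, qemu_real_host_page_size());
void *buf = qemu_memalign(qemu_real_host_page_size(), size);
int r = vhost_vdpa_dma_map(v, iova, size, buf, true /* readonly */);
if (unlikely(r != 0)) {
    error_report("Cannot map CVQ buffer: %s", g_strerror(-r));
}

/* ... and undo the mapping at cleanup time */
vhost_vdpa_dma_unmap(v, iova, size);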




[PATCH 11/22] vhost: add vhost_svq_push_elem

2022-07-08 Thread Eugenio Pérez
This function allows external SVQ users to return the guest's available
buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  2 ++
 hw/virtio/vhost-shadow-virtqueue.c | 16 
 2 files changed, 18 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 3e1bea12ca..855fa82e3e 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -82,6 +82,8 @@ typedef struct VhostShadowVirtqueue {
 
 bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len);
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 01caa5887e..2b0a268655 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -439,6 +439,22 @@ static SVQElement vhost_svq_get_buf(VhostShadowVirtqueue 
*svq, uint32_t *len)
 return svq_elem;
 }
 
+/**
+ * Push an element to SVQ, returning it to the guest.
+ */
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len)
+{
+virtqueue_push(svq->vq, elem, len);
+if (svq->next_guest_avail_elem) {
+/*
+ * Avail ring was full when vhost_svq_flush was called, so it's a
+ * good moment to make more descriptors available if possible.
+ */
+vhost_handle_guest_kick(svq);
+}
+}
+
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 bool check_for_avail_queue)
 {
-- 
2.31.1
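
A sketch of the intended use from a used-buffer callback (handler name is
hypothetical): once qemu has processed a command on the guest's behalf, it
returns the element together with the number of bytes the device wrote.

static void my_used_handler(VhostShadowVirtqueue *svq,
                            void *used_elem_opaque, uint32_t written)
{
    g_autofree VirtQueueElement *elem = used_elem_opaque;

    /* Return the guest's buffer, reporting 'written' bytes as used */
    vhost_svq_push_elem(svq, elem, written);
}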




[PATCH 10/22] vdpa: Small rename of error labels

2022-07-08 Thread Eugenio Pérez
So later patches are cleaner.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index d6ba4a492a..fccfc832ea 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1024,7 +1024,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 int r;
 bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
 if (unlikely(!ok)) {
-goto err;
+goto err_svq_setup;
 }
 
 vhost_svq_start(svq, dev->vdev, vq);
@@ -1049,8 +1049,7 @@ err_set_addr:
 err_map:
 vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
 
-err:
-error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+err_svq_setup:
 for (unsigned j = 0; j < i; ++j) {
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
 vhost_vdpa_svq_unmap_rings(dev, svq);
-- 
2.31.1




[PATCH 06/22] vhost: Reorder vhost_svq_last_desc_of_chain

2022-07-08 Thread Eugenio Pérez
SVQ is going to store it in SVQElement, so we need it declared before the add
functions.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 2d70f832e9..a4d5d7bae0 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -217,6 +217,16 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return true;
 }
 
+static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
+ uint16_t num, uint16_t i)
+{
+for (uint16_t j = 0; j < (num - 1); ++j) {
+i = le16_to_cpu(svq->desc_next[i]);
+}
+
+return i;
+}
+
 /**
  * Add an element to a SVQ.
  *
@@ -374,16 +384,6 @@ static void 
vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
 svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
 }
 
-static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
- uint16_t num, uint16_t i)
-{
-for (uint16_t j = 0; j < (num - 1); ++j) {
-i = le16_to_cpu(svq->desc_next[i]);
-}
-
-return i;
-}
-
 static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
uint32_t *len)
 {
-- 
2.31.1




[PATCH 13/22] vhost: add vhost_svq_poll

2022-07-08 Thread Eugenio Pérez
It allows the Shadow Control VirtQueue to wait for the device to use the
commands that restore the net device state after a live migration.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  1 +
 hw/virtio/vhost-shadow-virtqueue.c | 54 --
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 09b87078af..57ff97ce4f 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -86,6 +86,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
  const VirtQueueElement *elem, uint32_t len);
 int vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
  size_t out_num, size_t in_num, void *opaque);
+ssize_t vhost_svq_poll(VhostShadowVirtqueue *svq);
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 4d59954f1b..f4affa52ee 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -10,6 +10,8 @@
 #include "qemu/osdep.h"
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 
+#include 
+
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
@@ -492,10 +494,11 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
 }
 }
 
-static void vhost_svq_flush(VhostShadowVirtqueue *svq,
-bool check_for_avail_queue)
+static size_t vhost_svq_flush(VhostShadowVirtqueue *svq,
+  bool check_for_avail_queue)
 {
 VirtQueue *vq = svq->vq;
+size_t ret = 0;
 
 /* Forward as many used buffers as possible. */
 do {
@@ -512,7 +515,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
  "More than %u used buffers obtained in a %u size SVQ",
  i, svq->vring.num);
 virtqueue_flush(vq, svq->vring.num);
-return;
+return ret;
 }
 
 svq_elem = vhost_svq_get_buf(svq, &len);
@@ -522,6 +525,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 
 elem = g_steal_pointer(&svq_elem.opaque);
 virtqueue_fill(vq, elem, len, i++);
+ret++;
 }
 
 virtqueue_flush(vq, i);
@@ -535,6 +539,50 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_handle_guest_kick(svq);
 }
 } while (!vhost_svq_enable_notification(svq));
+
+return ret;
+}
+
+/**
+ * Poll the SVQ for device used buffers.
+ *
+ * This function races with the main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the number of descriptors read from the device.
+ */
+ssize_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+int fd = event_notifier_get_fd(&svq->hdev_call);
+GPollFD poll_fd = {
+.fd = fd,
+.events = G_IO_IN,
+};
+assert(fd >= 0);
+int r = g_poll(&poll_fd, 1, -1);
+
+if (unlikely(r < 0)) {
+error_report("Cannot poll device call fd "G_POLLFD_FORMAT": (%d) %s",
+ poll_fd.fd, errno, g_strerror(errno));
+return -errno;
+}
+
+if (r == 0) {
+return 0;
+}
+
+if (unlikely(poll_fd.revents & ~(G_IO_IN))) {
+error_report(
+"Error polling device call fd "G_POLLFD_FORMAT": revents=%d",
+poll_fd.fd, poll_fd.revents);
+return -1;
+}
+
+/*
+ * Max return value of vhost_svq_flush is (uint16_t)-1, so it's safe to
+ * convert to ssize_t.
+ */
+return vhost_svq_flush(svq, false);
 }
 
 /**
-- 
2.31.1
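
A sketch of the expected call pattern (assuming 'num' commands were queued
beforehand with vhost_svq_inject): the caller blocks until the device has
used them all.

/* Sketch: synchronously drain 'num' previously injected commands */
while (num) {
    ssize_t used = vhost_svq_poll(svq);
    if (unlikely(used < 0)) {
        error_report("Error polling SVQ for used buffers");
        break;
    }
    num -= used;
}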




[PATCH 08/22] vhost: Move last chain id to SVQ element

2022-07-08 Thread Eugenio Pérez
We will allow the SVQ user to store opaque data for each element, so it's
easier if we store this kind of information right at avail time.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 0b34f48037..5646d875cb 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -17,6 +17,9 @@
 
 typedef struct SVQElement {
 VirtQueueElement *elem;
+
+/* Last descriptor of the chain */
+uint32_t last_chain_id;
 } SVQElement;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index d50e1383f5..635b6b359f 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -238,7 +238,9 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg,
   size_t in_num, VirtQueueElement *elem)
 {
+SVQElement *svq_elem;
 unsigned qemu_head;
+size_t n;
 bool ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
   &qemu_head);
 if (unlikely(!ok)) {
@@ -246,7 +248,10 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
 return false;
 }
 
-svq->ring_id_maps[qemu_head].elem = elem;
+n = out_num + in_num;
+svq_elem = &svq->ring_id_maps[qemu_head];
+svq_elem->elem = elem;
+svq_elem->last_chain_id = vhost_svq_last_desc_of_chain(svq, n, qemu_head);
 return true;
 }
 
@@ -399,7 +404,7 @@ static SVQElement vhost_svq_get_buf(VhostShadowVirtqueue 
*svq, uint32_t *len)
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
 SVQElement svq_elem = vhost_svq_empty_elem();
-uint16_t last_used, last_used_chain, num;
+uint16_t last_used;
 
 if (!vhost_svq_more_used(svq)) {
 return svq_elem;
@@ -427,11 +432,8 @@ static SVQElement vhost_svq_get_buf(VhostShadowVirtqueue 
*svq, uint32_t *len)
 return svq_elem;
 }
 
-num = svq_elem.elem->in_num + svq_elem.elem->out_num;
-last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
-svq->desc_next[last_used_chain] = svq->free_head;
+svq->desc_next[svq_elem.last_chain_id] = svq->free_head;
 svq->free_head = used_elem.id;
-
 *len = used_elem.len;
 return svq_elem;
 }
-- 
2.31.1




[PATCH 12/22] vhost: Add vhost_svq_inject

2022-07-08 Thread Eugenio Pérez
This allows qemu to inject buffers into the device.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  2 ++
 hw/virtio/vhost-shadow-virtqueue.c | 37 ++
 2 files changed, 39 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 855fa82e3e..09b87078af 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -84,6 +84,8 @@ bool vhost_svq_valid_features(uint64_t features, Error 
**errp);
 
 void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
  const VirtQueueElement *elem, uint32_t len);
+int vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
+ size_t out_num, size_t in_num, void *opaque);
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 2b0a268655..4d59954f1b 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -282,6 +282,43 @@ static bool vhost_svq_add_element(VhostShadowVirtqueue 
*svq,
 return ok;
 }
 
+/**
+ * Inject a chain of buffers to the device
+ *
+ * @svq: Shadow VirtQueue
+ * @iov: I/O vector
+ * @out_num: Number of front out descriptors
+ * @in_num: Number of last input descriptors
+ * @opaque: Contextual data to store in descriptor
+ *
+ * Return 0 on success, -ENOMEM if cannot inject
+ */
+int vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
+ size_t out_num, size_t in_num, void *opaque)
+{
+bool ok;
+size_t num = out_num + in_num;
+
+/*
+ * All vhost_svq_inject calls are controlled by qemu, so we won't hit these
+ * assertions.
+ */
+assert(out_num || in_num);
+
+if (unlikely(num > vhost_svq_available_slots(svq))) {
+error_report("Injecting in a full queue");
+return -ENOMEM;
+}
+
+ok = vhost_svq_add(svq, iov, out_num, iov + out_num, in_num, opaque);
+if (unlikely(!ok)) {
+return -EINVAL;
+}
+
+vhost_svq_kick(svq);
+return 0;
+}
+
 /**
  * Forward available buffers.
  *
-- 
2.31.1
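
For instance (a sketch; the buffers are assumed to be already mapped in the
device's IOVA space), a control command can be injected with one out
descriptor and one in descriptor for the status reply:

/* Sketch: out buffer carries the command, in buffer receives the ack */
struct iovec sg[2] = {
    { .iov_base = cmd_buf,    .iov_len = cmd_len         },  /* out */
    { .iov_base = status_buf, .iov_len = sizeof(uint8_t) },  /* in */
};
int r = vhost_svq_inject(svq, sg, 1, 1, my_opaque);
if (unlikely(r != 0)) {
    error_report("Cannot inject CVQ command: %s", g_strerror(-r));
}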




[PATCH 03/22] vdpa: Clean vhost_vdpa_dev_start(dev, false)

2022-07-08 Thread Eugenio Pérez
The return value is never checked and this is a cleanup path, so assume
success.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 33 ++---
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 66f054a12c..d6ba4a492a 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -872,41 +872,35 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
 /**
  * Unmap a SVQ area in the device
  */
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
   const DMAMap *needle)
 {
 const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
 hwaddr size;
-int r;
 
 if (unlikely(!result)) {
 error_report("Unable to find SVQ address to unmap");
-return false;
+return;
 }
 
 size = ROUND_UP(result->size, qemu_real_host_page_size());
-r = vhost_vdpa_dma_unmap(v, result->iova, size);
-return r == 0;
+vhost_vdpa_dma_unmap(v, result->iova, size);
 }
 
-static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
+static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
const VhostShadowVirtqueue *svq)
 {
 DMAMap needle = {};
 struct vhost_vdpa *v = dev->opaque;
 struct vhost_vring_addr svq_addr;
-bool ok;
 
 vhost_svq_get_vring_addr(svq, &svq_addr);
 
 needle.translated_addr = svq_addr.desc_user_addr;
-ok = vhost_vdpa_svq_unmap_ring(v, &needle);
-if (unlikely(!ok)) {
-return false;
-}
+vhost_vdpa_svq_unmap_ring(v, &needle);
 
 needle.translated_addr = svq_addr.used_user_addr;
-return vhost_vdpa_svq_unmap_ring(v, &needle);
+vhost_vdpa_svq_unmap_ring(v, &needle);
 }
 
 /**
@@ -1066,23 +1060,19 @@ err:
 return false;
 }
 
-static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
+static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 {
 struct vhost_vdpa *v = dev->opaque;
 
 if (!v->shadow_vqs) {
-return true;
+return;
 }
 
 for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
-bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
-if (unlikely(!ok)) {
-return false;
-}
+vhost_vdpa_svq_unmap_rings(dev, svq);
 }
 
-return true;
 }
 
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
@@ -1099,10 +1089,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
bool started)
 }
 vhost_vdpa_set_vring_ready(dev);
 } else {
-ok = vhost_vdpa_svqs_stop(dev);
-if (unlikely(!ok)) {
-return -1;
-}
+vhost_vdpa_svqs_stop(dev);
 vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
 }
 
-- 
2.31.1




[PATCH 09/22] vhost: Add opaque member to SVQElement

2022-07-08 Thread Eugenio Pérez
When qemu injects buffers into the vdpa device, it will be used to maintain
contextual data. If SVQ has no custom operations, it will be used to hold
the VirtQueueElement pointer.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  3 ++-
 hw/virtio/vhost-shadow-virtqueue.c | 13 +++--
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 5646d875cb..3e1bea12ca 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -16,7 +16,8 @@
 #include "hw/virtio/vhost-iova-tree.h"
 
 typedef struct SVQElement {
-VirtQueueElement *elem;
+/* Opaque data */
+void *opaque;
 
 /* Last descriptor of the chain */
 uint32_t last_chain_id;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 635b6b359f..01caa5887e 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -236,7 +236,7 @@ static uint16_t vhost_svq_last_desc_of_chain(const 
VhostShadowVirtqueue *svq,
  */
 static bool vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec 
*out_sg,
   size_t out_num, const struct iovec *in_sg,
-  size_t in_num, VirtQueueElement *elem)
+  size_t in_num, void *opaque)
 {
 SVQElement *svq_elem;
 unsigned qemu_head;
@@ -244,13 +244,12 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
const struct iovec *out_sg,
 bool ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
   &qemu_head);
 if (unlikely(!ok)) {
-g_free(elem);
 return false;
 }
 
 n = out_num + in_num;
 svq_elem = &svq->ring_id_maps[qemu_head];
-svq_elem->elem = elem;
+svq_elem->opaque = opaque;
 svq_elem->last_chain_id = vhost_svq_last_desc_of_chain(svq, n, qemu_head);
 return true;
 }
@@ -276,6 +275,8 @@ static bool vhost_svq_add_element(VhostShadowVirtqueue *svq,
 elem->in_num, elem);
 if (ok) {
 vhost_svq_kick(svq);
+} else {
+g_free(elem);
 }
 
 return ok;
@@ -391,7 +392,7 @@ static void 
vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
 
 static bool vhost_svq_is_empty_elem(SVQElement elem)
 {
-return elem.elem == NULL;
+return elem.opaque == NULL;
 }
 
 static SVQElement vhost_svq_empty_elem(void)
@@ -466,7 +467,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 break;
 }
 
-elem = g_steal_pointer(&svq_elem.elem);
+elem = g_steal_pointer(&svq_elem.opaque);
 virtqueue_fill(vq, elem, len, i++);
 }
 
@@ -634,7 +635,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 
 for (unsigned i = 0; i < svq->vring.num; ++i) {
 g_autofree VirtQueueElement *elem = NULL;
-elem = g_steal_pointer(&svq->ring_id_maps[i].elem);
+elem = g_steal_pointer(&svq->ring_id_maps[i].opaque);
 if (elem) {
 virtqueue_detach_element(svq->vq, elem, 0);
 }
-- 
2.31.1
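
A sketch of what the opaque member enables (hypothetical context struct;
vhost_svq_inject appears later in this series): qemu-originated buffers can
carry their own bookkeeping instead of a guest VirtQueueElement.

/* Hypothetical per-buffer context stored in SVQElement.opaque */
typedef struct CVQContext {
    VirtQueueElement *guest_elem;   /* NULL when qemu injected the buffer */
    void *in_buf;                   /* bounce buffer the device writes to */
} CVQContext;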




[PATCH 05/22] vhost: Decouple vhost_svq_add_split from VirtQueueElement

2022-07-08 Thread Eugenio Pérez
VirtQueueElement comes from the guest, but we're moving SVQ toward being able
to inject elements without the guest's knowledge.

To do so, make this accept sg buffers directly, instead of using
VirtQueueElement.

Add vhost_svq_add_element to keep the convenience of adding a VirtQueueElement.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 +-
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 115d769b86..2d70f832e9 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -172,30 +172,32 @@ static bool 
vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
-VirtQueueElement *elem, unsigned *head)
+const struct iovec *out_sg, size_t out_num,
+const struct iovec *in_sg, size_t in_num,
+unsigned *head)
 {
 unsigned avail_idx;
 vring_avail_t *avail = svq->vring.avail;
 bool ok;
-g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
+g_autofree hwaddr *sgs = NULL;
 
 *head = svq->free_head;
 
 /* We need some descriptors here */
-if (unlikely(!elem->out_num && !elem->in_num)) {
+if (unlikely(!out_num && !in_num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "Guest provided element with no descriptors");
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
- elem->in_num > 0, false);
+sgs = g_new(hwaddr, MAX(out_num, in_num));
+ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
+ false);
 if (unlikely(!ok)) {
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
- true);
+ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
 if (unlikely(!ok)) {
 return false;
 }
@@ -222,10 +224,13 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
  * takes ownership of the element: In case of failure, it is free and the SVQ
  * is considered broken.
  */
-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec 
*out_sg,
+  size_t out_num, const struct iovec *in_sg,
+  size_t in_num, VirtQueueElement *elem)
 {
 unsigned qemu_head;
-bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+bool ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
+  &qemu_head);
 if (unlikely(!ok)) {
 g_free(elem);
 return false;
@@ -249,6 +254,18 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
 event_notifier_set(&svq->hdev_kick);
 }
 
+static bool vhost_svq_add_element(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem)
+{
+bool ok = vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
+elem->in_num, elem);
+if (ok) {
+vhost_svq_kick(svq);
+}
+
+return ok;
+}
+
 /**
  * Forward available buffers.
  *
@@ -301,12 +318,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 return;
 }
 
-ok = vhost_svq_add(svq, elem);
+ok = vhost_svq_add_element(svq, g_steal_pointer(&elem));
 if (unlikely(!ok)) {
 /* VQ is broken, just return and ignore any other kicks */
 return;
 }
-vhost_svq_kick(svq);
 }
 
 virtio_queue_set_notification(svq->vq, true);
-- 
2.31.1
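
With this, the same add path serves both flavors. A sketch of the two
calling conventions (vhost_svq_add is still static, so this is only
illustrative of the shape later patches rely on):

/* Guest path: scatter-gather lists come from the element itself */
vhost_svq_add(svq, elem->out_sg, elem->out_num,
              elem->in_sg, elem->in_num, elem);

/* qemu path (later patches): hand-built sg lists, no guest element */
vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL);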




[PATCH 02/22] vhost: move descriptor translation to vhost_svq_vring_write_descs

2022-07-08 Thread Eugenio Pérez
It's done for both in and out descriptors, so it's better placed here.

Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 +-
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 9280285435..115d769b86 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const 
VhostShadowVirtqueue *svq,
 return true;
 }
 
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-const struct iovec *iovec, size_t num,
-bool more_descs, bool write)
+/**
+ * Write descriptors to SVQ vring
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Cache for hwaddr
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
+const struct iovec *iovec, size_t num,
+bool more_descs, bool write)
 {
 uint16_t i = svq->free_head, last = svq->free_head;
 unsigned n;
 uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
 vring_desc_t *descs = svq->vring.desc;
+bool ok;
 
 if (num == 0) {
-return;
+return true;
+}
+
+ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+if (unlikely(!ok)) {
+return false;
 }
 
 for (n = 0; n < num; n++) {
@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue 
*svq, hwaddr *sg,
 }
 
 svq->free_head = le16_to_cpu(svq->desc_next[last]);
+return true;
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return false;
 }
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
+ elem->in_num > 0, false);
 if (unlikely(!ok)) {
 return false;
 }
-vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
-elem->in_num > 0, false);
-
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
+ true);
 if (unlikely(!ok)) {
 return false;
 }
 
-vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
-
 /*
  * Put the entry in the available array (but don't update avail->idx until
  * they do sync).
-- 
2.31.1




[PATCH 07/22] vhost: Add SVQElement

2022-07-08 Thread Eugenio Pérez
This will allow SVQ to add metadata to the different queue elements. To
simplify changes, only store the actual element in this patch.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  8 --
 hw/virtio/vhost-shadow-virtqueue.c | 41 --
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index c132c994e9..0b34f48037 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,10 @@
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/vhost-iova-tree.h"
 
+typedef struct SVQElement {
+VirtQueueElement *elem;
+} SVQElement;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue {
 /* IOVA mapping */
 VhostIOVATree *iova_tree;
 
-/* Map for use the guest's descriptors */
-VirtQueueElement **ring_id_maps;
+/* Each element context */
+SVQElement *ring_id_maps;
 
 /* Next VirtQueue element that guest made available */
 VirtQueueElement *next_guest_avail_elem;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index a4d5d7bae0..d50e1383f5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -246,7 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
 return false;
 }
 
-svq->ring_id_maps[qemu_head] = elem;
+svq->ring_id_maps[qemu_head].elem = elem;
 return true;
 }
 
@@ -384,15 +384,25 @@ static void 
vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
 svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
 }
 
-static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
-   uint32_t *len)
+static bool vhost_svq_is_empty_elem(SVQElement elem)
+{
+return elem.elem == NULL;
+}
+
+static SVQElement vhost_svq_empty_elem(void)
+{
+return (SVQElement){};
+}
+
+static SVQElement vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len)
 {
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
+SVQElement svq_elem = vhost_svq_empty_elem();
 uint16_t last_used, last_used_chain, num;
 
 if (!vhost_svq_more_used(svq)) {
-return NULL;
+return svq_elem;
 }
 
 /* Only get used array entries after they have been exposed by dev */
@@ -405,24 +415,25 @@ static VirtQueueElement 
*vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 if (unlikely(used_elem.id >= svq->vring.num)) {
 qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
   svq->vdev->name, used_elem.id);
-return NULL;
+return svq_elem;
 }
 
-if (unlikely(!svq->ring_id_maps[used_elem.id])) {
+svq_elem = svq->ring_id_maps[used_elem.id];
+svq->ring_id_maps[used_elem.id] = vhost_svq_empty_elem();
+if (unlikely(vhost_svq_is_empty_elem(svq_elem))) {
 qemu_log_mask(LOG_GUEST_ERROR,
 "Device %s says index %u is used, but it was not available",
 svq->vdev->name, used_elem.id);
-return NULL;
+return svq_elem;
 }
 
-num = svq->ring_id_maps[used_elem.id]->in_num +
-  svq->ring_id_maps[used_elem.id]->out_num;
+num = svq_elem.elem->in_num + svq_elem.elem->out_num;
 last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
 
 *len = used_elem.len;
-return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
+return svq_elem;
 }
 
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
@@ -437,6 +448,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_svq_disable_notification(svq);
 while (true) {
 uint32_t len;
+SVQElement svq_elem;
 g_autofree VirtQueueElement *elem = NULL;
 
 if (unlikely(i >= svq->vring.num)) {
@@ -447,11 +459,12 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 return;
 }
 
-elem = vhost_svq_get_buf(svq, &len);
-if (!elem) {
+svq_elem = vhost_svq_get_buf(svq, &len);
+if (vhost_svq_is_empty_elem(svq_elem)) {
 break;
 }
 
+elem = g_steal_pointer(&svq_elem.elem);
 virtqueue_fill(vq, elem, len, i++);
 }
 
@@ -594,7 +607,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
 memset(svq->vring.desc, 0, driver_size);
 svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
memset(svq->vring.used, 0, device_size);

[PATCH 04/22] virtio-net: Expose ctrl virtqueue logic

2022-07-08 Thread Eugenio Pérez
This allows external vhost-net devices to modify the state of the
VirtIO device model once the vhost-vdpa device has acknowledged the control
commands.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h |  4 ++
 hw/net/virtio-net.c| 84 --
 2 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index eb87032627..42caea0d1d 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -218,6 +218,10 @@ struct VirtIONet {
 struct EBPFRSSContext ebpf_rss;
 };
 
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num);
 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
const char *type);
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7ad948ee7c..53bb92c9f1 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1434,57 +1434,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t 
cmd,
 return VIRTIO_NET_OK;
 }
 
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
 struct virtio_net_ctrl_hdr ctrl;
 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
-VirtQueueElement *elem;
 size_t s;
 struct iovec *iov, *iov2;
-unsigned int iov_cnt;
+
+if (iov_size(in_sg, in_num) < sizeof(status) ||
+iov_size(out_sg, out_num) < sizeof(ctrl)) {
+virtio_error(vdev, "virtio-net ctrl missing headers");
+return 0;
+}
+
+iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
+s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
+iov_discard_front(&iov, &out_num, sizeof(ctrl));
+if (s != sizeof(ctrl)) {
+status = VIRTIO_NET_ERR;
+} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
+status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
+status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
+status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
+status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+}
+
+s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
+assert(s == sizeof(status));
+
+g_free(iov2);
+return sizeof(status);
+}
+
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtQueueElement *elem;
 
 for (;;) {
+size_t written;
 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 if (!elem) {
 break;
 }
-if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
-iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
-virtio_error(vdev, "virtio-net ctrl missing headers");
+
+written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
+ elem->out_sg, elem->out_num);
+if (written > 0) {
+virtqueue_push(vq, elem, written);
+virtio_notify(vdev, vq);
+g_free(elem);
+} else {
 virtqueue_detach_element(vq, elem, 0);
 g_free(elem);
 break;
 }
-
-iov_cnt = elem->out_num;
-iov2 = iov = g_memdup2(elem->out_sg,
-   sizeof(struct iovec) * elem->out_num);
-s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
-iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
-if (s != sizeof(ctrl)) {
-status = VIRTIO_NET_ERR;
-} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
-status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
-status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
-status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
-status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
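
As a sketch of the intended external use (roughly the shape of the
vhost-vdpa CVQ code added later in the series; names are illustrative), a
caller outside virtio-net.c can feed the device model a copied command and
read back the ack:

/* Sketch: run a copied CVQ command through the device model */
const struct iovec out = { .iov_base = cmd_copy, .iov_len = cmd_len };
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
const struct iovec in = { .iov_base = &status, .iov_len = sizeof(status) };
size_t written = virtio_net_handle_ctrl_iov(vdev, &in, 1, &out, 1);
if (written != sizeof(status) || status != VIRTIO_NET_OK) {
    /* The device model rejected the command */
}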

[PATCH 01/22] vhost: Return earlier if used buffers overrun

2022-07-08 Thread Eugenio Pérez
The previous code missed the just-picked avail buffer from the queue. This
way the used queue still keeps blocking forever, but it is cleaner to check
before calling vhost_svq_get_buf.

Fixes: 100890f7cad50 ("vhost: Shadow virtqueue buffers forwarding")
Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 56c96ebd13..9280285435 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -405,19 +405,21 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_svq_disable_notification(svq);
 while (true) {
 uint32_t len;
-g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
-if (!elem) {
-break;
-}
+g_autofree VirtQueueElement *elem = NULL;
 
 if (unlikely(i >= svq->vring.num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
  "More than %u used buffers obtained in a %u size SVQ",
  i, svq->vring.num);
-virtqueue_fill(vq, elem, len, i);
-virtqueue_flush(vq, i);
+virtqueue_flush(vq, svq->vring.num);
 return;
 }
+
+elem = vhost_svq_get_buf(svq, &len);
+if (!elem) {
+break;
+}
+
 virtqueue_fill(vq, elem, len, i++);
 }
 
-- 
2.31.1




[PATCH 00/22] vdpa net devices Rx filter change notification with Shadow VQ

2022-07-08 Thread Eugenio Pérez
The control virtqueue is used by the networking device to accept various
commands from the driver. It's a must for supporting advanced configurations.

The rx filtering event is issued by qemu when the device's MAC address changes
and the previous one has not been queried by external agents.

Shadow VirtQueue (SVQ) already makes it possible to track the state of
virtqueues, effectively intercepting them so qemu can track which regions of
memory are dirtied by device action and need migration. However, this does not
cover the networking device state as seen by the driver, which is set through
CVQ messages like MAC address changes.

This series uses the SVQ infrastructure to intercept the networking control
messages used by the device. This way, qemu is able to update the VirtIONet
device model and react to them. In particular, this series enables rx filter
change notification.

This is a prerequisite to achieve a net vdpa device with CVQ live migration.
It's a stripped-down version of [1], with error paths checked and no migration
enabled.

The first patch solves a memory leak if the device is able to trick qemu into
thinking it has returned more buffers than the SVQ size. This should not be
possible, but we're a bit safer this way.

The next nine patches reorder and clean the code base so it's easier to apply
the later ones. No functional change should be noticed from these changes.

Patches 11 to 16 expose the SVQ API so that other parts of qemu can interact
with it. In particular, it will be used by vhost-vdpa net to handle CVQ
messages.

Patches 17 to 19 enable the update of the virtio-net device model for each
CVQ message acknowledged by the device.

The last patches enable the x-svq parameter, forbidding device migration since
the state is not restored in the destination's vdpa device yet. That will be
added in a later series, building on this work.

Comments are welcome.

[1] 
https://patchwork.kernel.org/project/qemu-devel/cover/20220706184008.1649478-1-epere...@redhat.com/

Eugenio Pérez (22):
  vhost: Return earlier if used buffers overrun
  vhost: move descriptor translation to vhost_svq_vring_write_descs
  vdpa: Clean vhost_vdpa_dev_start(dev, false)
  virtio-net: Expose ctrl virtqueue logic
  vhost: Decouple vhost_svq_add_split from VirtQueueElement
  vhost: Reorder vhost_svq_last_desc_of_chain
  vhost: Add SVQElement
  vhost: Move last chain id to SVQ element
  vhost: Add opaque member to SVQElement
  vdpa: Small rename of error labels
  vhost: add vhost_svq_push_elem
  vhost: Add vhost_svq_inject
  vhost: add vhost_svq_poll
  vhost: Add custom used buffer callback
  vhost: Add svq avail_handler callback
  vhost: add detach SVQ operation
  vdpa: Export vhost_vdpa_dma_map and unmap calls
  vdpa: manual forward CVQ buffers
  vdpa: Buffer CVQ support on shadow virtqueue
  vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs
  vdpa: Add device migration blocker
  vdpa: Add x-svq to NetdevVhostVDPAOptions

 qapi/net.json  |   9 +-
 hw/virtio/vhost-shadow-virtqueue.h |  64 +++-
 include/hw/virtio/vhost-vdpa.h |   8 +
 include/hw/virtio/virtio-net.h |   4 +
 hw/net/virtio-net.c|  84 ++---
 hw/virtio/vhost-shadow-virtqueue.c | 287 +
 hw/virtio/vhost-vdpa.c |  63 ++--
 net/vhost-vdpa.c   | 473 -
 8 files changed, 855 insertions(+), 137 deletions(-)

-- 
2.31.1





[RFC PATCH v9 18/23] vdpa: Export vhost_vdpa_dma_map and unmap calls

2022-07-06 Thread Eugenio Pérez
Shadow CVQ will copy buffers into qemu's VA space, so we avoid TOCTOU attacks
that could set different states in the qemu device model and the vdpa device.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h | 4 
 hw/virtio/vhost-vdpa.c | 7 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index a29dbb3f53..7214eb47dc 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,4 +39,8 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 69cfaf05d6..613c3483b0 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -71,8 +71,8 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
 return false;
 }
 
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-  void *vaddr, bool readonly)
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr 
iova, hwaddr size,
 return ret;
 }
 
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
-hwaddr size)
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
-- 
2.31.1




[RFC PATCH v9 22/23] vdpa: Inject virtio-net mac address via CVQ at start

2022-07-06 Thread Eugenio Pérez
This is needed so the destination vdpa device sees the same state as the
guest set in the source.

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 49 +++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 77d013833f..bb6ac7d96c 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -380,12 +380,59 @@ static int 
vhost_vdpa_start_control_svq(VhostShadowVirtqueue *svq,
 VhostVDPAState *s = opaque;
 struct vhost_dev *dev = s->vhost_vdpa.dev;
 struct vhost_vdpa *v = dev->opaque;
+VirtIONet *n = VIRTIO_NET(dev->vdev);
+uint64_t features = dev->vdev->host_features;
+size_t num = 0;
 int r;
 
 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
 
 r = ioctl(v->device_fd, VHOST_VDPA_SET_VRING_ENABLE, &state);
-return r < 0 ? -errno : r;
+if (unlikely(r < 0)) {
+return -errno;
+}
+
+if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+CVQElement *cvq_elem;
+const struct virtio_net_ctrl_hdr ctrl = {
+.class = VIRTIO_NET_CTRL_MAC,
+.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET,
+};
+uint8_t mac[6];
+const struct iovec out[] = {
+{
+.iov_base = (void *)&ctrl,
+.iov_len = sizeof(ctrl),
+},{
+.iov_base = mac,
+.iov_len = sizeof(mac),
+},
+};
+
+memcpy(mac, n->mac, sizeof(mac));
+cvq_elem = vhost_vdpa_cvq_alloc_elem(s, ctrl, out, ARRAY_SIZE(out),
+ iov_size(out, ARRAY_SIZE(out)),
+ NULL);
+assert(cvq_elem);
+r = vhost_vdpa_net_cvq_svq_inject(svq, cvq_elem,
+  sizeof(ctrl) + sizeof(mac));
+if (unlikely(r)) {
+assert(!"Need to test for pending buffers etc");
+return r;
+}
+num++;
+}
+
+while (num) {
+/*
+ * We can call vhost_svq_poll here because BQL protects calls to run.
+ */
+size_t used = vhost_svq_poll(svq);
+assert(used <= num);
+num -= used;
+}
+
+return 0;
 }
 
 /**
-- 
2.31.1




[RFC PATCH v9 17/23] vhost: add detach SVQ operation

2022-07-06 Thread Eugenio Pérez
To notify the caller that it needs to discard the element.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 11 +++
 hw/virtio/vhost-shadow-virtqueue.c | 11 ++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 4300cb66f8..583b6fda5d 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -46,10 +46,21 @@ typedef void (*VirtQueueUsedCallback)(VhostShadowVirtqueue 
*svq,
   void *used_elem_opaque,
   uint32_t written);
 
+/**
+ * Detach the element from the shadow virtqueue.  SVQ needs to free it and it
+ * cannot be pushed or discarded.
+ *
+ * @elem_opaque: The element opaque
+ *
+ * Return the guest element to detach and free if any.
+ */
+typedef VirtQueueElement *(*VirtQueueDetachCallback)(void *elem_opaque);
+
 typedef struct VhostShadowVirtqueueOps {
 ShadowVirtQueueStart start;
 VirtQueueAvailCallback avail_handler;
 VirtQueueUsedCallback used_handler;
+VirtQueueDetachCallback detach_handler;
 } VhostShadowVirtqueueOps;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index dffea256f1..4f072f040b 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -746,7 +746,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 
 for (unsigned i = 0; i < svq->vring.num; ++i) {
 g_autofree VirtQueueElement *elem = NULL;
-elem = g_steal_pointer(&svq->ring_id_maps[i].opaque);
+void *opaque = g_steal_pointer(&svq->ring_id_maps[i].opaque);
+
+if (!opaque) {
+continue;
+} else if (svq->ops) {
+elem = svq->ops->detach_handler(opaque);
+} else {
+elem = opaque;
+}
+
 if (elem) {
 virtqueue_detach_element(svq->vq, elem, 0);
 }
-- 
2.31.1




[RFC PATCH v9 21/23] vdpa: Add vhost_vdpa_start_control_svq

2022-07-06 Thread Eugenio Pérez
As a first step we only enable CVQ before the others. Future patches add
state restoration.

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index e415cc8de5..77d013833f 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -370,6 +370,24 @@ static CVQElement 
*vhost_vdpa_cvq_alloc_elem(VhostVDPAState *s,
 return g_steal_pointer(&cvq_elem);
 }
 
+static int vhost_vdpa_start_control_svq(VhostShadowVirtqueue *svq,
+void *opaque)
+{
+struct vhost_vring_state state = {
+.index = virtio_get_queue_index(svq->vq),
+.num = 1,
+};
+VhostVDPAState *s = opaque;
+struct vhost_dev *dev = s->vhost_vdpa.dev;
+struct vhost_vdpa *v = dev->opaque;
+int r;
+
+assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+r = ioctl(v->device_fd, VHOST_VDPA_SET_VRING_ENABLE, &state);
+return r < 0 ? -errno : r;
+}
+
 /**
  * iov_size with an upper limit. It's assumed UINT64_MAX is an invalid
  * iov_size.
@@ -554,6 +572,7 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops 
= {
 .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
 .used_handler = vhost_vdpa_net_handle_ctrl_used,
 .detach_handler = vhost_vdpa_net_handle_ctrl_detach,
+.start = vhost_vdpa_start_control_svq,
 };
 
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
-- 
2.31.1




[RFC PATCH v9 23/23] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-06 Thread Eugenio Pérez
Finally offering the possibility to enable SVQ from the command line.

Signed-off-by: Eugenio Pérez 
---
 qapi/net.json|  9 +-
 net/vhost-vdpa.c | 72 ++--
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/qapi/net.json b/qapi/net.json
index 9af11e9a3b..75ba2cb989 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -445,12 +445,19 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #  (default: 1)
 #
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
+# (default: false)
+#
+# Features:
+# @unstable: Member @x-svq is experimental.
+#
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
 '*vhostdev': 'str',
-'*queues':   'int' } }
+'*queues':   'int',
+'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
 # @NetdevVmnetHostOptions:
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index bb6ac7d96c..3f10636e05 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -93,6 +93,30 @@ const int vdpa_feature_bits[] = {
 VHOST_INVALID_FEATURE_BIT
 };
 
+/** Supported device specific feature bits with SVQ */
+static const uint64_t vdpa_svq_device_features =
+BIT_ULL(VIRTIO_NET_F_CSUM) |
+BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
+BIT_ULL(VIRTIO_NET_F_MTU) |
+BIT_ULL(VIRTIO_NET_F_MAC) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
+BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
+BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
+BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
+BIT_ULL(VIRTIO_NET_F_STATUS) |
+BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+BIT_ULL(VIRTIO_NET_F_MQ) |
+BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
+BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
+BIT_ULL(VIRTIO_NET_F_STANDBY);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -151,7 +175,11 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_dev *dev = &s->vhost_net->dev;
 
+if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+}
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -454,6 +482,14 @@ static uint64_t vhost_vdpa_net_iov_len(const struct iovec 
*iov,
 return len;
 }
 
+static int vhost_vdpa_get_iova_range(int fd,
+ struct vhost_vdpa_iova_range *iova_range)
+{
+int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
+
+return ret < 0 ? -errno : 0;
+}
+
 static CVQElement *vhost_vdpa_net_cvq_copy_elem(VhostVDPAState *s,
 VirtQueueElement *elem)
 {
@@ -628,7 +664,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
int vdpa_device_fd,
int queue_pair_index,
int nvqs,
-   bool is_datapath)
+   bool is_datapath,
+   bool svq,
+   VhostIOVATree *iova_tree)
 {
 NetClientState *nc = NULL;
 VhostVDPAState *s;
@@ -646,6 +684,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
 s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
 s->vhost_vdpa.shadow_vq_ops_opaque = s;
@@ -708,6 +748,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
+g_autoptr(VhostIOVATree) iova_tree = NULL;
 NetClientState *nc;
 int queue_pairs, r, i, has_cvq = 0;
 
@@ -735,22 +776,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return queue_pairs;
 }
 
+if (opts->x_svq) {
+struct vhost_vdpa_iova_range iova_range;
+
+uint64_t invalid_dev_features =
+features & ~vdpa_svq_device_features &
+/* Transport are all accepted at this point */
+~MAKE_64BIT_MASK(VIRTIO_TRANSPORT
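
For reference, a sketch of enabling the new option from the command line
(the device node path is illustrative):

qemu-system-x86_64 ... \
    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0,x-svq=on \
    -device virtio-net-pci,netdev=vdpa0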

[RFC PATCH v9 15/23] vhost: Add custom used buffer callback

2022-07-06 Thread Eugenio Pérez
The callback allows SVQ users to know the VirtQueue requests and
responses. QEMU can use this to synchronize the virtio device model state,
allowing it to be migrated with minimal changes to the migration code.

If callbacks are specified at SVQ creation, the buffers need to be
injected into the device using vhost_svq_inject. An opaque pointer must be
given with them, and it is returned to the callback at the used_handler call.

In the case of networking, this will be used to inspect control
virtqueue messages and to recover the status injection the first time.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  5 +
 hw/virtio/vhost-shadow-virtqueue.c | 16 +++-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index c8668fbdd6..296fef6f21 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -27,8 +27,13 @@ typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
 typedef int (*ShadowVirtQueueStart)(VhostShadowVirtqueue *svq,
 void *opaque);
 
+typedef void (*VirtQueueUsedCallback)(VhostShadowVirtqueue *svq,
+  void *used_elem_opaque,
+  uint32_t written);
+
 typedef struct VhostShadowVirtqueueOps {
 ShadowVirtQueueStart start;
+VirtQueueUsedCallback used_handler;
 } VhostShadowVirtqueueOps;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index ed7f1d0bc9..b92ca4a63f 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -506,7 +506,6 @@ static size_t vhost_svq_flush(VhostShadowVirtqueue *svq,
 while (true) {
 uint32_t len;
 SVQElement svq_elem;
-g_autofree VirtQueueElement *elem = NULL;
 
 if (unlikely(i >= svq->vring.num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
@@ -521,13 +520,20 @@ static size_t vhost_svq_flush(VhostShadowVirtqueue *svq,
 break;
 }
 
-elem = g_steal_pointer(&svq_elem.opaque);
-virtqueue_fill(vq, elem, len, i++);
+if (svq->ops) {
+svq->ops->used_handler(svq, svq_elem.opaque, len);
+} else {
+g_autofree VirtQueueElement *elem = NULL;
+elem = g_steal_pointer(&svq_elem.opaque);
+virtqueue_fill(vq, elem, len, i++);
+}
 ret++;
 }
 
-virtqueue_flush(vq, i);
-event_notifier_set(&svq->svq_call);
+if (i > 0) {
+virtqueue_flush(vq, i);
+event_notifier_set(&svq->svq_call);
+}
 
 if (check_for_avail_queue && svq->next_guest_avail_elem) {
 /*
-- 
2.31.1
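
A hypothetical used_handler sketch (names are illustrative): inspect the
acknowledged command, update the device model, and return the guest's
element if there is one.

static void my_used_handler(VhostShadowVirtqueue *svq,
                            void *used_elem_opaque, uint32_t written)
{
    MyCVQElement *cvq_elem = used_elem_opaque;

    if (written >= sizeof(virtio_net_ctrl_ack)) {
        /* e.g. replay the copied command on the VirtIONet model here */
    }

    if (cvq_elem->guest_elem) {
        /* Return the guest's element, reporting the bytes written */
        vhost_svq_push_elem(svq, cvq_elem->guest_elem, written);
        g_free(cvq_elem->guest_elem);
    }
    g_free(cvq_elem);
}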




[RFC PATCH v9 19/23] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs

2022-07-06 Thread Eugenio Pérez
Knowing the device features is needed for CVQ SVQ, so SVQ knows whether it
can handle all commands or not. Extract this logic from
vhost_vdpa_get_max_queue_pairs so we can reuse it.

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index df1e69ee72..b0158f625e 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -219,20 +219,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
+{
+int ret = ioctl(fd, VHOST_GET_FEATURES, features);
+if (ret) {
+error_setg_errno(errp, errno,
+ "Fail to query features from vhost-vDPA device");
+}
+return ret;
+}
+
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
+  int *has_cvq, Error **errp)
 {
 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
 g_autofree struct vhost_vdpa_config *config = NULL;
 __virtio16 *max_queue_pairs;
-uint64_t features;
 int ret;
 
-ret = ioctl(fd, VHOST_GET_FEATURES, &features);
-if (ret) {
-error_setg(errp, "Fail to query features from vhost-vDPA device");
-return ret;
-}
-
 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
 *has_cvq = 1;
 } else {
@@ -262,10 +266,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 NetClientState *peer, Error **errp)
 {
 const NetdevVhostVDPAOptions *opts;
+uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
 NetClientState *nc;
-int queue_pairs, i, has_cvq = 0;
+int queue_pairs, r, i, has_cvq = 0;
 
 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 opts = &netdev->u.vhost_vdpa;
@@ -279,7 +284,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return -errno;
 }
 
-queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
+r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
+if (r) {
+return r;
+}
+
+queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
  &has_cvq, errp);
 if (queue_pairs < 0) {
 qemu_close(vdpa_device_fd);
-- 
2.31.1




[RFC PATCH v9 10/23] vhost: Reorder vhost_svq_last_desc_of_chain

2022-07-06 Thread Eugenio Pérez
SVQ is going to store it in SVQElement, so we need it declared before the add
functions.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 913bca8769..cf1745fd4d 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -218,6 +218,16 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return true;
 }
 
+static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
+ uint16_t num, uint16_t i)
+{
+for (uint16_t j = 0; j < (num - 1); ++j) {
+i = le16_to_cpu(svq->desc_next[i]);
+}
+
+return i;
+}
+
 /**
  * Add an element to a SVQ.
  *
@@ -375,16 +385,6 @@ static void 
vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
 svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
 }
 
-static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
- uint16_t num, uint16_t i)
-{
-for (uint16_t j = 0; j < (num - 1); ++j) {
-i = le16_to_cpu(svq->desc_next[i]);
-}
-
-return i;
-}
-
 static bool vhost_svq_is_empty_elem(SVQElement elem)
 {
 return elem.elem == NULL;
-- 
2.31.1




[RFC PATCH v9 16/23] vhost: Add svq avail_handler callback

2022-07-06 Thread Eugenio Pérez
This allows external handlers to be aware of new buffers that the guest
places in the virtqueue.

When this callback is defined, the ownership of the guest's virtqueue element
is transferred to the callback. This means that if the user wants to
forward the descriptor, it needs to inject it manually. The callback is
also free to process the command by itself and return the element with
svq_push.
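
As a rough illustration (not part of this patch), a handler that processes
the command in qemu and returns the element to the guest could look like the
sketch below. handle_cmd is a hypothetical helper; vhost_svq_push_elem is the
helper added earlier in this series.

    /*
     * Hypothetical avail handler. Ownership of elem is transferred to us,
     * so we must either inject it to the device ourselves or push it back
     * to the guest and free it, as done here.
     */
    static bool example_avail_handler(VhostShadowVirtqueue *svq,
                                      VirtQueueElement *elem,
                                      void *opaque)
    {
        /* handle_cmd: assumed helper that processes the command and returns
         * the number of bytes written to the in buffers */
        uint32_t written = handle_cmd(elem, opaque);

        vhost_svq_push_elem(svq, elem, written);
        g_free(elem);
        return true;
    }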

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 16 
 hw/virtio/vhost-shadow-virtqueue.c |  8 +++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 296fef6f21..4300cb66f8 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -27,12 +27,28 @@ typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
 typedef int (*ShadowVirtQueueStart)(VhostShadowVirtqueue *svq,
 void *opaque);
 
+/**
+ * Callback to handle an avail buffer.
+ *
+ * @svq:  Shadow virtqueue
+ * @elem:  Element placed in the queue by the guest
+ * @vq_callback_opaque:  Opaque
+ *
+ * Returns true if the vq is running as expected, false otherwise.
+ *
+ * Note that ownership of elem is transferred to the callback.
+ */
+typedef bool (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
+   VirtQueueElement *elem,
+   void *vq_callback_opaque);
+
 typedef void (*VirtQueueUsedCallback)(VhostShadowVirtqueue *svq,
   void *used_elem_opaque,
   uint32_t written);
 
 typedef struct VhostShadowVirtqueueOps {
 ShadowVirtQueueStart start;
+VirtQueueAvailCallback avail_handler;
 VirtQueueUsedCallback used_handler;
 } VhostShadowVirtqueueOps;
 
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index b92ca4a63f..dffea256f1 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -371,7 +371,13 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 return;
 }
 
-ok = vhost_svq_add_element(svq, g_steal_pointer(&elem));
+if (svq->ops) {
+ok = svq->ops->avail_handler(svq, g_steal_pointer(&elem),
+ svq->ops_opaque);
+} else {
+ok = vhost_svq_add_element(svq, g_steal_pointer(&elem));
+}
+
 if (unlikely(!ok)) {
 /* VQ is broken, just return and ignore any other kicks */
 return;
-- 
2.31.1




[RFC PATCH v9 14/23] vhost: add vhost_svq_poll

2022-07-06 Thread Eugenio Pérez
It allows the Shadow Control VirtQueue to wait for the device to use the
commands that restore the net device state after a live migration.
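
As a usage sketch (not code from this patch), the expected pattern pairs
vhost_svq_inject from elsewhere in this series with the new blocking poll.
Here sg, out_num, in_num and ctx stand for an already-mapped command buffer
and its context:

    /* Sketch: send one command and block until the device uses a buffer.
     * ctx must be non-NULL, since SVQ uses a NULL opaque to mark empty
     * descriptor slots. */
    static ssize_t example_send_and_wait(VhostShadowVirtqueue *svq,
                                         const struct iovec *sg,
                                         size_t out_num, size_t in_num,
                                         void *ctx)
    {
        int r = vhost_svq_inject(svq, sg, out_num, in_num, ctx);
        if (unlikely(r != 0)) {
            return r;
        }

        /* Blocks on the device call fd; returns used descriptors or < 0 */
        return vhost_svq_poll(svq);
    }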

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  1 +
 hw/virtio/vhost-shadow-virtqueue.c | 54 --
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index d01d2370db..c8668fbdd6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -100,6 +100,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
  const VirtQueueElement *elem, uint32_t len);
 int vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
  size_t out_num, size_t in_num, void *opaque);
+ssize_t vhost_svq_poll(VhostShadowVirtqueue *svq);
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index bd9e34b413..ed7f1d0bc9 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -10,6 +10,8 @@
 #include "qemu/osdep.h"
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 
+#include 
+
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
@@ -490,10 +492,11 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
 }
 }
 
-static void vhost_svq_flush(VhostShadowVirtqueue *svq,
-bool check_for_avail_queue)
+static size_t vhost_svq_flush(VhostShadowVirtqueue *svq,
+  bool check_for_avail_queue)
 {
 VirtQueue *vq = svq->vq;
+size_t ret = 0;
 
 /* Forward as many used buffers as possible. */
 do {
@@ -510,7 +513,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
  "More than %u used buffers obtained in a %u size SVQ",
  i, svq->vring.num);
 virtqueue_flush(vq, svq->vring.num);
-return;
+return ret;
 }
 
 svq_elem = vhost_svq_get_buf(svq, &len);
@@ -520,6 +523,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 
 elem = g_steal_pointer(&svq_elem.opaque);
 virtqueue_fill(vq, elem, len, i++);
+ret++;
 }
 
 virtqueue_flush(vq, i);
@@ -533,6 +537,50 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_handle_guest_kick(svq);
 }
 } while (!vhost_svq_enable_notification(svq));
+
+return ret;
+}
+
+/**
+ * Poll the SVQ for device used buffers.
+ *
+ * This function races with the main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the number of descriptors read from the device.
+ */
+ssize_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+int fd = event_notifier_get_fd(&svq->hdev_call);
+GPollFD poll_fd = {
+.fd = fd,
+.events = G_IO_IN,
+};
+assert(fd >= 0);
+int r = g_poll(&poll_fd, 1, -1);
+
+if (unlikely(r < 0)) {
+error_report("Cannot poll device call fd "G_POLLFD_FORMAT": (%d) %s",
+ poll_fd.fd, errno, g_strerror(errno));
+return -errno;
+}
+
+if (r == 0) {
+return 0;
+}
+
+if (unlikely(poll_fd.revents & ~(G_IO_IN))) {
+error_report(
+"Error polling device call fd "G_POLLFD_FORMAT": revents=%d",
+poll_fd.fd, poll_fd.revents);
+return -1;
+}
+
+/*
+ * Max return value of vhost_svq_flush is (uint16_t)-1, so it's safe to
+ * convert to ssize_t.
+ */
+return vhost_svq_flush(svq, false);
 }
 
 /**
-- 
2.31.1




[RFC PATCH v9 12/23] vhost: Add opaque member to SVQElement

2022-07-06 Thread Eugenio Pérez
When qemu injects buffers into the vdpa device, this member will be used to
maintain contextual data. If SVQ has no custom operation, it will be used to
maintain the VirtQueueElement pointer.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  3 ++-
 hw/virtio/vhost-shadow-virtqueue.c | 13 +++--
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 0e434e9fd0..a811f90e01 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -16,7 +16,8 @@
 #include "hw/virtio/vhost-iova-tree.h"
 
 typedef struct SVQElement {
-VirtQueueElement *elem;
+/* Opaque data */
+void *opaque;
 
 /* Last descriptor of the chain */
 uint32_t last_chain_id;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index c5e49e51c5..492bb12b5f 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -237,7 +237,7 @@ static uint16_t vhost_svq_last_desc_of_chain(const 
VhostShadowVirtqueue *svq,
  */
 static bool vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec 
*out_sg,
   size_t out_num, const struct iovec *in_sg,
-  size_t in_num, VirtQueueElement *elem)
+  size_t in_num, void *opaque)
 {
 SVQElement *svq_elem;
 unsigned qemu_head;
@@ -245,13 +245,12 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
const struct iovec *out_sg,
 bool ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
   &qemu_head);
 if (unlikely(!ok)) {
-g_free(elem);
 return false;
 }
 
 n = out_num + in_num;
 svq_elem = &svq->ring_id_maps[qemu_head];
-svq_elem->elem = elem;
+svq_elem->opaque = opaque;
 svq_elem->last_chain_id = vhost_svq_last_desc_of_chain(svq, n, qemu_head);
 return true;
 }
@@ -277,6 +276,8 @@ static bool vhost_svq_add_element(VhostShadowVirtqueue *svq,
 elem->in_num, elem);
 if (ok) {
 vhost_svq_kick(svq);
+} else {
+g_free(elem);
 }
 
 return ok;
@@ -392,7 +393,7 @@ static void 
vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
 
 static bool vhost_svq_is_empty_elem(SVQElement elem)
 {
-return elem.elem == NULL;
+return elem.opaque == NULL;
 }
 
 static SVQElement vhost_svq_empty_elem(void)
@@ -483,7 +484,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 break;
 }
 
-elem = g_steal_pointer(&svq_elem.elem);
+elem = g_steal_pointer(&svq_elem.opaque);
 virtqueue_fill(vq, elem, len, i++);
 }
 
@@ -651,7 +652,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 
 for (unsigned i = 0; i < svq->vring.num; ++i) {
 g_autofree VirtQueueElement *elem = NULL;
-elem = g_steal_pointer(&svq->ring_id_maps[i].elem);
+elem = g_steal_pointer(&svq->ring_id_maps[i].opaque);
 if (elem) {
 virtqueue_detach_element(svq->vq, elem, 0);
 }
-- 
2.31.1




[RFC PATCH v9 03/23] vdpa: delay set_vring_ready after DRIVER_OK

2022-07-06 Thread Eugenio Pérez
To restore the device in the destination of a live migration we send the
commands through control virtqueue. For a device to read CVQ it must
have received DRIVER_OK status bit.

However, this opens a window where the device could start receiving
packets in rx queue 0 before it receives the RSS configuration. To avoid
that, we will not send vring_enable until all the configuration has been
consumed by the device.

As a first step, reverse the DRIVER_OK and SET_VRING_ENABLE steps.
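
Condensed from the diff below (error handling trimmed), the resulting start
order is:

    memory_listener_register(&v->listener, &address_space_memory);
    r = vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    if (unlikely(r)) {
        return r;
    }
    /* Later patches inject CVQ state-restore commands at this point */
    vhost_vdpa_set_vring_ready(dev);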

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 66f054a12c..2ee8009594 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -728,13 +728,18 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, 
int idx)
 return idx;
 }
 
+/**
+ * Set ready all vring of the device
+ *
+ * @dev: Vhost device
+ */
 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
 {
 int i;
 trace_vhost_vdpa_set_vring_ready(dev);
-for (i = 0; i < dev->nvqs; ++i) {
+for (i = 0; i < dev->vq_index_end; ++i) {
 struct vhost_vring_state state = {
-.index = dev->vq_index + i,
+.index = i,
 .num = 1,
 };
 vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
@@ -1097,7 +1102,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
bool started)
 if (unlikely(!ok)) {
 return -1;
 }
-vhost_vdpa_set_vring_ready(dev);
 } else {
 ok = vhost_vdpa_svqs_stop(dev);
 if (unlikely(!ok)) {
@@ -1111,16 +1115,22 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
bool started)
 }
 
 if (started) {
+int r;
+
 memory_listener_register(&v->listener, &address_space_memory);
-return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+r = vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+if (unlikely(r)) {
+return r;
+}
+vhost_vdpa_set_vring_ready(dev);
 } else {
 vhost_vdpa_reset_device(dev);
 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
 memory_listener_unregister(&v->listener);
-
-return 0;
 }
+
+return 0;
 }
 
 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
-- 
2.31.1




[RFC PATCH v9 11/23] vhost: Move last chain id to SVQ element

2022-07-06 Thread Eugenio Pérez
We will allow SVQ users to store opaque data for each element, so it's
easier if we store this kind of information when the element is made
available.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index e434dc63b0..0e434e9fd0 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -17,6 +17,9 @@
 
 typedef struct SVQElement {
 VirtQueueElement *elem;
+
+/* Last descriptor of the chain */
+uint32_t last_chain_id;
 } SVQElement;
 
 typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index cf1745fd4d..c5e49e51c5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -239,7 +239,9 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg,
   size_t in_num, VirtQueueElement *elem)
 {
+SVQElement *svq_elem;
 unsigned qemu_head;
+size_t n;
 bool ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
   &qemu_head);
 if (unlikely(!ok)) {
@@ -247,7 +249,10 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
 return false;
 }
 
-svq->ring_id_maps[qemu_head].elem = elem;
+n = out_num + in_num;
+svq_elem = &svq->ring_id_maps[qemu_head];
+svq_elem->elem = elem;
+svq_elem->last_chain_id = vhost_svq_last_desc_of_chain(svq, n, qemu_head);
 return true;
 }
 
@@ -400,7 +405,7 @@ static SVQElement vhost_svq_get_buf(VhostShadowVirtqueue 
*svq, uint32_t *len)
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
 SVQElement svq_elem = vhost_svq_empty_elem();
-uint16_t last_used, last_used_chain, num;
+uint16_t last_used;
 
 if (!vhost_svq_more_used(svq)) {
 return svq_elem;
@@ -428,11 +433,8 @@ static SVQElement vhost_svq_get_buf(VhostShadowVirtqueue 
*svq, uint32_t *len)
 return svq_elem;
 }
 
-num = svq_elem.elem->in_num + svq_elem.elem->out_num;
-last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
-svq->desc_next[last_used_chain] = svq->free_head;
+svq->desc_next[svq_elem.last_chain_id] = svq->free_head;
 svq->free_head = used_elem.id;
-
 *len = used_elem.len;
 return svq_elem;
 }
-- 
2.31.1




[RFC PATCH v9 07/23] vhost: add vhost_svq_push_elem

2022-07-06 Thread Eugenio Pérez
This function allows external SVQ users to return guest's available
buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  2 ++
 hw/virtio/vhost-shadow-virtqueue.c | 16 
 2 files changed, 18 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 91c31715d9..0fbdd69153 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -88,6 +88,8 @@ typedef struct VhostShadowVirtqueue {
 
 bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len);
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index be64e0b85c..2fc5789b73 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -410,6 +410,22 @@ static VirtQueueElement 
*vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
 }
 
+/**
+ * Push an element to SVQ, returning it to the guest.
+ */
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len)
+{
+virtqueue_push(svq->vq, elem, len);
+if (svq->next_guest_avail_elem) {
+/*
+ * Avail ring was full when vhost_svq_flush was called, so it's a
+ * good moment to make more descriptors available if possible.
+ */
+vhost_handle_guest_kick(svq);
+}
+}
+
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 bool check_for_avail_queue)
 {
-- 
2.31.1




[RFC PATCH v9 20/23] vdpa: Buffer CVQ support on shadow virtqueue

2022-07-06 Thread Eugenio Pérez
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like multiqueue.

Virtio-net control VQ will copy the descriptors to qemu's VA, so we
avoid TOCTOU with the guest's or device's memory every time there is a
device model change.  When address space isolation is implemented, this
will allow CVQ to only have access to control messages too.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
implemented.  If virtio-net driver changes MAC the virtio-net device
model will be updated with the new one.

Other CVQ commands could be added here straightforwardly, but they have
not been tested.
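
The anti-TOCTOU copy can be pictured with this minimal sketch (out_buf and
out_buf_len are illustrative names for a qemu-owned bounce buffer, not the
patch's; iov_to_buf is the existing qemu helper):

    /* Sketch: bounce the guest's ctrl command into qemu memory so the
     * device model and the vdpa device both parse the same bytes. */
    static ssize_t example_copy_ctrl(const VirtQueueElement *elem,
                                     void *out_buf, size_t out_buf_len)
    {
        size_t copied = iov_to_buf(elem->out_sg, elem->out_num, 0,
                                   out_buf, out_buf_len);
        if (copied < sizeof(struct virtio_net_ctrl_hdr)) {
            return -EINVAL; /* malformed command */
        }

        /* From here on, only out_buf is parsed and mapped to the device */
        return copied;
    }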

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |   3 +
 hw/virtio/vhost-vdpa.c |   5 +-
 net/vhost-vdpa.c   | 373 +
 3 files changed, 379 insertions(+), 2 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include 
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
 GPtrArray *shadow_vqs;
+const VhostShadowVirtqueueOps *shadow_vq_ops;
+void *shadow_vq_ops_opaque;
 struct vhost_dev *dev;
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 613c3483b0..94bda07b4d 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -417,9 +417,10 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 
 shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
-g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree, NULL,
-NULL);
+g_autoptr(VhostShadowVirtqueue) svq = NULL;
 
+svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+v->shadow_vq_ops_opaque);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index b0158f625e..e415cc8de5 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,15 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
+#include "qemu/buffer.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include 
@@ -25,6 +29,26 @@
 #include "monitor/monitor.h"
 #include "hw/virtio/vhost.h"
 
+typedef struct CVQElement {
+/* Device's in and out buffer */
+void *in_buf, *out_buf;
+
+/* Optional guest element from where this cvqelement was created */
+VirtQueueElement *guest_elem;
+
+/* Control header sent by the guest. */
+struct virtio_net_ctrl_hdr ctrl;
+
+/* vhost-vdpa device, for cleanup reasons */
+struct vhost_vdpa *vdpa;
+
+/* Length of out data */
+size_t out_len;
+
+/* Copy of the out data sent by the guest excluding ctrl. */
+uint8_t out_data[];
+} CVQElement;
+
 /* Todo:need to add the multiqueue support here */
 typedef struct VhostVDPAState {
 NetClientState nc;
@@ -187,6 +211,351 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Unmap a descriptor chain of a SVQ element, optionally copying its in buffers
+ *
+ * @svq: Shadow VirtQueue
+ * @iova: SVQ IO Virtual address of descriptor
+ * @iov: Optional iovec to store device writable buffer
+ * @iov_cnt: iov length
+ * @buf_len: Length written by the device
+ *
+ * TODO: Use me! and adapt to net/vhost-vdpa format
+ * Print error message in case of error
+ */
+static void vhost_vdpa_cvq_unmap_buf(CVQElement *elem, void *addr)
+{
+struct vhost_vdpa *v = elem->vdpa;
+VhostIOVATree *tree = v->iova_tree;
+DMAMap needle = {
+/*
+ * No need to specify size or to look for more translations since
+ * this contiguous chunk was allocated by us.
+ */
+.translated_addr = (hwaddr)(uintptr_t)addr,
+};
+const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+int r;
+
+if (unlikely(!map)) {
+error_report("

[RFC PATCH v9 09/23] vhost: Add SVQElement

2022-07-06 Thread Eugenio Pérez
This will allow SVQ to add metadata to the different queue elements. To
simplify changes, only the actual element is stored in this patch.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  8 --
 hw/virtio/vhost-shadow-virtqueue.c | 41 --
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 0fbdd69153..e434dc63b0 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,10 @@
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/vhost-iova-tree.h"
 
+typedef struct SVQElement {
+VirtQueueElement *elem;
+} SVQElement;
+
 typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
 typedef int (*ShadowVirtQueueStart)(VhostShadowVirtqueue *svq,
 void *opaque);
@@ -55,8 +59,8 @@ typedef struct VhostShadowVirtqueue {
 /* IOVA mapping */
 VhostIOVATree *iova_tree;
 
-/* Map for use the guest's descriptors */
-VirtQueueElement **ring_id_maps;
+/* Each element context */
+SVQElement *ring_id_maps;
 
 /* Next VirtQueue element that guest made available */
 VirtQueueElement *next_guest_avail_elem;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 46d3c1d74f..913bca8769 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -237,7 +237,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
 return false;
 }
 
-svq->ring_id_maps[qemu_head] = elem;
+svq->ring_id_maps[qemu_head].elem = elem;
 return true;
 }
 
@@ -385,15 +385,25 @@ static uint16_t vhost_svq_last_desc_of_chain(const 
VhostShadowVirtqueue *svq,
 return i;
 }
 
-static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
-   uint32_t *len)
+static bool vhost_svq_is_empty_elem(SVQElement elem)
+{
+return elem.elem == NULL;
+}
+
+static SVQElement vhost_svq_empty_elem(void)
+{
+return (SVQElement){};
+}
+
+static SVQElement vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len)
 {
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
+SVQElement svq_elem = vhost_svq_empty_elem();
 uint16_t last_used, last_used_chain, num;
 
 if (!vhost_svq_more_used(svq)) {
-return NULL;
+return svq_elem;
 }
 
 /* Only get used array entries after they have been exposed by dev */
@@ -406,24 +416,25 @@ static VirtQueueElement 
*vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 if (unlikely(used_elem.id >= svq->vring.num)) {
 qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
   svq->vdev->name, used_elem.id);
-return NULL;
+return svq_elem;
 }
 
-if (unlikely(!svq->ring_id_maps[used_elem.id])) {
+svq_elem = svq->ring_id_maps[used_elem.id];
+svq->ring_id_maps[used_elem.id] = vhost_svq_empty_elem();
+if (unlikely(vhost_svq_is_empty_elem(svq_elem))) {
 qemu_log_mask(LOG_GUEST_ERROR,
 "Device %s says index %u is used, but it was not available",
 svq->vdev->name, used_elem.id);
-return NULL;
+return svq_elem;
 }
 
-num = svq->ring_id_maps[used_elem.id]->in_num +
-  svq->ring_id_maps[used_elem.id]->out_num;
+num = svq_elem.elem->in_num + svq_elem.elem->out_num;
 last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
 
 *len = used_elem.len;
-return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
+return svq_elem;
 }
 
 /**
@@ -454,6 +465,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_svq_disable_notification(svq);
 while (true) {
 uint32_t len;
+SVQElement svq_elem;
 g_autofree VirtQueueElement *elem = NULL;
 
 if (unlikely(i >= svq->vring.num)) {
@@ -464,11 +476,12 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 return;
 }
 
-elem = vhost_svq_get_buf(svq, &len);
-if (!elem) {
+svq_elem = vhost_svq_get_buf(svq, &len);
+if (vhost_svq_is_empty_elem(svq_elem)) {
 break;
 }
 
+elem = g_steal_pointer(&svq_elem.elem);
 virtqueue_fill(vq, elem, len, i++);
 }
 
@@ -611,7 +624,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
 memset(svq->vring.desc, 0, driver_size);
 svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
 memset(svq->vring.used, 0, device_size);
-sv

[RFC PATCH v9 06/23] virtio-net: Expose ctrl virtqueue logic

2022-07-06 Thread Eugenio Pérez
This allows external vhost-net devices to modify the state of the
VirtIO device model once the vhost-vdpa device has acknowledged the control
commands.
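
For illustration only (dev_written and the sg arrays are assumed to come
from the SVQ used-buffer path), an external backend would call the new entry
point once the device has written its ack:

    /* Sketch: refresh the virtio-net device model after the real device
     * acknowledged a CVQ command. virtio_net_ctrl_ack is the one-byte
     * status the device writes into the in buffer. */
    static void example_on_device_ack(VirtIODevice *vdev, size_t dev_written,
                                      const struct iovec *in_sg,
                                      unsigned in_num,
                                      const struct iovec *out_sg,
                                      unsigned out_num)
    {
        if (dev_written < sizeof(virtio_net_ctrl_ack)) {
            return; /* the device did not write a full status byte */
        }
        virtio_net_handle_ctrl_iov(vdev, in_sg, in_num, out_sg, out_num);
    }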

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h |  4 ++
 hw/net/virtio-net.c| 84 --
 2 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index eb87032627..42caea0d1d 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -218,6 +218,10 @@ struct VirtIONet {
 struct EBPFRSSContext ebpf_rss;
 };
 
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num);
 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
const char *type);
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7ad948ee7c..53bb92c9f1 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1434,57 +1434,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t 
cmd,
 return VIRTIO_NET_OK;
 }
 
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
 struct virtio_net_ctrl_hdr ctrl;
 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
-VirtQueueElement *elem;
 size_t s;
 struct iovec *iov, *iov2;
-unsigned int iov_cnt;
+
+if (iov_size(in_sg, in_num) < sizeof(status) ||
+iov_size(out_sg, out_num) < sizeof(ctrl)) {
+virtio_error(vdev, "virtio-net ctrl missing headers");
+return 0;
+}
+
+iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
+s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
+iov_discard_front(&iov, &out_num, sizeof(ctrl));
+if (s != sizeof(ctrl)) {
+status = VIRTIO_NET_ERR;
+} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
+status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
+status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
+status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
+status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+}
+
+s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
+assert(s == sizeof(status));
+
+g_free(iov2);
+return sizeof(status);
+}
+
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtQueueElement *elem;
 
 for (;;) {
+size_t written;
 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 if (!elem) {
 break;
 }
-if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
-iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
-virtio_error(vdev, "virtio-net ctrl missing headers");
+
+written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
+ elem->out_sg, elem->out_num);
+if (written > 0) {
+virtqueue_push(vq, elem, written);
+virtio_notify(vdev, vq);
+g_free(elem);
+} else {
 virtqueue_detach_element(vq, elem, 0);
 g_free(elem);
 break;
 }
-
-iov_cnt = elem->out_num;
-iov2 = iov = g_memdup2(elem->out_sg,
-   sizeof(struct iovec) * elem->out_num);
-s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
-iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
-if (s != sizeof(ctrl)) {
-status = VIRTIO_NET_ERR;
-} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
-status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
-status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
-status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
-  

[RFC PATCH v9 02/23] vhost: move descriptor translation to vhost_svq_vring_write_descs

2022-07-06 Thread Eugenio Pérez
It's done for both in and out descriptors so it's better placed here.

Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 39 +-
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 9280285435..2939f4a243 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const 
VhostShadowVirtqueue *svq,
 return true;
 }
 
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-const struct iovec *iovec, size_t num,
-bool more_descs, bool write)
+/**
+ * Write descriptors to SVQ vring
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Cache for hwaddr
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
+const struct iovec *iovec, size_t num,
+bool more_descs, bool write)
 {
 uint16_t i = svq->free_head, last = svq->free_head;
 unsigned n;
 uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
 vring_desc_t *descs = svq->vring.desc;
+bool ok;
 
 if (num == 0) {
-return;
+return true;
+}
+
+ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+if (unlikely(!ok)) {
+return false;
 }
 
 for (n = 0; n < num; n++) {
@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue 
*svq, hwaddr *sg,
 }
 
 svq->free_head = le16_to_cpu(svq->desc_next[last]);
+return true;
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@@ -169,21 +188,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return false;
 }
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
+ elem->in_num > 0, false);
 if (unlikely(!ok)) {
 return false;
 }
-vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
-elem->in_num > 0, false);
-
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
+ true);
 if (unlikely(!ok)) {
+/* TODO unwind out_sg */
 return false;
 }
 
-vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
-
 /*
  * Put the entry in the available array (but don't update avail->idx until
  * they do sync).
-- 
2.31.1




[RFC PATCH v9 00/23] Net Control VQ support in SVQ

2022-07-06 Thread Eugenio Pérez
Control virtqueue is used by the networking device for accepting various
commands from the driver. It is a must for supporting multiqueue and other
configurations.

Shadow VirtQueue (SVQ) already makes migration of virtqueue states possible,
effectively intercepting them so qemu can track what regions of memory are
dirty because of device action and need migration. However, this does not
cover the networking device state seen by the driver, which is changed by CVQ
messages, like MAC address changes from the driver.

This series uses the SVQ infrastructure to intercept the networking control
messages used by the device. This way, qemu is able to update the VirtIONet
device model and to migrate it.

Intercepting all queues slows device data forwarding, so this is not the final
solution. To solve that, only the CVQ must be intercepted all the time. This
will be achieved in future revisions using the ASID infrastructure, which
allows different translations for different virtqueues.

Another pending item is to move the data virtqueues from passthrough mode to
SVQ mode. To achieve that, a reliable way to obtain the vq state is needed; a
STOP ioctl will be added for that.

To intercept all the virtqueues and update the qemu NIC device model as the
guest changes the device state, add the cmdline option x-svq=on:

-netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-svq=on

Lastly, the device state is sent each time qemu starts the device, using SVQ
to inject commands through CVQ. This allows the guest to transparently see the
same device state at resume.

First two patches reorder code so it's easier to apply later patches on top of
the code base.

The third patch reorders the DRIVER_OK and the set_vring_enable ioctl sending.
This is done so CVQ commands reach the device before the device has the chance
to use the rx queues with incorrect data.

The fourth patch replaces the way of getting the vq state. Since qemu will be
able to inject buffers, the device's used_idx is not valid anymore and we must
use the guest-visible one.

The fifth patch creates the API in SVQ to call when the device starts. This
will allow vhost-vdpa net to inject control commands before the rest of the
queues start.

The sixth patch enables SVQ to return buffers externally. While it's not
possible at this point in the series, CVQ will need to return the available
buffers.

Patches 8-12 enable SVQ to communicate context data of the used buffer to the
SVQ caller.

Patch 13 enables vhost-vdpa net to inject buffers into the device. This will
be used both to inject the state at the beginning and to decouple the guest's
CVQ buffers from the ones sent to the device. This protects against TOCTOU,
preventing the device and qemu from seeing different messages. In the future,
this may also be used to emulate _F_ANNOUNCE.

The previous patch and patches 14 to 17 make SVQ capable of being
inspected.

Patches 18 to 20 enable the update of the virtio-net device model for each
CVQ message acknowledged by the device.

Patches 21-22 enable the update of the device configuration right at start.

Finally, the last commit enables the x-svq parameter.

Comments are welcome.

TODO:
* Review failure paths; some have TODO notes, others don't.

Changes from rfc v8:
* Remove ASID part. Delete x-svq-cvq mode too.
* Move all DMA memory management to net/vhost-vdpa, instead of svq.
* Use of qemu_real_host_page_size.
* Improved doc, general fixes.

Changes from rfc v7:
* Don't map all guest space in ASID 1 but copy all the buffers. No need for
  more memory listeners.
* Move net backend start callback to SVQ.
* Wait at SVQ start for the device to use the CVQ commands, avoiding races.
* Changed ioctls, but they're provisional anyway.
* Reorder commits so refactor and code adding ones are closer to usage.
* Usual cleaning: better tracing, doc, patches messages, ...

Changes from rfc v6:
* Fix bad iotlb updates order when batching was enabled
* Add reference counting to iova_tree so cleaning is simpler.

Changes from rfc v5:
* Fix bad calculation of the cvq end group when MQ is not acked by the guest.

Changes from rfc v4:
* Add missing tracing
* Add multiqueue support
* Use already sent version for replacing g_memdup
* Care with memory management

Changes from rfc v3:
* Fix bad returning of descriptors to SVQ list.

Changes from rfc v2:
* Fix use-after-free.

Changes from rfc v1:
* Rebase to latest master.
* Configure ASID instead of assuming cvq asid != data vqs asid.
* Update device model so (MAC) state can be migrated too.

Eugenio Pérez (23):
  vhost: Return earlier if used buffers overrun
  vhost: move descriptor translation to vhost_svq_vring_write_descs
  vdpa: delay set_vring_ready after DRIVER_OK
  vhost: Get vring base from vq, not svq
  vhost: Add ShadowVirtQueueStart operation
  virtio-net: Expose ctrl virtqueue logic
  vhost: add vhost_svq_push_elem
  vhost: Decouple vhost_svq_add_split from VirtQueueElement
  vhost: Add SVQElement
  vhost: Reorder vhost_svq_last_desc_of_chain
  vhost: M

[RFC PATCH v9 13/23] vhost: Add vhost_svq_inject

2022-07-06 Thread Eugenio Pérez
This allows qemu to inject buffers into the device.
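
As a sketch of the intended use (cmd_buf, cmd_len, ack_buf and ctx are
illustrative names; the buffers must already be mapped in the device's IOVA
space), the out descriptors come first in the iovec, followed by the in ones:

    /* Sketch: inject a two-descriptor chain, a device-readable command
     * followed by a device-writable ack. ctx must be non-NULL, since SVQ
     * uses a NULL opaque to mark empty descriptor slots. */
    struct iovec sg[] = {
        { .iov_base = cmd_buf, .iov_len = cmd_len },                     /* out */
        { .iov_base = ack_buf, .iov_len = sizeof(virtio_net_ctrl_ack) }, /* in  */
    };
    int r = vhost_svq_inject(svq, sg, 1, 1, ctx);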

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  2 ++
 hw/virtio/vhost-shadow-virtqueue.c | 34 ++
 2 files changed, 36 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index a811f90e01..d01d2370db 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -98,6 +98,8 @@ bool vhost_svq_valid_features(uint64_t features, Error 
**errp);
 
 void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
  const VirtQueueElement *elem, uint32_t len);
+int vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
+ size_t out_num, size_t in_num, void *opaque);
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 492bb12b5f..bd9e34b413 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -283,6 +283,40 @@ static bool vhost_svq_add_element(VhostShadowVirtqueue 
*svq,
 return ok;
 }
 
+/**
+ * Inject a chain of buffers to the device
+ *
+ * @svq: Shadow VirtQueue
+ * @iov: I/O vector
+ * @out_num: Number of front out descriptors
+ * @in_num: Number of last input descriptors
+ * @opaque: Contextual data to store in descriptor
+ *
+ * Return 0 on success, -ENOMEM if cannot inject
+ */
+int vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
+ size_t out_num, size_t in_num, void *opaque)
+{
+bool ok;
+
+/*
+ * All vhost_svq_inject calls are controlled by qemu, so we won't hit these
+ * assertions.
+ */
+assert(out_num || in_num);
+assert(svq->ops);
+
+if (unlikely(svq->next_guest_avail_elem)) {
+error_report("Injecting in a full queue");
+return -ENOMEM;
+}
+
+ok = vhost_svq_add(svq, iov, out_num, iov + out_num, in_num, opaque);
+assert(ok);
+vhost_svq_kick(svq);
+return 0;
+}
+
 /**
  * Forward available buffers.
  *
-- 
2.31.1




[RFC PATCH v9 01/23] vhost: Return earlier if used buffers overrun

2022-07-06 Thread Eugenio Pérez
The previous function missed the just-picked avail buffer from the queue.
The used queue still stays blocked forever this way, but it is cleaner to
check before calling vhost_svq_get_buf.

Fixes: 100890f7cad50 ("vhost: Shadow virtqueue buffers forwarding")
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 56c96ebd13..9280285435 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -405,19 +405,21 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 vhost_svq_disable_notification(svq);
 while (true) {
 uint32_t len;
-g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
-if (!elem) {
-break;
-}
+g_autofree VirtQueueElement *elem = NULL;
 
 if (unlikely(i >= svq->vring.num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
  "More than %u used buffers obtained in a %u size SVQ",
  i, svq->vring.num);
-virtqueue_fill(vq, elem, len, i);
-virtqueue_flush(vq, i);
+virtqueue_flush(vq, svq->vring.num);
 return;
 }
+
+elem = vhost_svq_get_buf(svq, &len);
+if (!elem) {
+break;
+}
+
 virtqueue_fill(vq, elem, len, i++);
 }
 
-- 
2.31.1




[RFC PATCH v9 08/23] vhost: Decouple vhost_svq_add_split from VirtQueueElement

2022-07-06 Thread Eugenio Pérez
VirtQueueElement comes from the guest, but we're moving towards SVQ being
able to inject elements without the guest's knowledge.

To do so, make this accept sg buffers directly, instead of using
VirtQueueElement.

Add vhost_svq_add_element to maintain the element convenience.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 +-
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 2fc5789b73..46d3c1d74f 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -172,30 +172,32 @@ static bool 
vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
-VirtQueueElement *elem, unsigned *head)
+const struct iovec *out_sg, size_t out_num,
+const struct iovec *in_sg, size_t in_num,
+unsigned *head)
 {
 unsigned avail_idx;
 vring_avail_t *avail = svq->vring.avail;
 bool ok;
-g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
+g_autofree hwaddr *sgs = NULL;
 
 *head = svq->free_head;
 
 /* We need some descriptors here */
-if (unlikely(!elem->out_num && !elem->in_num)) {
+if (unlikely(!out_num && !in_num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "Guest provided element with no descriptors");
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
- elem->in_num > 0, false);
+sgs = g_new(hwaddr, MAX(out_num, in_num));
+ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
+ false);
 if (unlikely(!ok)) {
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
- true);
+ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
 if (unlikely(!ok)) {
 /* TODO unwind out_sg */
 return false;
@@ -223,10 +225,13 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
  * takes ownership of the element: In case of failure, it is free and the SVQ
  * is considered broken.
  */
-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec 
*out_sg,
+  size_t out_num, const struct iovec *in_sg,
+  size_t in_num, VirtQueueElement *elem)
 {
 unsigned qemu_head;
-bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+bool ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
+  &qemu_head);
 if (unlikely(!ok)) {
 g_free(elem);
 return false;
@@ -250,6 +255,18 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
 event_notifier_set(&svq->hdev_kick);
 }
 
+static bool vhost_svq_add_element(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem)
+{
+bool ok = vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
+elem->in_num, elem);
+if (ok) {
+vhost_svq_kick(svq);
+}
+
+return ok;
+}
+
 /**
  * Forward available buffers.
  *
@@ -302,12 +319,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 return;
 }
 
-ok = vhost_svq_add(svq, elem);
+ok = vhost_svq_add_element(svq, g_steal_pointer(&elem));
 if (unlikely(!ok)) {
 /* VQ is broken, just return and ignore any other kicks */
 return;
 }
-vhost_svq_kick(svq);
 }
 
 virtio_queue_set_notification(svq->vq, true);
-- 
2.31.1



