This adds a new vring feature bit: when enabled, host and guest poll the
available/used ring directly instead of looking at the index field
first.

To guarantee it is possible to detect updates, the high bits (those above
vring.num - 1) in the ring head ID value are set to match the corresponding
index bits - these change on each wrap-around.  The writer also XORs this
value with 0x8000 so that rings can start out zero-initialized.

The reader is modified to mask off these high bits when looking
up descriptors.
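
For illustration, the encoding amounts to roughly the following (a minimal
sketch, assuming num is a power of two; the helper names here are mine, not
part of the patch):

        /* Writer: fold the index wrap bits into the head ID; XOR with
         * 0x8000 so an all-zero ring reads as "no new entries". */
        static inline u16 poll_encode(u16 head, u16 idx, u16 num)
        {
                return head ^ ((idx ^ 0x8000) & ~(num - 1));
        }

        /* Reader: an entry is new iff its high bits match the expected
         * index; mask them off to recover the descriptor head. */
        static inline bool poll_entry_is_new(u16 val, u16 idx, u16 num)
        {
                return !((val ^ idx ^ 0x8000) & ~(num - 1));
        }

        static inline u16 poll_head(u16 val, u16 num)
        {
                return val & (num - 1);
        }

Note that a freshly zeroed ring fails the is-new check at idx = 0 thanks to
the 0x8000 XOR, so no special ring initialization is needed.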

The point is to reduce the number of cacheline misses
for both reads and writes.

I see a speedup of about 20% on a multithreaded micro-benchmark
(virtio-test), but a regression of about 2% on a single-threaded one
(vring_bench).  I suspect this is because complete_multi_user is
implemented suboptimally.

TODO:
        investigate single-threaded regression
        better name for the feature flag
        split the patch to make it easier to review
        look at more aggressive ring layout changes
        write a spec patch

This is on top of the following patches in my tree:
        virtio_ring: Shadow available ring flags & index
        vhost: replace % with & on data path
        tools/virtio: fix byteswap logic
        tools/virtio: move list macro stubs

Signed-off-by: Michael S. Tsirkin <m...@redhat.com>
---
 drivers/vhost/vhost.h            |   3 +-
 include/linux/vringh.h           |   3 +
 include/uapi/linux/virtio_ring.h |   3 +
 drivers/vhost/vhost.c            | 104 ++++++++++++++++++--------
 drivers/vhost/vringh.c           | 153 +++++++++++++++++++++++++++++++++------
 drivers/virtio/virtio_ring.c     |  40 ++++++++--
 tools/virtio/virtio_test.c       |  14 +++-
 7 files changed, 255 insertions(+), 65 deletions(-)

diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d3f7674..aeeb15d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -175,7 +175,8 @@ enum {
                         (1ULL << VIRTIO_RING_F_EVENT_IDX) |
                         (1ULL << VHOST_F_LOG_ALL) |
                         (1ULL << VIRTIO_F_ANY_LAYOUT) |
-                        (1ULL << VIRTIO_F_VERSION_1)
+                        (1ULL << VIRTIO_F_VERSION_1) |
+                        (1ULL << VIRTIO_RING_F_POLL)
 };
 
 static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index bc6c28d..13a9e3e 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -40,6 +40,9 @@ struct vringh {
        /* Can we get away with weak barriers? */
        bool weak_barriers;
 
+       /* Poll ring directly */
+       bool poll;
+
        /* Last available index we saw (ie. where we're up to). */
        u16 last_avail_idx;
 
diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index c072959..bf3ca1d 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -62,6 +62,9 @@
  * at the end of the used ring. Guest should ignore the used->flags field. */
 #define VIRTIO_RING_F_EVENT_IDX                29
 
+/* Support ring polling */
+#define VIRTIO_RING_F_POLL     33
+
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc {
        /* Address (guest-physical). */
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 85f0f0a..cdbabf5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1346,26 +1346,26 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
        /* Check it isn't doing very strange things with descriptor numbers. */
        last_avail_idx = vq->last_avail_idx;
-       if (unlikely(__get_user(avail_idx, &vq->avail->idx))) {
-               vq_err(vq, "Failed to access avail idx at %p\n",
-                      &vq->avail->idx);
-               return -EFAULT;
-       }
-       vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
-
-       if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
-               vq_err(vq, "Guest moved used index from %u to %u",
-                      last_avail_idx, vq->avail_idx);
-               return -EFAULT;
-       }
+       if (!vhost_has_feature(vq, VIRTIO_RING_F_POLL)) {
+               if (unlikely(__get_user(avail_idx, &vq->avail->idx))) {
+                       vq_err(vq, "Failed to access avail idx at %p\n",
+                              &vq->avail->idx);
+                       return -EFAULT;
+               }
+               vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
-       /* If there's nothing new since last we looked, return invalid. */
-       if (vq->avail_idx == last_avail_idx)
-               return vq->num;
+               if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
+                       vq_err(vq, "Guest moved used index from %u to %u",
+                              last_avail_idx, vq->avail_idx);
+                       return -EFAULT;
+               }
 
-       /* Only get avail ring entries after they have been exposed by guest. */
-       smp_rmb();
+               /* If there's nothing new since last we looked, return invalid. */
+               if (vq->avail_idx == last_avail_idx)
+                       return vq->num;
 
+               /* Only get avail ring entries after they have been exposed by guest. */
+               smp_rmb();
        }
 
        /* Grab the next descriptor number they're advertising, and increment
@@ -1380,11 +1380,22 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
        head = vhost16_to_cpu(vq, ring_head);
 
-       /* If their number is silly, that's an error. */
-       if (unlikely(head >= vq->num)) {
-               vq_err(vq, "Guest says index %u > %u is available",
-                      head, vq->num);
-               return -EINVAL;
+       if (vhost_has_feature(vq, VIRTIO_RING_F_POLL)) {
+               /* If there's nothing new since last we looked, return invalid. */
+               if ((head ^ last_avail_idx ^ 0x8000) & ~(vq->num - 1))
+                       return vq->num;
+
+               /* Only get avail ring entries after they have been exposed by guest. */
+               smp_rmb();
+
+               head &= vq->num - 1;
+       } else {
+               /* If their number is silly, that's an error. */
+               if (unlikely(head >= vq->num)) {
+                       vq_err(vq, "Guest says index %u > %u is available",
+                              head, vq->num);
+                       return -EINVAL;
+               }
        }
 
        /* When we start there are none of either input nor output. */
@@ -1483,6 +1494,27 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 }
 EXPORT_SYMBOL_GPL(vhost_add_used);
 
+static inline int __vhost_add_used_poll(struct vhost_virtqueue *vq,
+                                       struct vring_used_elem *heads,
+                                       struct vring_used_elem __user *used,
+                                       int idx)
+{
+       __virtio32 head = heads[0].id ^
+               cpu_to_vhost32(vq, ~(vq->num - 1) &
+                              ((vq->last_used_idx + idx) ^ 0x8000));
+
+       if (__put_user(heads[0].len, &used->len)) {
+               vq_err(vq, "Failed to write used len");
+               return -EFAULT;
+       }
+       smp_wmb();
+       if (__put_user(head, &used->id)) {
+               vq_err(vq, "Failed to write used id");
+               return -EFAULT;
+       }
+       return 0;
+}
+
 static int __vhost_add_used_n(struct vhost_virtqueue *vq,
                            struct vring_used_elem *heads,
                            unsigned count)
@@ -1493,18 +1525,28 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 
        start = vq->last_used_idx & (vq->num - 1);
        used = vq->used->ring + start;
-       if (count == 1) {
-               if (__put_user(heads[0].id, &used->id)) {
-                       vq_err(vq, "Failed to write used id");
+       if (vhost_has_feature(vq, VIRTIO_RING_F_POLL)) {
+               int ret = 0;
+               int i;
+
+               for (i = 0; i < count; ++i)
+                       ret |= __vhost_add_used_poll(vq, heads + i, used + i, i);
+               if (ret)
                        return -EFAULT;
-               }
-               if (__put_user(heads[0].len, &used->len)) {
-                       vq_err(vq, "Failed to write used len");
+       } else {
+               if (count == 1) {
+                       if (__put_user(heads[0].id, &used->id)) {
+                               vq_err(vq, "Failed to write used id");
+                               return -EFAULT;
+                       }
+                       if (__put_user(heads[0].len, &used->len)) {
+                               vq_err(vq, "Failed to write used len");
+                               return -EFAULT;
+                       }
+               } else if (__copy_to_user(used, heads, count * sizeof *used)) {
+                       vq_err(vq, "Failed to write used");
                        return -EFAULT;
                }
-       } else if (__copy_to_user(used, heads, count * sizeof *used)) {
-               vq_err(vq, "Failed to write used");
-               return -EFAULT;
        }
        if (unlikely(vq->log_used)) {
                /* Make sure data is seen before log. */
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 3bb02c6..d8aac79 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -36,18 +36,21 @@ static inline int __vringh_get_head(const struct vringh *vrh,
        u16 avail_idx, i, head;
        int err;
 
-       err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
-       if (err) {
-               vringh_bad("Failed to access avail idx at %p",
-                          &vrh->vring.avail->idx);
-               return err;
-       }
+       if (!vrh->poll) {
+               err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
+               if (err) {
+                       vringh_bad("Failed to access avail idx at %p",
+                                  &vrh->vring.avail->idx);
+                       return err;
+               }
 
-       if (*last_avail_idx == avail_idx)
-               return vrh->vring.num;
+               if (*last_avail_idx == avail_idx)
+                       return vrh->vring.num;
 
-       /* Only get avail ring entries after they have been exposed by guest. */
-       virtio_rmb(vrh->weak_barriers);
+               /* Only get avail ring entries after they have been exposed by guest. */
+               virtio_rmb(vrh->weak_barriers);
+
+       }
 
        i = *last_avail_idx & (vrh->vring.num - 1);
 
@@ -58,10 +61,20 @@ static inline int __vringh_get_head(const struct vringh *vrh,
                return err;
        }
 
-       if (head >= vrh->vring.num) {
-               vringh_bad("Guest says index %u > %u is available",
-                          head, vrh->vring.num);
-               return -EINVAL;
+       if (vrh->poll) {
+               if ((head ^ *last_avail_idx ^ 0x8000) & ~(vrh->vring.num - 1))
+                       return vrh->vring.num;
+
+               /* Only get descriptor entries after they have been exposed by guest. */
+               virtio_rmb(vrh->weak_barriers);
+
+               head &= vrh->vring.num - 1;
+       } else {
+               if (head >= vrh->vring.num) {
+                       vringh_bad("Guest says index %u > %u is available",
+                                  head, vrh->vring.num);
+                       return -EINVAL;
+               }
        }
 
        (*last_avail_idx)++;
@@ -397,6 +410,57 @@ fail:
        return err;
 }
 
+static inline int __vringh_complete_poll(struct vringh *vrh,
+                                        int (*putu32)(const struct vringh *vrh,
+                                                      __virtio32 *p, u32 val),
+                                        int (*putu16)(const struct vringh *vrh,
+                                                      __virtio16 *p, u16 val),
+                                        __virtio32 head, __virtio32 len,
+                                        bool last)
+{
+       struct vring_used *used_ring;
+       int err;
+       u16 used_idx, off;
+
+       used_ring = vrh->vring.used;
+       used_idx = vrh->last_used_idx + vrh->completed;
+
+       off = used_idx & (vrh->vring.num - 1);
+
+       err = putu32(vrh, &used_ring->ring[off].len, len);
+       if (err) {
+               vringh_bad("Failed to write used length %u at %p",
+                          off, &used_ring->ring[off]);
+               return err;
+       }
+
+       /* Make sure length is written before we expose the head id. */
+       virtio_wmb(vrh->weak_barriers);
+
+       head ^= cpu_to_vringh32(vrh, (used_idx & ~(vrh->vring.num - 1)) ^ 0x8000);
+       err = putu32(vrh, &used_ring->ring[off].id, head);
+       if (err) {
+               vringh_bad("Failed to write used id %u at %p",
+                          off, &used_ring->ring[off]);
+               return err;
+       }
+
+       if (last) {
+               /* Make sure buffer is written before we update index. */
+               virtio_wmb(vrh->weak_barriers);
+
+               err = putu16(vrh, &vrh->vring.used->idx, used_idx + 1);
+               if (err) {
+                       vringh_bad("Failed to update used index at %p",
+                                  &vrh->vring.used->idx);
+                       return err;
+               }
+       }
+
+       vrh->completed++;
+       return 0;
+}
+
 static inline int __vringh_complete(struct vringh *vrh,
                                    const struct vring_used_elem *used,
                                    unsigned int num_used,
@@ -556,6 +620,13 @@ static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
        return rc;
 }
 
+static inline int putu32_user(const struct vringh *vrh, __virtio32 *p, u32 val)
+{
+       __virtio32 v = cpu_to_vringh32(vrh, val);
+       return put_user(v, (__force __virtio32 __user *)p);
+}
+
+
 static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
        __virtio16 v = cpu_to_vringh16(vrh, val);
@@ -615,6 +686,7 @@ int vringh_init_user(struct vringh *vrh, u64 features,
 
        vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
        vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
+       vrh->poll = (features & (1ULL << VIRTIO_RING_F_POLL));
        vrh->weak_barriers = weak_barriers;
        vrh->completed = 0;
        vrh->last_avail_idx = 0;
@@ -752,11 +824,19 @@ EXPORT_SYMBOL(vringh_abandon_user);
  */
 int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
 {
-       struct vring_used_elem used;
+       if (vrh->poll) {
+               return __vringh_complete_poll(vrh, putu32_user,
+                                             putu16_user,
+                                             cpu_to_vringh32(vrh, head),
+                                             cpu_to_vringh32(vrh, len),
+                                             true);
+       } else {
+               struct vring_used_elem used;
 
-       used.id = cpu_to_vringh32(vrh, head);
-       used.len = cpu_to_vringh32(vrh, len);
-       return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
+               used.id = cpu_to_vringh32(vrh, head);
+               used.len = cpu_to_vringh32(vrh, len);
+               return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
+       }
 }
 EXPORT_SYMBOL(vringh_complete_user);
 
@@ -773,8 +853,21 @@ int vringh_complete_multi_user(struct vringh *vrh,
                               const struct vring_used_elem used[],
                               unsigned num_used)
 {
-       return __vringh_complete(vrh, used, num_used,
-                                putu16_user, putused_user);
+       if (vrh->poll) {
+               int i, r;
+
+               for (i = 0; i < num_used; ++i) {
+                       r = __vringh_complete_poll(vrh, putu32_user, putu16_user,
+                                                  used[i].id, used[i].len,
+                                                  i == num_used - 1);
+                       if (r)
+                               return r;
+               }
+               return 0;
+       } else {
+               return __vringh_complete(vrh, used, num_used,
+                                        putu16_user, putused_user);
+       }
 }
 EXPORT_SYMBOL(vringh_complete_multi_user);
 
@@ -830,6 +923,12 @@ static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
        return 0;
 }
 
+static inline int putu32_kern(const struct vringh *vrh, __virtio32 *p, u32 val)
+{
+       ACCESS_ONCE(*p) = cpu_to_vringh32(vrh, val);
+       return 0;
+}
+
 static inline int copydesc_kern(void *dst, const void *src, size_t len)
 {
        memcpy(dst, src, len);
@@ -876,6 +975,7 @@ int vringh_init_kern(struct vringh *vrh, u64 features,
 
        vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
        vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
+       vrh->poll = (features & (1ULL << VIRTIO_RING_F_POLL));
        vrh->weak_barriers = weak_barriers;
        vrh->completed = 0;
        vrh->last_avail_idx = 0;
@@ -987,12 +1087,17 @@ EXPORT_SYMBOL(vringh_abandon_kern);
  */
 int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
 {
-       struct vring_used_elem used;
+       if (vrh->poll) {
+               return __vringh_complete_poll(vrh, putu32_kern, putu16_kern,
+                                             head, len, true);
+       } else {
+               struct vring_used_elem used;
 
-       used.id = cpu_to_vringh32(vrh, head);
-       used.len = cpu_to_vringh32(vrh, len);
+               used.id = cpu_to_vringh32(vrh, head);
+               used.len = cpu_to_vringh32(vrh, len);
 
-       return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
+               return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
+       }
 }
 EXPORT_SYMBOL(vringh_complete_kern);
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6262015..f25fd64 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -80,6 +80,9 @@ struct vring_virtqueue {
        /* Last used index we've seen. */
        u16 last_used_idx;
 
+       /* Poll ring instead of the index */
+       bool poll;
+
        /* Last written value to avail->flags */
        u16 avail_flags_shadow;
 
@@ -239,9 +242,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
        /* Set token. */
        vq->data[head] = data;
 
+       avail = vq->avail_idx_shadow & (vq->vring.num - 1);
+
+       if (vq->poll) {
+               virtio_wmb(vq->weak_barriers);
+               head ^= ((vq->avail_idx_shadow ^ 0x8000) & ~(vq->vring.num - 1));
+       }
        /* Put entry in available array (but don't update avail->idx until they
         * do sync). */
-       avail = vq->avail_idx_shadow & (vq->vring.num - 1);
        vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 
        /* Descriptors and available array need to be set before we expose the
@@ -488,17 +496,32 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
                return NULL;
        }
 
-       if (!more_used(vq)) {
-               pr_debug("No more buffers in queue\n");
-               END_USE(vq);
-               return NULL;
-       }
+       if (!vq->poll) {
+               if (!more_used(vq)) {
+                       pr_debug("No more buffers in queue\n");
+                       END_USE(vq);
+                       return NULL;
+               }
 
-       /* Only get used array entries after they have been exposed by host. */
-       virtio_rmb(vq->weak_barriers);
+               /* Only get used array entries after they have been exposed by host. */
+               virtio_rmb(vq->weak_barriers);
+       }
 
        last_used = (vq->last_used_idx & (vq->vring.num - 1));
        i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
+
+       if (vq->poll) {
+               if ((i ^ vq->last_used_idx ^ 0x8000) & ~(vq->vring.num - 1)) {
+                       pr_debug("No more buffers in queue\n");
+                       END_USE(vq);
+                       return NULL;
+               }
+               i &= vq->vring.num - 1;
+
+               /* Only get used array entries after they have been exposed by host. */
+               virtio_rmb(vq->weak_barriers);
+       }
+
        *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
 
        if (unlikely(i >= vq->vring.num)) {
@@ -764,6 +787,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 
        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
+       vq->poll = virtio_has_feature(vdev, VIRTIO_RING_F_POLL);
 
        /* No callback?  Tell other side not to bother us. */
        if (!callback) {
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index e044589..585cd21 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -220,6 +220,14 @@ const struct option longopts[] = {
                .val = 'e',
        },
        {
+               .name = "ring-poll",
+               .val = 'R',
+       },
+       {
+               .name = "no-ring-poll",
+               .val = 'r',
+       },
+       {
                .name = "indirect",
                .val = 'I',
        },
@@ -261,7 +269,8 @@ int main(int argc, char **argv)
 {
        struct vdev_info dev;
        unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
-               (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+               (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1) |
+               (1ULL << VIRTIO_RING_F_POLL);
        int o;
        bool delayed = false;
 
@@ -276,6 +285,9 @@ int main(int argc, char **argv)
                case 'e':
                        features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
                        break;
+               case 'r':
+                       features &= ~(1ULL << VIRTIO_RING_F_POLL);
+                       break;
                case 'h':
                        help();
                        goto done;
-- 
MST