On Tue, Feb 19, 2019 at 09:06:42PM +0800, Jason Wang wrote: > > On 2019/2/19 下午6:40, Wei Xu wrote: > >On Tue, Feb 19, 2019 at 03:19:58PM +0800, Jason Wang wrote: > >>On 2019/2/14 下午12:26, w...@redhat.com wrote: > >>>From: Wei Xu <w...@redhat.com> > >>> > >>>Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter': > >>>For Tx(guest transmitting), they are the same after each pop of a desc. > >>> > >>>For Rx(guest receiving), they are also the same when there are enough > >>>descriptors to carry the payload for a packet(e.g. usually 16 descs are > >>>needed for a 64k packet in typical iperf tcp connection with tso enabled), > >>>however, when the ring is running out of descriptors while there are > >>>still a few free ones, e.g. 6 descriptors are available which is not > >>>enough to carry an entire packet which needs 16 descriptors, in this > >>>case the 'avail_wrap_counter' should be set as the first one pending > >>>being handled by guest driver in order to get a notification, and the > >>>'last_avail_wrap_counter' should stay unchanged to the head of available > >>>descriptors, like below: > >>> > >>>Mark meaning: > >>> | | -- available > >>> |*| -- used > >>> > >>>A Snapshot of the queue: > >>> last_avail_idx = 253 > >>> last_avail_wrap_counter = 1 > >>> | > >>> +---------------------------------------------+ > >>> 0 | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255 > >>> +---------------------------------------------+ > >>> | > >>> shadow_avail_idx = 3 > >>> avail_wrap_counter = 0 > >> > >>Well this might not be the good place to describe the difference between > >>shadow_avail_idx and last_avail_idx. And the comments above their definition > >>looks good enough? > >Sorry, I do not get it, can you elaborate more? > > > I meant the comment is good enough to explain what it did. For packed ring, > the only difference is the wrap counter. You can add e.g "The wrap counter > of next head to pop" to above last_avail_wrap_counter. And so does > shadow_avail_wrap_counter.
OK, I will remove the example part. > > > > > >This is one of the buggy parts of packed ring, it is good to make it clear > >here. > > > >> /* Next head to pop */ > >> uint16_t last_avail_idx; > >> > >> /* Last avail_idx read from VQ. */ > >> uint16_t shadow_avail_idx; > >> > >What is the meaning of these comment? > > > It's pretty easy to be understood, isn't it? :) > > > > Do you mean I should replace put them > >to the comments also? thanks. > > > >>Instead, how or why need event suppress is not mentioned ... > >Yes, but presumably this knowledge has been acquired from reading through the > >spec, so I skipped this part. > > > You need at least add reference to the spec. Commit log is pretty important > for starters to understand what has been done in the patch before reading > the code. I'm pretty sure they will get confused for reading what you wrote > here. > OK. > > Thanks > > > > > >Wei > > > >> > >> > >>>Signed-off-by: Wei Xu <w...@redhat.com> > >>>--- > >>> hw/virtio/virtio.c | 137 > >>> +++++++++++++++++++++++++++++++++++++++++++++++++---- > >>> 1 file changed, 128 insertions(+), 9 deletions(-) > >>> > >>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>index 7e276b4..8cfc7b6 100644 > >>>--- a/hw/virtio/virtio.c > >>>+++ b/hw/virtio/virtio.c > >>>@@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, > >>>VRingDesc *desc, > >>> virtio_tswap16s(vdev, &desc->next); > >>> } > >>>+static void vring_packed_event_read(VirtIODevice *vdev, > >>>+ MemoryRegionCache *cache, > >>>VRingPackedDescEvent *e) > >>>+{ > >>>+ address_space_read_cached(cache, 0, e, sizeof(*e)); > >>>+ virtio_tswap16s(vdev, &e->off_wrap); > >>>+ virtio_tswap16s(vdev, &e->flags); > >>>+} > >>>+ > >>>+static void vring_packed_off_wrap_write(VirtIODevice *vdev, > >>>+ MemoryRegionCache *cache, uint16_t off_wrap) > >>>+{ > >>>+ virtio_tswap16s(vdev, &off_wrap); > >>>+ address_space_write_cached(cache, offsetof(VRingPackedDescEvent, > >>>off_wrap), > >>>+ &off_wrap, sizeof(off_wrap)); > >>>+ address_space_cache_invalidate(cache, > >>>+ offsetof(VRingPackedDescEvent, off_wrap), > >>>sizeof(off_wrap)); > >>>+} > >>>+ > >>>+static void vring_packed_flags_write(VirtIODevice *vdev, > >>>+ MemoryRegionCache *cache, uint16_t flags) > >>>+{ > >>>+ virtio_tswap16s(vdev, &flags); > >>>+ address_space_write_cached(cache, offsetof(VRingPackedDescEvent, > >>>flags), > >>>+ &flags, sizeof(flags)); > >>>+ address_space_cache_invalidate(cache, > >>>+ offsetof(VRingPackedDescEvent, flags), > >>>sizeof(flags)); > >>>+} > >>>+ > >>> static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue > >>> *vq) > >>> { > >>> VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches); > >>>@@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue > >>>*vq, uint16_t val) > >>> address_space_cache_invalidate(&caches->used, pa, sizeof(val)); > >>> } > >>>-void virtio_queue_set_notification(VirtQueue *vq, int enable) > >>>+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable) > >>> { > >>>- vq->notification = enable; > >>>- > >>>- if (!vq->vring.desc) { > >>>- return; > >>>- } > >>>- > >>> rcu_read_lock(); > >>> if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { > >>> vring_set_avail_event(vq, vring_avail_idx(vq)); > >>>@@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int > >>>enable) > >>> rcu_read_unlock(); > >>> } > >>>+static void virtio_queue_set_notification_packed(VirtQueue *vq, int > >>>enable) > >>>+{ > >>>+ VRingPackedDescEvent e; > >>>+ VRingMemoryRegionCaches *caches; > >>>+ > >>>+ rcu_read_lock(); > >>>+ caches = vring_get_region_caches(vq); > >>>+ vring_packed_event_read(vq->vdev, &caches->used, &e); > >>>+ > >>>+ if (!enable) { > >>>+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { > >>>+ /* no need to write device area since this is outdated. */ > >> > >>We should advertise off and wrap in this case as well, otherwise we may get > >>notifications earlier than expected. > >Is it necessary? Supposing offset & wrap_counter are always set before update > >notification flags, it is reliable to skip this for disabling, isn't it? > > > You should either: > > - advertise the EVENT_FLAG_DISABLE > > or > > - advertise the new off wrap otherwise you may get notified early. > > Both you none of above did by your code. Right. > > > > > >While the logic here is unclear, I did a concise one like below > >which doesn't use the 'vq->notification' as in your comment for v2, > >I think this should work for packed ring as well, anything I missed? > > > > if (!enable) { > > e.flags = VRING_PACKED_EVENT_FLAG_DISABLE; > > } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { > > uint16_t off_wrap; > > > > off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << > > 15; > > vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap); > > > > /* Make sure off_wrap is wrote before flags */ > > smp_wmb(); > > e.flags = VRING_PACKED_EVENT_FLAG_DESC; > > } else { > > e.flags = VRING_PACKED_EVENT_FLAG_ENABLE; > > } > > > > vring_packed_flags_write(vq->vdev, &caches->used, e.flags); > > > Looks good to me. Thanks. > > Thanks > > > > > >> > >>>+ goto out; > >>>+ } > >>>+ > >>>+ e.flags = VRING_PACKED_EVENT_FLAG_DISABLE; > >>>+ goto update; > >>>+ } > >>>+ > >>>+ e.flags = VRING_PACKED_EVENT_FLAG_ENABLE; > >> > >>Here and the above goto could be eliminated by: > >> > >>if (even idx) { > >> > >>... > >> > >>} else if (enable) { > >> > >>... > >> > >>} else { > >> > >>... > >> > >>} > >> > >thanks, I have removed it in above snippet. > > > >Wei > >>Thanks > >> > >> > >>>+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { > >>>+ uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter > >>><< 15; > >>>+ > >>>+ vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap); > >>>+ /* Make sure off_wrap is wrote before flags */ > >>>+ smp_wmb(); > >>>+ > >>>+ e.flags = VRING_PACKED_EVENT_FLAG_DESC; > >>>+ } > >>>+ > >>>+update: > >>>+ vring_packed_flags_write(vq->vdev, &caches->used, e.flags); > >>>+out: > >>>+ rcu_read_unlock(); > >>>+} > >>>+ > >>>+void virtio_queue_set_notification(VirtQueue *vq, int enable) > >>>+{ > >>>+ vq->notification = enable; > >>>+ > >>>+ if (!vq->vring.desc) { > >>>+ return; > >>>+ } > >>>+ > >>>+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { > >>>+ virtio_queue_set_notification_packed(vq, enable); > >>>+ } else { > >>>+ virtio_queue_set_notification_split(vq, enable); > >>>+ } > >>>+} > >>>+ > >>> int virtio_queue_ready(VirtQueue *vq) > >>> { > >>> return vq->vring.avail != 0; > >>>@@ -2117,8 +2190,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int > >>>value) > >>> } > >>> } > >>>-/* Called within rcu_read_lock(). */ > >>>-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) > >>>+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) > >>> { > >>> uint16_t old, new; > >>> bool v; > >>>@@ -2141,6 +2213,53 @@ static bool virtio_should_notify(VirtIODevice > >>>*vdev, VirtQueue *vq) > >>> return !v || vring_need_event(vring_get_used_event(vq), new, old); > >>> } > >>>+static bool vring_packed_need_event(VirtQueue *vq, bool wrap, > >>>+ uint16_t off_wrap, uint16_t new, uint16_t old) > >>>+{ > >>>+ int off = off_wrap & ~(1 << 15); > >>>+ > >>>+ if (wrap != off_wrap >> 15) { > >>>+ off -= vq->vring.num; > >>>+ } > >>>+ > >>>+ return vring_need_event(off, new, old); > >>>+} > >>>+ > >>>+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) > >>>+{ > >>>+ VRingPackedDescEvent e; > >>>+ uint16_t old, new; > >>>+ bool v; > >>>+ VRingMemoryRegionCaches *caches; > >>>+ > >>>+ caches = vring_get_region_caches(vq); > >>>+ vring_packed_event_read(vdev, &caches->avail, &e); > >>>+ > >>>+ old = vq->signalled_used; > >>>+ new = vq->signalled_used = vq->used_idx; > >>>+ v = vq->signalled_used_valid; > >>>+ vq->signalled_used_valid = true; > >>>+ > >>>+ if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) { > >>>+ return false; > >>>+ } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) { > >>>+ return true; > >>>+ } > >>>+ > >>>+ return !v || vring_packed_need_event(vq, > >>>+ vq->used_wrap_counter, e.off_wrap, new, old); > >>>+} > >>>+ > >>>+/* Called within rcu_read_lock(). */ > >>>+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) > >>>+{ > >>>+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { > >>>+ return virtio_packed_should_notify(vdev, vq); > >>>+ } else { > >>>+ return virtio_split_should_notify(vdev, vq); > >>>+ } > >>>+} > >>>+ > >>> void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) > >>> { > >>> bool should_notify;