Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On Fri, 1 Feb 2019 at 10:28, Jason Wang wrote: > > > On 2019/1/30 下午1:48, Yongji Xie wrote: > > On Wed, 30 Jan 2019 at 10:32, Jason Wang wrote: > >> > >> On 2019/1/22 下午4:31, elohi...@gmail.com wrote: > >>> +static int > >>> +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) > >>> +{ > >>> +if (!has_feature(dev->protocol_features, > >>> +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > >>> +return 0; > >>> +} > >>> + > >>> +if (unlikely(!vq->inflight)) { > >>> +return -1; > >>> +} > >>> + > >>> +vq->inflight->desc[desc_idx].inuse = 1; > >>> + > >>> +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; > >>> + > >>> +return 0; > >>> +} > >>> + > >>> +static int > >>> +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) > >>> +{ > >>> +if (!has_feature(dev->protocol_features, > >>> +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > >>> +return 0; > >>> +} > >>> + > >>> +if (unlikely(!vq->inflight)) { > >>> +return -1; > >>> +} > >>> + > >>> +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; > >>> + > >>> +barrier(); > >>> + > >>> +vq->inflight->desc[desc_idx].version++; > >>> + > >>> +return 0; > >>> +} > >> > >> You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the > >> value reach memory. > >> > > The cache line should have been flushed during crash. So we can see > > the correct value when backend reconnecting. If so, compile barrier > > should be enough here, right? > > > Maybe I worry too much but it's not about flushing cache, but about > whether or not compiler can generate mov to memory instead of mov to > registers. > OK, I see. I will declare those variables as volatile in v6. Thank you. Thanks, Yongji
Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On 2019/1/30 下午1:48, Yongji Xie wrote: On Wed, 30 Jan 2019 at 10:32, Jason Wang wrote: On 2019/1/22 下午4:31, elohi...@gmail.com wrote: +static int +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].inuse = 1; + +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; + +return 0; +} + +static int +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; + +barrier(); + +vq->inflight->desc[desc_idx].version++; + +return 0; +} You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the value reach memory. The cache line should have been flushed during crash. So we can see the correct value when backend reconnecting. If so, compile barrier should be enough here, right? Maybe I worry too much but it's not about flushing cache, but about whether or not compiler can generate mov to memory instead of mov to registers. Thanks Thanks, Yongji
Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On 2019/1/30 上午11:58, Yongji Xie wrote: On Wed, 30 Jan 2019 at 10:32, Jason Wang wrote: On 2019/1/22 下午4:31, elohi...@gmail.com wrote: +static int +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].inuse = 1; + +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; + +return 0; +} + +static int +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; + +barrier(); + +vq->inflight->desc[desc_idx].version++; + +return 0; +} You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the value reach memory. Is it enough to declare those variables as volatile? Thanks, Yongji I think so. Thanks
Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On 2019/1/30 上午11:14, Michael S. Tsirkin wrote: On Wed, Jan 30, 2019 at 10:31:49AM +0800, Jason Wang wrote: On 2019/1/22 下午4:31, elohi...@gmail.com wrote: +static int +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].inuse = 1; + +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; + +return 0; +} + +static int +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; + +barrier(); + +vq->inflight->desc[desc_idx].version++; + +return 0; +} You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the value reach memory. Thanks WRITE_ONCE is literally volatile + dependency memory barrier. So unless compiler is a very agressive one, it does not buy you much. Well, since version is increased twice, if compiler decide the inline both vu_queue_inflight_pre_put() and vu_queue_inflight_post_put(), can we make sure it always generate instructions that write to memory instead of registers? Thanks
Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On Wed, 30 Jan 2019 at 10:32, Jason Wang wrote: > > > On 2019/1/22 下午4:31, elohi...@gmail.com wrote: > > +static int > > +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) > > +{ > > +if (!has_feature(dev->protocol_features, > > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > > +return 0; > > +} > > + > > +if (unlikely(!vq->inflight)) { > > +return -1; > > +} > > + > > +vq->inflight->desc[desc_idx].inuse = 1; > > + > > +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; > > + > > +return 0; > > +} > > + > > +static int > > +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) > > +{ > > +if (!has_feature(dev->protocol_features, > > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > > +return 0; > > +} > > + > > +if (unlikely(!vq->inflight)) { > > +return -1; > > +} > > + > > +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; > > + > > +barrier(); > > + > > +vq->inflight->desc[desc_idx].version++; > > + > > +return 0; > > +} > > > You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the > value reach memory. > The cache line should have been flushed during crash. So we can see the correct value when backend reconnecting. If so, compile barrier should be enough here, right? Thanks, Yongji
Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On Wed, 30 Jan 2019 at 10:32, Jason Wang wrote: > > > On 2019/1/22 下午4:31, elohi...@gmail.com wrote: > > +static int > > +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) > > +{ > > +if (!has_feature(dev->protocol_features, > > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > > +return 0; > > +} > > + > > +if (unlikely(!vq->inflight)) { > > +return -1; > > +} > > + > > +vq->inflight->desc[desc_idx].inuse = 1; > > + > > +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; > > + > > +return 0; > > +} > > + > > +static int > > +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) > > +{ > > +if (!has_feature(dev->protocol_features, > > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > > +return 0; > > +} > > + > > +if (unlikely(!vq->inflight)) { > > +return -1; > > +} > > + > > +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; > > + > > +barrier(); > > + > > +vq->inflight->desc[desc_idx].version++; > > + > > +return 0; > > +} > > > You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the > value reach memory. > Is it enough to declare those variables as volatile? Thanks, Yongji
Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On Wed, Jan 30, 2019 at 10:31:49AM +0800, Jason Wang wrote: > > On 2019/1/22 下午4:31, elohi...@gmail.com wrote: > > +static int > > +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) > > +{ > > +if (!has_feature(dev->protocol_features, > > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > > +return 0; > > +} > > + > > +if (unlikely(!vq->inflight)) { > > +return -1; > > +} > > + > > +vq->inflight->desc[desc_idx].inuse = 1; > > + > > +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; > > + > > +return 0; > > +} > > + > > +static int > > +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) > > +{ > > +if (!has_feature(dev->protocol_features, > > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { > > +return 0; > > +} > > + > > +if (unlikely(!vq->inflight)) { > > +return -1; > > +} > > + > > +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; > > + > > +barrier(); > > + > > +vq->inflight->desc[desc_idx].version++; > > + > > +return 0; > > +} > > > You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the > value reach memory. > > Thanks > WRITE_ONCE is literally volatile + dependency memory barrier. So unless compiler is a very agressive one, it does not buy you much. -- MST
Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
On 2019/1/22 下午4:31, elohi...@gmail.com wrote: +static int +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].inuse = 1; + +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx; + +return 0; +} + +static int +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx) +{ +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +vq->inflight->desc[desc_idx].used_idx = vq->used_idx; + +barrier(); + +vq->inflight->desc[desc_idx].version++; + +return 0; +} You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the value reach memory. Thanks
[Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory
From: Xie Yongji This patch adds support for VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD message to set/get shared buffer to/from qemu. Then backend can track inflight I/O in this buffer. Signed-off-by: Xie Yongji Signed-off-by: Zhang Yu --- Makefile | 2 +- contrib/libvhost-user/libvhost-user.c | 314 -- contrib/libvhost-user/libvhost-user.h | 40 3 files changed, 335 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index dccba1dca2..3f3a5ace66 100644 --- a/Makefile +++ b/Makefile @@ -474,7 +474,7 @@ Makefile: $(version-obj-y) # Build libraries libqemuutil.a: $(util-obj-y) $(trace-obj-y) $(stub-obj-y) -libvhost-user.a: $(libvhost-user-obj-y) +libvhost-user.a: $(libvhost-user-obj-y) $(util-obj-y) $(stub-obj-y) ## diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 23bd52264c..066fea668b 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -41,6 +41,8 @@ #endif #include "qemu/atomic.h" +#include "qemu/osdep.h" +#include "qemu/memfd.h" #include "libvhost-user.h" @@ -53,6 +55,18 @@ _min1 < _min2 ? _min1 : _min2; }) #endif +/* Round number down to multiple */ +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) + +/* Round number up to multiple */ +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + +/* Align each region to cache line size in inflight buffer */ +#define INFLIGHT_ALIGNMENT 64 + +/* The version of inflight buffer */ +#define INFLIGHT_VERSION 1 + #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) /* The version of the protocol we support */ @@ -66,6 +80,20 @@ } \ } while (0) +static inline +bool has_feature(uint64_t features, unsigned int fbit) +{ +assert(fbit < 64); +return !!(features & (1ULL << fbit)); +} + +static inline +bool vu_has_feature(VuDev *dev, +unsigned int fbit) +{ +return has_feature(dev->features, fbit); +} + static const char * vu_request_to_string(unsigned int req) { @@ -100,6 +128,8 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_POSTCOPY_ADVISE), REQ(VHOST_USER_POSTCOPY_LISTEN), REQ(VHOST_USER_POSTCOPY_END), +REQ(VHOST_USER_GET_INFLIGHT_FD), +REQ(VHOST_USER_SET_INFLIGHT_FD), REQ(VHOST_USER_MAX), }; #undef REQ @@ -890,6 +920,59 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg) return true; } +static int +vu_check_queue_inflights(VuDev *dev, VuVirtq *vq) +{ +int i = 0; + +if (!has_feature(dev->protocol_features, +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { +return 0; +} + +if (unlikely(!vq->inflight)) { +return -1; +} + +/* check whether vm is reset */ +if (unlikely(!vq->inflight->valid)) { +vq->inflight->valid = 1; +return 0; +} + +vq->used_idx = vq->vring.used->idx; +vq->inflight_num = 0; +for (i = 0; i < vq->inflight->desc_num; i++) { +if (unlikely(vq->inflight->desc[i].version % 2 == 1)) { +if (vq->inflight->desc[i].used_idx != vq->used_idx) { +vq->inflight->desc[i].inuse = 0; +} else { +vq->inflight->desc[i].inuse = 1; +} + +barrier(); + +vq->inflight->desc[i].version++; +} + +if (vq->inflight->desc[i].inuse == 0) { +continue; +} + +vq->inflight_desc[vq->inflight_num++] = i; +vq->inuse++; +} +vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx; + +/* in case of I/O hang after reconnecting */ +if (eventfd_write(vq->kick_fd, 1) || +eventfd_write(vq->call_fd, 1)) { +return -1; +} + +return 0; +} + static bool vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg) { @@ -925,6 +1008,10 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg) dev->vq[index].kick_fd, index); } +if (vu_check_queue_inflights(dev, >vq[index])) { +vu_panic(dev, "Failed to check inflights for vq: %d\n", index); +} + return false; } @@ -1215,6 +1302,127 @@ vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg) return true; } +static inline uint64_t +vu_inflight_queue_size(uint16_t queue_size) +{ +return ALIGN_UP(sizeof(VuDescState) * queue_size + + sizeof(uint16_t), INFLIGHT_ALIGNMENT); +} + +static bool +vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg) +{ +int fd; +void *addr; +uint64_t mmap_size; +uint16_t num_queues, queue_size; + +if (vmsg->size != sizeof(vmsg->payload.inflight)) { +vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size); +vmsg->payload.inflight.mmap_size = 0; +return true; +} + +num_queues =