Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-02-04 Thread Yongji Xie
On Fri, 1 Feb 2019 at 10:28, Jason Wang  wrote:
>
>
> On 2019/1/30 下午1:48, Yongji Xie wrote:
> > On Wed, 30 Jan 2019 at 10:32, Jason Wang  wrote:
> >>
> >> On 2019/1/22 下午4:31, elohi...@gmail.com wrote:
> >>> +static int
> >>> +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
> >>> +{
> >>> +if (!has_feature(dev->protocol_features,
> >>> +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> >>> +return 0;
> >>> +}
> >>> +
> >>> +if (unlikely(!vq->inflight)) {
> >>> +return -1;
> >>> +}
> >>> +
> >>> +vq->inflight->desc[desc_idx].inuse = 1;
> >>> +
> >>> +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
> >>> +
> >>> +return 0;
> >>> +}
> >>> +
> >>> +static int
> >>> +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
> >>> +{
> >>> +if (!has_feature(dev->protocol_features,
> >>> +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> >>> +return 0;
> >>> +}
> >>> +
> >>> +if (unlikely(!vq->inflight)) {
> >>> +return -1;
> >>> +}
> >>> +
> >>> +vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
> >>> +
> >>> +barrier();
> >>> +
> >>> +vq->inflight->desc[desc_idx].version++;
> >>> +
> >>> +return 0;
> >>> +}
> >>
> >> You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the
> >> value reach memory.
> >>
> > The cache line should have been flushed during crash. So we can see
> > the correct value when backend reconnecting. If so, compile barrier
> > should be enough here, right?
>
>
> Maybe I worry too much but it's not about flushing cache, but about
> whether or not compiler can generate mov to memory instead of mov to
> registers.
>

OK, I see. I will declare those variables as volatile in v6. Thank you.

Thanks,
Yongji



Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-31 Thread Jason Wang



On 2019/1/30 下午1:48, Yongji Xie wrote:

On Wed, 30 Jan 2019 at 10:32, Jason Wang  wrote:


On 2019/1/22 下午4:31, elohi...@gmail.com wrote:

+static int
+vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].inuse = 1;
+
+vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
+
+return 0;
+}
+
+static int
+vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
+
+barrier();
+
+vq->inflight->desc[desc_idx].version++;
+
+return 0;
+}


You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the
value reach memory.


The cache line should have been flushed during crash. So we can see
the correct value when backend reconnecting. If so, compile barrier
should be enough here, right?



Maybe I worry too much but it's not about flushing cache, but about 
whether or not compiler can generate mov to memory instead of mov to 
registers.


Thanks




Thanks,
Yongji




Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-31 Thread Jason Wang



On 2019/1/30 上午11:58, Yongji Xie wrote:

On Wed, 30 Jan 2019 at 10:32, Jason Wang  wrote:


On 2019/1/22 下午4:31, elohi...@gmail.com wrote:

+static int
+vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].inuse = 1;
+
+vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
+
+return 0;
+}
+
+static int
+vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
+
+barrier();
+
+vq->inflight->desc[desc_idx].version++;
+
+return 0;
+}


You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the
value reach memory.


Is it enough to declare those variables as volatile?

Thanks,
Yongji



I think so.

Thanks




Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-30 Thread Jason Wang



On 2019/1/30 上午11:14, Michael S. Tsirkin wrote:

On Wed, Jan 30, 2019 at 10:31:49AM +0800, Jason Wang wrote:

On 2019/1/22 下午4:31, elohi...@gmail.com wrote:

+static int
+vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].inuse = 1;
+
+vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
+
+return 0;
+}
+
+static int
+vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
+
+barrier();
+
+vq->inflight->desc[desc_idx].version++;
+
+return 0;
+}


You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the
value reach memory.

Thanks


WRITE_ONCE is literally volatile + dependency memory barrier.
So unless compiler is a very agressive one, it does not
buy you much.



Well, since version is increased twice, if compiler decide the inline 
both vu_queue_inflight_pre_put() and vu_queue_inflight_post_put(), can 
we make sure it always generate instructions that write to memory 
instead of registers?


Thanks




Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-29 Thread Yongji Xie
On Wed, 30 Jan 2019 at 10:32, Jason Wang  wrote:
>
>
> On 2019/1/22 下午4:31, elohi...@gmail.com wrote:
> > +static int
> > +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
> > +{
> > +if (!has_feature(dev->protocol_features,
> > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> > +return 0;
> > +}
> > +
> > +if (unlikely(!vq->inflight)) {
> > +return -1;
> > +}
> > +
> > +vq->inflight->desc[desc_idx].inuse = 1;
> > +
> > +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
> > +
> > +return 0;
> > +}
> > +
> > +static int
> > +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
> > +{
> > +if (!has_feature(dev->protocol_features,
> > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> > +return 0;
> > +}
> > +
> > +if (unlikely(!vq->inflight)) {
> > +return -1;
> > +}
> > +
> > +vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
> > +
> > +barrier();
> > +
> > +vq->inflight->desc[desc_idx].version++;
> > +
> > +return 0;
> > +}
>
>
> You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the
> value reach memory.
>

The cache line should have been flushed during crash. So we can see
the correct value when backend reconnecting. If so, compile barrier
should be enough here, right?

Thanks,
Yongji



Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-29 Thread Yongji Xie
On Wed, 30 Jan 2019 at 10:32, Jason Wang  wrote:
>
>
> On 2019/1/22 下午4:31, elohi...@gmail.com wrote:
> > +static int
> > +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
> > +{
> > +if (!has_feature(dev->protocol_features,
> > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> > +return 0;
> > +}
> > +
> > +if (unlikely(!vq->inflight)) {
> > +return -1;
> > +}
> > +
> > +vq->inflight->desc[desc_idx].inuse = 1;
> > +
> > +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
> > +
> > +return 0;
> > +}
> > +
> > +static int
> > +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
> > +{
> > +if (!has_feature(dev->protocol_features,
> > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> > +return 0;
> > +}
> > +
> > +if (unlikely(!vq->inflight)) {
> > +return -1;
> > +}
> > +
> > +vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
> > +
> > +barrier();
> > +
> > +vq->inflight->desc[desc_idx].version++;
> > +
> > +return 0;
> > +}
>
>
> You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the
> value reach memory.
>

Is it enough to declare those variables as volatile?

Thanks,
Yongji



Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-29 Thread Michael S. Tsirkin
On Wed, Jan 30, 2019 at 10:31:49AM +0800, Jason Wang wrote:
> 
> On 2019/1/22 下午4:31, elohi...@gmail.com wrote:
> > +static int
> > +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
> > +{
> > +if (!has_feature(dev->protocol_features,
> > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> > +return 0;
> > +}
> > +
> > +if (unlikely(!vq->inflight)) {
> > +return -1;
> > +}
> > +
> > +vq->inflight->desc[desc_idx].inuse = 1;
> > +
> > +vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
> > +
> > +return 0;
> > +}
> > +
> > +static int
> > +vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
> > +{
> > +if (!has_feature(dev->protocol_features,
> > +VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
> > +return 0;
> > +}
> > +
> > +if (unlikely(!vq->inflight)) {
> > +return -1;
> > +}
> > +
> > +vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
> > +
> > +barrier();
> > +
> > +vq->inflight->desc[desc_idx].version++;
> > +
> > +return 0;
> > +}
> 
> 
> You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the
> value reach memory.
> 
> Thanks
> 

WRITE_ONCE is literally volatile + dependency memory barrier.
So unless compiler is a very agressive one, it does not
buy you much.

-- 
MST



Re: [Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-29 Thread Jason Wang



On 2019/1/22 下午4:31, elohi...@gmail.com wrote:

+static int
+vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].inuse = 1;
+
+vq->inflight->desc[desc_idx].avail_idx = vq->last_avail_idx;
+
+return 0;
+}
+
+static int
+vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+vq->inflight->desc[desc_idx].used_idx = vq->used_idx;
+
+barrier();
+
+vq->inflight->desc[desc_idx].version++;
+
+return 0;
+}



You probably need WRITE_ONCE() semantic (e.g volatile) to make sure the 
value reach memory.


Thanks





[Qemu-devel] [PATCH v5 3/6] libvhost-user: Support tracking inflight I/O in shared memory

2019-01-22 Thread elohimes
From: Xie Yongji 

This patch adds support for VHOST_USER_GET_INFLIGHT_FD and
VHOST_USER_SET_INFLIGHT_FD message to set/get shared buffer
to/from qemu. Then backend can track inflight I/O in this buffer.

Signed-off-by: Xie Yongji 
Signed-off-by: Zhang Yu 
---
 Makefile  |   2 +-
 contrib/libvhost-user/libvhost-user.c | 314 --
 contrib/libvhost-user/libvhost-user.h |  40 
 3 files changed, 335 insertions(+), 21 deletions(-)

diff --git a/Makefile b/Makefile
index dccba1dca2..3f3a5ace66 100644
--- a/Makefile
+++ b/Makefile
@@ -474,7 +474,7 @@ Makefile: $(version-obj-y)
 # Build libraries
 
 libqemuutil.a: $(util-obj-y) $(trace-obj-y) $(stub-obj-y)
-libvhost-user.a: $(libvhost-user-obj-y)
+libvhost-user.a: $(libvhost-user-obj-y) $(util-obj-y) $(stub-obj-y)
 
 ##
 
diff --git a/contrib/libvhost-user/libvhost-user.c 
b/contrib/libvhost-user/libvhost-user.c
index 23bd52264c..066fea668b 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -41,6 +41,8 @@
 #endif
 
 #include "qemu/atomic.h"
+#include "qemu/osdep.h"
+#include "qemu/memfd.h"
 
 #include "libvhost-user.h"
 
@@ -53,6 +55,18 @@
 _min1 < _min2 ? _min1 : _min2; })
 #endif
 
+/* Round number down to multiple */
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
+
+/* Align each region to cache line size in inflight buffer */
+#define INFLIGHT_ALIGNMENT 64
+
+/* The version of inflight buffer */
+#define INFLIGHT_VERSION 1
+
 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
 
 /* The version of the protocol we support */
@@ -66,6 +80,20 @@
 }   \
 } while (0)
 
+static inline
+bool has_feature(uint64_t features, unsigned int fbit)
+{
+assert(fbit < 64);
+return !!(features & (1ULL << fbit));
+}
+
+static inline
+bool vu_has_feature(VuDev *dev,
+unsigned int fbit)
+{
+return has_feature(dev->features, fbit);
+}
+
 static const char *
 vu_request_to_string(unsigned int req)
 {
@@ -100,6 +128,8 @@ vu_request_to_string(unsigned int req)
 REQ(VHOST_USER_POSTCOPY_ADVISE),
 REQ(VHOST_USER_POSTCOPY_LISTEN),
 REQ(VHOST_USER_POSTCOPY_END),
+REQ(VHOST_USER_GET_INFLIGHT_FD),
+REQ(VHOST_USER_SET_INFLIGHT_FD),
 REQ(VHOST_USER_MAX),
 };
 #undef REQ
@@ -890,6 +920,59 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
 return true;
 }
 
+static int
+vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
+{
+int i = 0;
+
+if (!has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+return 0;
+}
+
+if (unlikely(!vq->inflight)) {
+return -1;
+}
+
+/* check whether vm is reset */
+if (unlikely(!vq->inflight->valid)) {
+vq->inflight->valid = 1;
+return 0;
+}
+
+vq->used_idx = vq->vring.used->idx;
+vq->inflight_num = 0;
+for (i = 0; i < vq->inflight->desc_num; i++) {
+if (unlikely(vq->inflight->desc[i].version % 2 == 1)) {
+if (vq->inflight->desc[i].used_idx != vq->used_idx) {
+vq->inflight->desc[i].inuse = 0;
+} else {
+vq->inflight->desc[i].inuse = 1;
+}
+
+barrier();
+
+vq->inflight->desc[i].version++;
+}
+
+if (vq->inflight->desc[i].inuse == 0) {
+continue;
+}
+
+vq->inflight_desc[vq->inflight_num++] = i;
+vq->inuse++;
+}
+vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;
+
+/* in case of I/O hang after reconnecting */
+if (eventfd_write(vq->kick_fd, 1) ||
+eventfd_write(vq->call_fd, 1)) {
+return -1;
+}
+
+return 0;
+}
+
 static bool
 vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
@@ -925,6 +1008,10 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].kick_fd, index);
 }
 
+if (vu_check_queue_inflights(dev, >vq[index])) {
+vu_panic(dev, "Failed to check inflights for vq: %d\n", index);
+}
+
 return false;
 }
 
@@ -1215,6 +1302,127 @@ vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
 return true;
 }
 
+static inline uint64_t
+vu_inflight_queue_size(uint16_t queue_size)
+{
+return ALIGN_UP(sizeof(VuDescState) * queue_size +
+   sizeof(uint16_t), INFLIGHT_ALIGNMENT);
+}
+
+static bool
+vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
+{
+int fd;
+void *addr;
+uint64_t mmap_size;
+uint16_t num_queues, queue_size;
+
+if (vmsg->size != sizeof(vmsg->payload.inflight)) {
+vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size);
+vmsg->payload.inflight.mmap_size = 0;
+return true;
+}
+
+num_queues =