Re: [PATCH v2 1/5] vhost-user block device backend

2020-02-20 Thread Coiby Xu
> > +vmsg->fd_num = 0;
> > +for (cmsg = CMSG_FIRSTHDR(&msg);
> > + cmsg != NULL;
> > + cmsg = CMSG_NXTHDR(&msg, cmsg))
> > +{
> > +if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type ==
SCM_RIGHTS) {
> > +fd_size = cmsg->cmsg_len - CMSG_LEN(0);
> > +vmsg->fd_num = fd_size / sizeof(int);
> > +memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
> > +break;
> > +}
> > +}

> I think the fd passing part becomes easier when you use the proper
> qio_channel_readv_full() function. Its implementation is also a bit more
> careful than yours. For example, you forgot checking fd_size against
> VHOST_MEMORY_MAX_NREGIONS, allowing a buffer overflow in the memcpy(),
> and you don't adjust fd flags for the new file descriptors.

Oh, I used qio_channel_readv_full in v3&v4. But I still forgot checking
fd_size against  VHOST_MEMORY_MAX_NREGIONS. I'll fix this buffer overflow
issue in v5.

On Thu, Jan 16, 2020 at 9:56 PM Kevin Wolf  wrote:

> Hi,
>
> I'm only doing a quick first review pointing out the more obvious
> things while I familiarise myself with your code. I intend to review it
> in more detail later (either in a second pass for this series, or when
> you post v3).
>
> Am 14.01.2020 um 15:06 hat Coiby Xu geschrieben:
> > By making use of libvhost, multiple block device drives can be exported
> and each drive can serve multiple clients simultaneously. Since
> vhost-user-server needs a block drive to be created first, delay the
> creation of this object.
> >
> > Signed-off-by: Coiby Xu 
>
> Please wrap the commit message at 72 characters.
>
> >  blockdev-vu.c  | 1008 
> >  include/block/vhost-user.h |   46 ++
> >  vl.c   |4 +
> >  3 files changed, 1058 insertions(+)
> >  create mode 100644 blockdev-vu.c
> >  create mode 100644 include/block/vhost-user.h
>
> This adds a single, relatively big source file. I see at least two
> parts: The generic vhost-user infrastructure with connection handling
> etc. and the implementation of the specific vhost-user-blk device.
> Separating these into two files is probably a good idea.
>
> I would also suggest to put the files in a new subdirectory
> block/export/ and call them vhost-user.c/vhost-user-blk.c. The new
> header file can be in the same directory as it shouldn't be used by
> anyone else.
>
> > diff --git a/blockdev-vu.c b/blockdev-vu.c
> > new file mode 100644
> > index 00..45f0bb43a7
> > --- /dev/null
> > +++ b/blockdev-vu.c
> > @@ -0,0 +1,1008 @@
>
> The LICENSE file clarifies that files without a license header are
> GPLv2+, so it's not strictly a problem, but I think it is good style to
> include a license header that explicitly tells so.
>
> > +#include "qemu/osdep.h"
> > +#include "block/vhost-user.h"
> > +#include "qapi/error.h"
> > +#include "qapi/qapi-types-sockets.h"
> > +#include "qapi/qapi-commands-block.h"
> > +
> > +#include "sysemu/block-backend.h"
> > +#include "qemu/main-loop.h"
> > +
> > +#include "qemu/units.h"
> > +
> > +#include "block/block.h"
> > +
> > +#include "qom/object_interfaces.h"
> > +
> > +#include 
> > +
> > +#include "hw/qdev-properties.h"
>
> Does the order of includes and the empty lines between them signify
> anything? If not, I suggest just sorting them alphabetically (and maybe
> using empty lines between different subdirectories if you like this
> better than a single large block).
>
> According to CODING_STYLE.rst, system headers like  come
> before all QEMU headers (except qemu/osdep.h, which always must come
> first).
>
> > +enum {
> > +VHOST_USER_BLK_MAX_QUEUES = 8,
> > +};
> > +
> > +struct virtio_blk_inhdr {
> > +unsigned char status;
> > +};
> > +
> > +
> > +static QTAILQ_HEAD(, VubDev) vub_devs =
> QTAILQ_HEAD_INITIALIZER(vub_devs);
> > +
> > +
> > +typedef struct VubReq {
> > +VuVirtqElement *elem;
>
> Maybe worth a comment that this was allocated with plain malloc(), so
> you must use free() rather than g_free() (which would be the default in
> QEMU)?
>
> > +int64_t sector_num;
> > +size_t size;
> > +struct virtio_blk_inhdr *in;
> > +struct virtio_blk_outhdr out;
> > +VuClient *client;
> > +struct VuVirtq *vq;
> > +} VubReq;
>
> I'm not completely sure yet, but I think I would prefer VuBlock to Vub
> in the type names. Some may even prefer VhostUserBlock, but I can see
> that this would be quite lengthy.
>
> > +static void
> > +remove_watch(VuDev *vu_dev, int fd)
> > +{
> > +VuClient *client;
> > +
> > +g_assert(vu_dev);
> > +g_assert(fd >= 0);
> > +
> > +client = container_of(vu_dev, VuClient, parent);
> > +aio_set_fd_handler(client->blk->ctx, fd, false, NULL, NULL, NULL,
> NULL);
> > +}
> > +
> > +static void close_client(VuClient *client)
> > +{
> > +vu_deinit(&client->parent);
> > +/** g_source_destroy(vub_device->parent.src); */
>
> Leftover from conversion?
>
> > +client->sioc = NULL

Re: [PATCH v2 1/5] vhost-user block device backend

2020-02-19 Thread Coiby Xu
Hi Kevin,

Thank you for reviewing my work in a rather detailed way.

> >  blockdev-vu.c  | 1008 
> >  include/block/vhost-user.h |   46 ++
> >  vl.c   |4 +
> >  3 files changed, 1058 insertions(+)
> >  create mode 100644 blockdev-vu.c
> >  create mode 100644 include/block/vhost-user.h

> This adds a single, relatively big source file. I see at least two
parts: The generic vhost-user infrastructure with connection handling
etc. and the implementation of the specific vhost-user-blk device.
Separating these into two files is probably a good idea.

> I would also suggest to put the files in a new subdirectory
block/export/ and call them vhost-user.c/vhost-user-blk.c. The new
header file can be in the same directory as it shouldn't be used by
anyone else.

I've split blockdev-vu.c in two separate files but in a different subdirectory
 - backends/vhost-user-blk-server.c
 - util/vhost-user-server.c

> > +static QTAILQ_HEAD(, VubDev) vub_devs = QTAILQ_HEAD_INITIALIZER(vub_devs);
> > +
> > +
> > +typedef struct VubReq {
> > +VuVirtqElement *elem;

> Maybe worth a comment that this was allocated with plain malloc(), so
> you must use free() rather than g_free() (which would be the default in
> QEMU)?

Although VuVirtqElement is created using malloc, VubReq is created using g_new0.


I missed several suggestions in v3 but in v4 all suggestions have been
applied. Thank you!
On Thu, Jan 16, 2020 at 9:56 PM Kevin Wolf  wrote:
>
> Hi,
>
> I'm only doing a quick first review pointing out the more obvious
> things while I familiarise myself with your code. I intend to review it
> in more detail later (either in a second pass for this series, or when
> you post v3).
>
> Am 14.01.2020 um 15:06 hat Coiby Xu geschrieben:
> > By making use of libvhost, multiple block device drives can be exported and 
> > each drive can serve multiple clients simultaneously. Since 
> > vhost-user-server needs a block drive to be created first, delay the 
> > creation of this object.
> >
> > Signed-off-by: Coiby Xu 
>
> Please wrap the commit message at 72 characters.
>
> >  blockdev-vu.c  | 1008 
> >  include/block/vhost-user.h |   46 ++
> >  vl.c   |4 +
> >  3 files changed, 1058 insertions(+)
> >  create mode 100644 blockdev-vu.c
> >  create mode 100644 include/block/vhost-user.h
>
> This adds a single, relatively big source file. I see at least two
> parts: The generic vhost-user infrastructure with connection handling
> etc. and the implementation of the specific vhost-user-blk device.
> Separating these into two files is probably a good idea.
>
> I would also suggest to put the files in a new subdirectory
> block/export/ and call them vhost-user.c/vhost-user-blk.c. The new
> header file can be in the same directory as it shouldn't be used by
> anyone else.
>
> > diff --git a/blockdev-vu.c b/blockdev-vu.c
> > new file mode 100644
> > index 00..45f0bb43a7
> > --- /dev/null
> > +++ b/blockdev-vu.c
> > @@ -0,0 +1,1008 @@
>
> The LICENSE file clarifies that files without a license header are
> GPLv2+, so it's not strictly a problem, but I think it is good style to
> include a license header that explicitly tells so.
>
> > +#include "qemu/osdep.h"
> > +#include "block/vhost-user.h"
> > +#include "qapi/error.h"
> > +#include "qapi/qapi-types-sockets.h"
> > +#include "qapi/qapi-commands-block.h"
> > +
> > +#include "sysemu/block-backend.h"
> > +#include "qemu/main-loop.h"
> > +
> > +#include "qemu/units.h"
> > +
> > +#include "block/block.h"
> > +
> > +#include "qom/object_interfaces.h"
> > +
> > +#include 
> > +
> > +#include "hw/qdev-properties.h"
>
> Does the order of includes and the empty lines between them signify
> anything? If not, I suggest just sorting them alphabetically (and maybe
> using empty lines between different subdirectories if you like this
> better than a single large block).
>
> According to CODING_STYLE.rst, system headers like  come
> before all QEMU headers (except qemu/osdep.h, which always must come
> first).
>
> > +enum {
> > +VHOST_USER_BLK_MAX_QUEUES = 8,
> > +};
> > +
> > +struct virtio_blk_inhdr {
> > +unsigned char status;
> > +};
> > +
> > +
> > +static QTAILQ_HEAD(, VubDev) vub_devs = QTAILQ_HEAD_INITIALIZER(vub_devs);
> > +
> > +
> > +typedef struct VubReq {
> > +VuVirtqElement *elem;
>
> Maybe worth a comment that this was allocated with plain malloc(), so
> you must use free() rather than g_free() (which would be the default in
> QEMU)?
>
> > +int64_t sector_num;
> > +size_t size;
> > +struct virtio_blk_inhdr *in;
> > +struct virtio_blk_outhdr out;
> > +VuClient *client;
> > +struct VuVirtq *vq;
> > +} VubReq;
>
> I'm not completely sure yet, but I think I would prefer VuBlock to Vub
> in the type names. Some may even prefer VhostUserBlock, but I can see
> that this would be quite lengthy.
>
> > +static void
> > +remove

Re: [PATCH v2 1/5] vhost-user block device backend

2020-02-13 Thread Coiby Xu
Thank you for reviewing this patch! I'm already posted v3 based on
your feedback.

> > +#include "hw/qdev-properties.h"
> > +enum {
> > +VHOST_USER_BLK_MAX_QUEUES = 8,
> > +};

> The number of queues is hardcoded to 1 so this constant can be removed
for now.

> > +
> > +static QTAILQ_HEAD(, VubDev) vub_devs = QTAILQ_HEAD_INITIALIZER(vub_devs);

> I'm not sure if this list will be necessary.  See my comments about
> replacing the "name" property with Object's built-in "id" property.

There is no need to start vhost-user-blk-server with the same exported
drive since vhost-user-blk-server can serve multiple clients
simutanously. And we shoudn't started two vhost-user-blk-servers with
the same unix socket path. So we need this list to avoid dupliate
servers. And as pointed out by Kevin, "name" property actually means
node-name which is used in v3.


> > +#include "hw/qdev-properties.h"
> > +enum {
> > +VHOST_USER_BLK_MAX_QUEUES = 8,
> > +};

> The number of queues is hardcoded to 1 so this constant can be removed
for now.

I've set VHOST_USER_BLK_MAX_QUEUES = 1 in v3 to avoid magic number.

> > +config->seg_max = 128 - 2;

> This is okay for now but should be changed in the future.

> hw/block/virtio-blk.c was recently modified to calculate seg_max based
> on the virtqueue size (which is where the hardcoded 128 originally came
> from).  Applications that use large buffer sizes (128+ KB) benefit from
> larger virtqueue sizes and seg_max.

I've looked at the implementation of "-device
vhost-user-blk-pci,queue-size=512" regarding seg_max and found out
vhost-user-blk-server doesn' have the chance to caculate seg_max based
on the virtqueue size and report it back to QEMU as vhost-user client
in time.

QEMU as vhost-user client will create virtqueues with the size of
queue-size parameter. Later it will get the configureation including
seg_max from vhost-user-blk-server by sending VHOST_USER_SET_CONFIG
message and this seg_max vallue will be sent to guest OS.  Guest OS
will set the real size of virtqueue which will be sent to
vhost-user-blk-server through VHOST_USER_SET_VRING_NUM message. After
that QEMU as vhost-user client will never send VHOST_USER_SET_CONFIG
again.

There could be three ways to address this issue,
1. Add seg_max_adjust and queue-size object property for
vhost-user-blk device in a way similar to virtio-blk device. And QEMU
as vhost-user client will ignore seg_max parameter from
vhost-user-blk-server. It will caculate seg_max based on queue-size
and report it to guest OS.
2. Add seg_max_adjust and queue-size object property for
vhost-user-blk server and let  vhost-user-blk server calculate seg_max
based on queue-size
3. Let QEMU as vhost-user client tell vhost-user-blk-server its queue
size by sending VHOST_USER_SET_VRING_NUM message first. When
vhost-user-blk-server receive the VHOST_USER_SET_CONFIG message, it
will calculate seg_max and report it back to QEMU as vhost-user
client.

Which way do you is the best?


> > +void vub_accept(QIONetListener *listener, QIOChannelSocket *sioc,
> > +gpointer opaque)
> > +{
> > +VuClient *client;
> > +VubDev *vub_device = opaque;
> > +client = g_new0(VuClient, 1);

> Is it helpful to have a separate VuClient struct even though only a
> single vhost-user client can be connected at a time?  It may be simpler
> to keep connection state directly in VubDev.

Currently, I don't use chardev as an object property of
vhost-user-blk-server. So actually multiple clients can be connected
simutaneously.

All the other suggestions have been adopted in v3. Thank you for your advice!


On Thu, Jan 16, 2020 at 9:51 PM Stefan Hajnoczi  wrote:
>
> On Tue, Jan 14, 2020 at 10:06:16PM +0800, Coiby Xu wrote:
> > By making use of libvhost, multiple block device drives can be exported and 
> > each drive can serve multiple clients simultaneously. Since 
> > vhost-user-server needs a block drive to be created first, delay the 
> > creation of this object.
> >
> > Signed-off-by: Coiby Xu 
> > ---
> >  blockdev-vu.c  | 1008 
>
> This file contains both vhost-user-blk code and generic vhost-user
> protocol code.  Please split them into two files:
> 1. backends/vhost-user-blk-server.c
> 2. util/vhost-user-server.c
>
> (As I read and understand the code better I may have better suggestions
> about where to put these source files and how to name them.)
>
> >  include/block/vhost-user.h |   46 ++
> >  vl.c   |4 +
> >  3 files changed, 1058 insertions(+)
> >  create mode 100644 blockdev-vu.c
> >  create mode 100644 include/block/vhost-user.h
> >
> > diff --git a/blockdev-vu.c b/blockdev-vu.c
> > new file mode 100644
> > index 00..45f0bb43a7
> > --- /dev/null
> > +++ b/blockdev-vu.c
> > @@ -0,0 +1,1008 @@
> > +#include "qemu/osdep.h"
> > +#include "block/vhost-user.h"
> > +#include "qapi/error.h"
> > +#include "qapi/qapi-types-sockets.h"
> > +#include "qapi/qapi-commands-bloc

Re: [PATCH v2 1/5] vhost-user block device backend

2020-01-16 Thread Kevin Wolf
Am 16.01.2020 um 14:51 hat Stefan Hajnoczi geschrieben:
> > +static void vu_set_unix_socket(Object *obj, const char *value,
> > +Error **errp)
> > +{
> > +VubDev *vus = VHOST_USER_SERVER(obj);;
> > +
> > +if (vus->unix_socket) {
> > +error_setg(errp, "unix_socket property already set");
> > +return;
> > +}
> > +
> > +vus->unix_socket = g_strdup(value);
> > +vhost_user_server_start(vus, value, vus->name,
> > +vus->writable, errp);
> 
> Property setters should only perform input validation and store the
> data.  Actions like creating network connections, opening files, etc
> should happen later in a UserCreatableClass->complete() callback.
> 
> This is necessary because vus->writable is also a property and may be
> set after unix_socket.  The ->complete() callback is called after all
> setters so it can access the final values of all properties.
> 
> See iothread_class_init() and iothread_complete() for an example.

Ah, right, this is the correct way. I forgot about the existence of
.complete(), so please ignore what I wrote.

> > diff --git a/vl.c b/vl.c
> > index 86474a55c9..72ac506342 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -2553,6 +2553,10 @@ static bool object_create_initial(const char *type, 
> > QemuOpts *opts)
> >  }
> >  #endif
> > 
> > +/* Reason: vhost-user-server property "name" */
> > +if (g_str_equal(type, "vhost-user-server")) {
> > +return false;
> > +}
> 
> I don't understand why the "name" property introduces a creation order
> dependency.  It's just a string and has no dependency on other
> command-line objects.  Can you explain why this change is necessary?

I was confused at first, too, but it's just a naming problem: "name"
is what points to the block device to be exported. It should really be
"node-name".

> > +struct VuClient {
> > +VuDev parent;
> > +int refcount;
> > +VubDev *blk;
> > +QIOChannelSocket *sioc; /* The underlying data channel */
> > +QIOChannel *ioc; /* The current I/O channel */
> > +QTAILQ_ENTRY(VuClient) next;
> > +bool closed;
> > +};
> > +VubDev *vub_dev_find(const char *name);
> 
> All -object instances already have an id=ID property.  There is no need
> to declare a separate "name" property.  Please look at iothread_by_id()
> and iothread_get_id() for examples.

It's questionable to me if this function is needed at all. It is only
used in vhost_user_server_start() to make sure that you don't export a
node twice - but what's even the problem with exporting it twice?

Kevin


signature.asc
Description: PGP signature


Re: [PATCH v2 1/5] vhost-user block device backend

2020-01-16 Thread Kevin Wolf
Hi,

I'm only doing a quick first review pointing out the more obvious
things while I familiarise myself with your code. I intend to review it
in more detail later (either in a second pass for this series, or when
you post v3).

Am 14.01.2020 um 15:06 hat Coiby Xu geschrieben:
> By making use of libvhost, multiple block device drives can be exported and 
> each drive can serve multiple clients simultaneously. Since vhost-user-server 
> needs a block drive to be created first, delay the creation of this object.
> 
> Signed-off-by: Coiby Xu 

Please wrap the commit message at 72 characters.

>  blockdev-vu.c  | 1008 
>  include/block/vhost-user.h |   46 ++
>  vl.c   |4 +
>  3 files changed, 1058 insertions(+)
>  create mode 100644 blockdev-vu.c
>  create mode 100644 include/block/vhost-user.h

This adds a single, relatively big source file. I see at least two
parts: The generic vhost-user infrastructure with connection handling
etc. and the implementation of the specific vhost-user-blk device.
Separating these into two files is probably a good idea.

I would also suggest to put the files in a new subdirectory
block/export/ and call them vhost-user.c/vhost-user-blk.c. The new
header file can be in the same directory as it shouldn't be used by
anyone else.

> diff --git a/blockdev-vu.c b/blockdev-vu.c
> new file mode 100644
> index 00..45f0bb43a7
> --- /dev/null
> +++ b/blockdev-vu.c
> @@ -0,0 +1,1008 @@

The LICENSE file clarifies that files without a license header are
GPLv2+, so it's not strictly a problem, but I think it is good style to
include a license header that explicitly tells so.

> +#include "qemu/osdep.h"
> +#include "block/vhost-user.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-types-sockets.h"
> +#include "qapi/qapi-commands-block.h"
> +
> +#include "sysemu/block-backend.h"
> +#include "qemu/main-loop.h"
> +
> +#include "qemu/units.h"
> +
> +#include "block/block.h"
> +
> +#include "qom/object_interfaces.h"
> +
> +#include 
> +
> +#include "hw/qdev-properties.h"

Does the order of includes and the empty lines between them signify
anything? If not, I suggest just sorting them alphabetically (and maybe
using empty lines between different subdirectories if you like this
better than a single large block).

According to CODING_STYLE.rst, system headers like  come
before all QEMU headers (except qemu/osdep.h, which always must come
first).

> +enum {
> +VHOST_USER_BLK_MAX_QUEUES = 8,
> +};
> +
> +struct virtio_blk_inhdr {
> +unsigned char status;
> +};
> +
> +
> +static QTAILQ_HEAD(, VubDev) vub_devs = QTAILQ_HEAD_INITIALIZER(vub_devs);
> +
> +
> +typedef struct VubReq {
> +VuVirtqElement *elem;

Maybe worth a comment that this was allocated with plain malloc(), so
you must use free() rather than g_free() (which would be the default in
QEMU)?

> +int64_t sector_num;
> +size_t size;
> +struct virtio_blk_inhdr *in;
> +struct virtio_blk_outhdr out;
> +VuClient *client;
> +struct VuVirtq *vq;
> +} VubReq;

I'm not completely sure yet, but I think I would prefer VuBlock to Vub
in the type names. Some may even prefer VhostUserBlock, but I can see
that this would be quite lengthy.

> +static void
> +remove_watch(VuDev *vu_dev, int fd)
> +{
> +VuClient *client;
> +
> +g_assert(vu_dev);
> +g_assert(fd >= 0);
> +
> +client = container_of(vu_dev, VuClient, parent);
> +aio_set_fd_handler(client->blk->ctx, fd, false, NULL, NULL, NULL, NULL);
> +}
> +
> +static void close_client(VuClient *client)
> +{
> +vu_deinit(&client->parent);
> +/** g_source_destroy(vub_device->parent.src); */

Leftover from conversion?

> +client->sioc = NULL;
> +object_unref(OBJECT(client->ioc));
> +client->closed = true;
> +
> +}
> +
> +static void vub_panic_cb(VuDev *vu_dev, const char *buf)

You use a lot of sprintf() before calling this function. Would it be
worth taking a printf-like format parameter instead of buf and using a
variable argument list?

> +{
> +if (buf) {
> +g_warning("vu_panic: %s", buf);

I think QEMU proper doesn't use g_warning() anywhere. This could be
error_report() or warn_report(). (Or if you use a format string
error_vreport() and warn_vreport().)

> +}
> +
> +VuClient *client = container_of(vu_dev, VuClient, parent);
> +if (client->blk->exit_panic) {
> +client->blk->close = true;
> +}
> +if (!client->closed) {
> +close_client(client);
> +}
> +}
> +
> +
> +static void vub_req_complete(VubReq *req)
> +{
> +VuDev *vu_dev = &req->client->parent;
> +
> +/* IO size with 1 extra status byte */
> +vu_queue_push(vu_dev, req->vq, req->elem,
> +  req->size + 1);

I think this fits in a single line.

> +vu_queue_notify(vu_dev, req->vq);
> +
> +if (req->elem) {
> +free(req->elem);
> +}
> +
> +g_free(req);
> +}
> +
> +
> +
> +static int
> +vub_discard_write_zer

Re: [PATCH v2 1/5] vhost-user block device backend

2020-01-16 Thread Stefan Hajnoczi
On Tue, Jan 14, 2020 at 10:06:16PM +0800, Coiby Xu wrote:
> By making use of libvhost, multiple block device drives can be exported and 
> each drive can serve multiple clients simultaneously. Since vhost-user-server 
> needs a block drive to be created first, delay the creation of this object.
> 
> Signed-off-by: Coiby Xu 
> ---
>  blockdev-vu.c  | 1008 

This file contains both vhost-user-blk code and generic vhost-user
protocol code.  Please split them into two files:
1. backends/vhost-user-blk-server.c
2. util/vhost-user-server.c

(As I read and understand the code better I may have better suggestions
about where to put these source files and how to name them.)

>  include/block/vhost-user.h |   46 ++
>  vl.c   |4 +
>  3 files changed, 1058 insertions(+)
>  create mode 100644 blockdev-vu.c
>  create mode 100644 include/block/vhost-user.h
> 
> diff --git a/blockdev-vu.c b/blockdev-vu.c
> new file mode 100644
> index 00..45f0bb43a7
> --- /dev/null
> +++ b/blockdev-vu.c
> @@ -0,0 +1,1008 @@
> +#include "qemu/osdep.h"
> +#include "block/vhost-user.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-types-sockets.h"
> +#include "qapi/qapi-commands-block.h"
> +
> +#include "sysemu/block-backend.h"
> +#include "qemu/main-loop.h"
> +
> +#include "qemu/units.h"
> +
> +#include "block/block.h"
> +
> +#include "qom/object_interfaces.h"
> +
> +#include 
> +
> +#include "hw/qdev-properties.h"
> +enum {
> +VHOST_USER_BLK_MAX_QUEUES = 8,
> +};

The number of queues is hardcoded to 1 so this constant can be removed
for now.

> +
> +struct virtio_blk_inhdr {
> +unsigned char status;
> +};
> +
> +
> +static QTAILQ_HEAD(, VubDev) vub_devs = QTAILQ_HEAD_INITIALIZER(vub_devs);

I'm not sure if this list will be necessary.  See my comments about
replacing the "name" property with Object's built-in "id" property.

> +
> +
> +typedef struct VubReq {
> +VuVirtqElement *elem;
> +int64_t sector_num;
> +size_t size;
> +struct virtio_blk_inhdr *in;
> +struct virtio_blk_outhdr out;
> +VuClient *client;
> +struct VuVirtq *vq;
> +} VubReq;
> +
> +static void
> +remove_watch(VuDev *vu_dev, int fd)
> +{
> +VuClient *client;
> +
> +g_assert(vu_dev);
> +g_assert(fd >= 0);
> +
> +client = container_of(vu_dev, VuClient, parent);
> +aio_set_fd_handler(client->blk->ctx, fd, false, NULL, NULL, NULL, NULL);
> +}
> +
> +static void close_client(VuClient *client)
> +{
> +vu_deinit(&client->parent);
> +/** g_source_destroy(vub_device->parent.src); */
> +client->sioc = NULL;
> +object_unref(OBJECT(client->ioc));
> +client->closed = true;
> +
> +}
> +
> +static void vub_panic_cb(VuDev *vu_dev, const char *buf)
> +{
> +if (buf) {
> +g_warning("vu_panic: %s", buf);
> +}
> +
> +VuClient *client = container_of(vu_dev, VuClient, parent);
> +if (client->blk->exit_panic) {
> +client->blk->close = true;
> +}
> +if (!client->closed) {
> +close_client(client);
> +}
> +}
> +
> +
> +static void vub_req_complete(VubReq *req)
> +{
> +VuDev *vu_dev = &req->client->parent;
> +
> +/* IO size with 1 extra status byte */
> +vu_queue_push(vu_dev, req->vq, req->elem,
> +  req->size + 1);
> +vu_queue_notify(vu_dev, req->vq);
> +
> +if (req->elem) {
> +free(req->elem);
> +}
> +
> +g_free(req);
> +}
> +
> +
> +
> +static int
> +vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
> + uint32_t type)
> +{
> +struct virtio_blk_discard_write_zeroes *desc;
> +ssize_t size;
> +void *buf;
> +
> +size = iov_size(iov, iovcnt);
> +if (size != sizeof(*desc)) {
> +fprintf(stderr, "Invalid size %ld, expect %ld\n", size, 
> sizeof(*desc));
> +return -1;
> +}
> +buf = g_new0(char, size);
> +
> +iov_to_buf_full(iov, iovcnt, 0, buf, size);

Simpler:

  struct virtio_blk_discard_write_zeroes desc;

  if (unlikely(iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)) !=
   sizeof(desc)) {
  /* TODO handle error */
  }

No heap allocation.  One variable (desc) instead of two (desc and buf).

> +
> +
> +#if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
> +VubDev *vdev_blk;
> +VuClient *client = container_of(dev, VuClient, parent);
> +vdev_blk = client->blk;
> +desc = (struct virtio_blk_discard_write_zeroes *)buf;
> +uint64_t range[2] = { le64toh(desc->sector) << 9,
> +  le32toh(desc->num_sectors) << 9 };
> +if (type == VIRTIO_BLK_T_DISCARD) {
> +if (blk_pdiscard(vdev_blk->blk, range[0], range[1]) == 0) {
> +g_free(buf);
> +return 0;
> +}
> +} else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +if (blk_pwrite_zeroes(vdev_blk->blk, range[0], range[1]) == 0) {
> +g_free(buf);
> +return 0;
> + 

[PATCH v2 1/5] vhost-user block device backend

2020-01-14 Thread Coiby Xu
By making use of libvhost, multiple block device drives can be exported and 
each drive can serve multiple clients simultaneously. Since vhost-user-server 
needs a block drive to be created first, delay the creation of this object.

Signed-off-by: Coiby Xu 
---
 blockdev-vu.c  | 1008 
 include/block/vhost-user.h |   46 ++
 vl.c   |4 +
 3 files changed, 1058 insertions(+)
 create mode 100644 blockdev-vu.c
 create mode 100644 include/block/vhost-user.h

diff --git a/blockdev-vu.c b/blockdev-vu.c
new file mode 100644
index 00..45f0bb43a7
--- /dev/null
+++ b/blockdev-vu.c
@@ -0,0 +1,1008 @@
+#include "qemu/osdep.h"
+#include "block/vhost-user.h"
+#include "qapi/error.h"
+#include "qapi/qapi-types-sockets.h"
+#include "qapi/qapi-commands-block.h"
+
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+
+#include "qemu/units.h"
+
+#include "block/block.h"
+
+#include "qom/object_interfaces.h"
+
+#include 
+
+#include "hw/qdev-properties.h"
+enum {
+VHOST_USER_BLK_MAX_QUEUES = 8,
+};
+
+struct virtio_blk_inhdr {
+unsigned char status;
+};
+
+
+static QTAILQ_HEAD(, VubDev) vub_devs = QTAILQ_HEAD_INITIALIZER(vub_devs);
+
+
+typedef struct VubReq {
+VuVirtqElement *elem;
+int64_t sector_num;
+size_t size;
+struct virtio_blk_inhdr *in;
+struct virtio_blk_outhdr out;
+VuClient *client;
+struct VuVirtq *vq;
+} VubReq;
+
+static void
+remove_watch(VuDev *vu_dev, int fd)
+{
+VuClient *client;
+
+g_assert(vu_dev);
+g_assert(fd >= 0);
+
+client = container_of(vu_dev, VuClient, parent);
+aio_set_fd_handler(client->blk->ctx, fd, false, NULL, NULL, NULL, NULL);
+}
+
+static void close_client(VuClient *client)
+{
+vu_deinit(&client->parent);
+/** g_source_destroy(vub_device->parent.src); */
+client->sioc = NULL;
+object_unref(OBJECT(client->ioc));
+client->closed = true;
+
+}
+
+static void vub_panic_cb(VuDev *vu_dev, const char *buf)
+{
+if (buf) {
+g_warning("vu_panic: %s", buf);
+}
+
+VuClient *client = container_of(vu_dev, VuClient, parent);
+if (client->blk->exit_panic) {
+client->blk->close = true;
+}
+if (!client->closed) {
+close_client(client);
+}
+}
+
+
+static void vub_req_complete(VubReq *req)
+{
+VuDev *vu_dev = &req->client->parent;
+
+/* IO size with 1 extra status byte */
+vu_queue_push(vu_dev, req->vq, req->elem,
+  req->size + 1);
+vu_queue_notify(vu_dev, req->vq);
+
+if (req->elem) {
+free(req->elem);
+}
+
+g_free(req);
+}
+
+
+
+static int
+vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
+ uint32_t type)
+{
+struct virtio_blk_discard_write_zeroes *desc;
+ssize_t size;
+void *buf;
+
+size = iov_size(iov, iovcnt);
+if (size != sizeof(*desc)) {
+fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
+return -1;
+}
+buf = g_new0(char, size);
+
+iov_to_buf_full(iov, iovcnt, 0, buf, size);
+
+
+#if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+VubDev *vdev_blk;
+VuClient *client = container_of(dev, VuClient, parent);
+vdev_blk = client->blk;
+desc = (struct virtio_blk_discard_write_zeroes *)buf;
+uint64_t range[2] = { le64toh(desc->sector) << 9,
+  le32toh(desc->num_sectors) << 9 };
+if (type == VIRTIO_BLK_T_DISCARD) {
+if (blk_pdiscard(vdev_blk->blk, range[0], range[1]) == 0) {
+g_free(buf);
+return 0;
+}
+} else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+if (blk_pwrite_zeroes(vdev_blk->blk, range[0], range[1]) == 0) {
+g_free(buf);
+return 0;
+}
+}
+#endif
+
+g_free(buf);
+return -1;
+}
+
+
+static void
+vub_flush(VubReq *req)
+{
+VuClient *client = req->client;
+blk_co_flush(client->blk->backend);
+}
+
+
+#define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
+typedef struct BlkRwCo {
+BlockBackend *blk;
+int64_t offset;
+void *iobuf;
+int ret;
+BdrvRequestFlags flags;
+} BlkRwCo;
+
+static void blk_read_entry(void *opaque)
+{
+BlkRwCo *rwco = opaque;
+QEMUIOVector *qiov = rwco->iobuf;
+
+rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+  qiov, rwco->flags);
+aio_wait_kick();
+}
+
+
+static void blk_write_entry(void *opaque)
+{
+BlkRwCo *rwco = opaque;
+QEMUIOVector *qiov = rwco->iobuf;
+
+rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
+  qiov, rwco->flags);
+aio_wait_kick();
+}
+
+
+static int blk_prw(BlockBackend *blk, QEMUIOVector *qiov, int64_t offset,
+   CoroutineEntry co_entry, BdrvRequestFlags flags)
+{
+
+BlkRwCo rwco = {
+.blk= blk,
+.offset =