On Fri, Oct 28, 2016 at 11:47:11PM -0700, Ashish Mittal wrote: > Source code for the qnio library that this code loads can be downloaded from: > https://github.com/MittalAshish/libqnio.git > > Sample command line using the JSON syntax: > ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us > -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 > -msg timestamp=on > 'json:{"driver":"vxhs","vdisk_id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410", > "server":{"host":"172.172.17.4","port":"9999"}}' > > Sample command line using the URI syntax: > qemu-img convert -f raw -O raw -n > /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad > vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0 > > Signed-off-by: Ashish Mittal <ashish.mit...@veritas.com> > --- > v4 changelog: > (1) Incorporated v3 review comments on QAPI changes. > (2) Added refcounting for device open/close. > Free library resources on last device close. > > v3 changelog: > (1) Added QAPI schema for the VxHS driver. > > v2 changelog: > (1) Changes done in response to v1 comments. > > TODO: > (1) Add qemu-iotest > > block/Makefile.objs | 2 + > block/trace-events | 21 ++ > block/vxhs.c | 703 > +++++++++++++++++++++++++++++++++++++++++++++++++++ > configure | 41 +++ > qapi/block-core.json | 20 +- > 5 files changed, 785 insertions(+), 2 deletions(-) > create mode 100644 block/vxhs.c > > diff --git a/block/Makefile.objs b/block/Makefile.objs > index 67a036a..58313a2 100644 > --- a/block/Makefile.objs > +++ b/block/Makefile.objs > @@ -18,6 +18,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o > block-obj-$(CONFIG_CURL) += curl.o > block-obj-$(CONFIG_RBD) += rbd.o > block-obj-$(CONFIG_GLUSTERFS) += gluster.o > +block-obj-$(CONFIG_VXHS) += vxhs.o > block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o > block-obj-$(CONFIG_LIBSSH2) += ssh.o > block-obj-y += accounting.o dirty-bitmap.o > @@ -38,6 +39,7 @@ rbd.o-cflags := $(RBD_CFLAGS) > rbd.o-libs := $(RBD_LIBS) > gluster.o-cflags := $(GLUSTERFS_CFLAGS) > gluster.o-libs := $(GLUSTERFS_LIBS) > +vxhs.o-libs := $(VXHS_LIBS) > ssh.o-cflags := $(LIBSSH2_CFLAGS) > ssh.o-libs := $(LIBSSH2_LIBS) > archipelago.o-libs := $(ARCHIPELAGO_LIBS) > diff --git a/block/trace-events b/block/trace-events > index aff8a96..5cb8089 100644 > --- a/block/trace-events > +++ b/block/trace-events > @@ -113,3 +113,24 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t > offset, size_t len) "s > qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, > uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 > qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, > uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 > qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) > "s %p acb %p ret %d offset %"PRIu64" len %zu" > + > +# block/vxhs.c > +vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d" > +vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p" > +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o > %d, %d" > +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno > %d" > +vxhs_open_fail(int ret) "Could not open the device. Error = %d" > +vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out. 
> Error=%d" > +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d" > +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void > *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu > offset = %lu ACB = %p. Error = %d, errno = %d" > +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl > failed, ret = %d, errno = %d" > +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat > ioctl returned size %lu" > +vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on > host-ip %s" > +vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s" > +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld" > +vxhs_parse_uri_filename(const char *filename) "URI passed via > bdrv_parse_filename %s" > +vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk-id from json %s" > +vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s, Port > %d" > +vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to > BDRVVXHSState" > +vxhs_qemu_init_filename(const char *filename) "Filename passed as %s" > +vxhs_close(char *vdisk_guid) "Closing vdisk %s" > diff --git a/block/vxhs.c b/block/vxhs.c > new file mode 100644 > index 0000000..d95be11 > --- /dev/null > +++ b/block/vxhs.c > @@ -0,0 +1,703 @@ > +/* > + * QEMU Block driver for Veritas HyperScale (VxHS) > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#include "qemu/osdep.h" > +#include "block/block_int.h" > +#include <qnio/qnio_api.h> > +#include "qapi/qmp/qerror.h" > +#include "qapi/qmp/qdict.h" > +#include "qapi/qmp/qstring.h" > +#include "trace.h" > +#include "qemu/uri.h" > +#include "qapi/error.h" > +#include "qemu/error-report.h" > + > +#define VDISK_FD_READ 0 > +#define VDISK_FD_WRITE 1 > + > +#define VXHS_OPT_FILENAME "filename" > +#define VXHS_OPT_VDISK_ID "vdisk-id" > +#define VXHS_OPT_SERVER "server" > +#define VXHS_OPT_HOST "host" > +#define VXHS_OPT_PORT "port" > + > +typedef struct QNIOLibState { > + int refcnt; > + QemuMutex lock;
The mutex lock is not needed; all the open/close paths run in a single IO thread.

> +    void *context;
> +} QNIOLibState;
> +
> +typedef enum {
> +    VDISK_AIO_READ,
> +    VDISK_AIO_WRITE,
> +    VDISK_STAT
> +} VDISKAIOCmd;
> +
> +/*
> + * HyperScale AIO callbacks structure
> + */
> +typedef struct VXHSAIOCB {
> +    BlockAIOCB common;
> +    int err;
> +    int direction; /* IO direction (r/w) */
> +    size_t io_offset;
> +    size_t size;
> +    QEMUIOVector *qiov;
> +} VXHSAIOCB;
> +
> +typedef struct VXHSvDiskHostsInfo {
> +    int qnio_cfd; /* Channel FD */
> +    int vdisk_rfd; /* vDisk remote FD */
> +    char *hostip; /* Host's IP addresses */
> +    int port; /* Host's port number */
> +} VXHSvDiskHostsInfo;
> +
> +/*
> + * Structure per vDisk maintained for state
> + */
> +typedef struct BDRVVXHSState {
> +    int fds[2];
> +    int64_t vdisk_size;
> +    int event_reader_pos;
> +    VXHSAIOCB *qnio_event_acb;
> +    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
> +    char *vdisk_guid;
> +} BDRVVXHSState;
> +
> +/* QNIO Library State */
> +QNIOLibState qniolib;

This should be static.

> +
> +/* vdisk prefix to pass to qnio */
> +static const char vdisk_prefix[] = "/dev/of/vdisk";
> +
> +/*
> + * Cleanup QNIO library on last close.
> + */
> +static void vxhs_qnio_close(void)
> +{
> +    qemu_mutex_lock(&qniolib.lock);
> +    qniolib.refcnt--;
> +    if (qniolib.refcnt == 0) {
> +        iio_fini(qniolib.context);
> +        qniolib.context = NULL;
> +    }
> +    qemu_mutex_unlock(&qniolib.lock);
> +}
> +
> +static void vxhs_qnio_iio_close(BDRVVXHSState *s)
> +{
> +    /*
> +     * Close vDisk device
> +     */
> +    if (s->vdisk_hostinfo.vdisk_rfd >= 0) {
> +        iio_devclose(qniolib.context, 0, s->vdisk_hostinfo.vdisk_rfd);
> +        s->vdisk_hostinfo.vdisk_rfd = -1;
> +    }
> +
> +    /*
> +     * Close QNIO channel against cached channel-fd
> +     */
> +    if (s->vdisk_hostinfo.qnio_cfd >= 0) {
> +        iio_close(qniolib.context, s->vdisk_hostinfo.qnio_cfd);
> +        s->vdisk_hostinfo.qnio_cfd = -1;
> +    }
> +
> +    vxhs_qnio_close();
> +}
> +
> +static void vxhs_iio_callback(int32_t rfd, uint32_t reason, void *ctx,
> +                              uint32_t error, uint32_t opcode)
> +{
> +    VXHSAIOCB *acb = NULL;
> +    BDRVVXHSState *s = NULL;
> +    ssize_t ret;
> +
> +    switch (opcode) {
> +    case IRP_READ_REQUEST:
> +    case IRP_WRITE_REQUEST:
> +
> +        /*
> +         * ctx is VXHSAIOCB*
> +         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP or
> +         * reason is IIO_REASON_HUP
> +         */
> +        if (ctx) {
> +            acb = ctx;
> +            s = acb->common.bs->opaque;
> +        } else {
> +            trace_vxhs_iio_callback(error, reason);
> +            goto out;
> +        }
> +
> +        if (error) {
> +            if (!acb->err) {
> +                acb->err = error;
> +            }
> +            trace_vxhs_iio_callback(error, reason);
> +        }
> +
> +        ret = qemu_write_full(s->fds[VDISK_FD_WRITE], &acb, sizeof(acb));
> +        g_assert(ret == sizeof(acb));
> +        break;
> +
> +    default:
> +        if (error == QNIOERROR_CHANNEL_HUP) {
> +            /*
> +             * Channel failed, spontaneous notification,
> +             * not in response to I/O
> +             */
> +            trace_vxhs_iio_callback_chnfail(error, errno);
> +        } else {
> +            trace_vxhs_iio_callback_unknwn(opcode, error);
> +        }
> +        break;
> +    }
> +out:
> +    return;
> +}
> +
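Coming back to the locking comment above: since all the open/close paths run in the main loop, the refcount doesn't need a mutex at all. A minimal sketch, assuming nothing else ever touches qniolib from another thread:

static void vxhs_qnio_close(void)
{
    if (--qniolib.refcnt == 0) {
        iio_fini(qniolib.context);
        qniolib.context = NULL;
    }
}

and the vxhs_qnio_open() below loses its qemu_mutex_lock()/qemu_mutex_unlock() pairs the same way.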
> +/*
> + * Initialize QNIO library on first open.
> + */
> +static int vxhs_qnio_open(void)
> +{
> +    int ret = 0;
> +
> +    qemu_mutex_lock(&qniolib.lock);
> +    if (qniolib.refcnt != 0) {
> +        g_assert(qniolib.context != NULL);
> +        qniolib.refcnt++;
> +        qemu_mutex_unlock(&qniolib.lock);
> +        return 0;
> +    }
> +    qniolib.context = iio_init(QNIO_VERSION, vxhs_iio_callback);
> +    if (qniolib.context == NULL) {
> +        ret = -ENODEV;
> +    } else {
> +        qniolib.refcnt = 1;
> +    }
> +    qemu_mutex_unlock(&qniolib.lock);
> +    return ret;
> +}
> +
> +static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
> +                              int *rfd, const char *file_name)
> +{
> +    int ret = 0;
> +    bool qnio_open = false;

This variable is not needed.

> +
> +    ret = vxhs_qnio_open();
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    /*
> +     * Open qnio channel to storage agent if not opened before.
> +     */
> +    *cfd = iio_open(qniolib.context, of_vsa_addr, 0);
> +    if (*cfd < 0) {
> +        trace_vxhs_qnio_iio_open(of_vsa_addr);
> +        ret = -ENODEV;
> +        goto err_out;
> +    }
> +
> +    /*
> +     * Open vdisk device
> +     */
> +    *rfd = iio_devopen(qniolib.context, *cfd, file_name, 0);
> +    if (*rfd < 0) {
> +        trace_vxhs_qnio_iio_devopen(file_name);
> +        ret = -ENODEV;
> +        goto err_out;
> +    }
> +    return 0;
> +
> +err_out:
> +    if (*cfd >= 0) {
> +        iio_close(qniolib.context, *cfd);
> +    }
> +    if (qnio_open) {
> +        vxhs_qnio_close();
> +    }

This can be removed; qnio_open is always false. Or was the flag meant to be set once vxhs_qnio_open() succeeds? As written, the reference taken by vxhs_qnio_open() above is never dropped on this error path.

> +    *cfd = -1;
> +    *rfd = -1;
> +    return ret;
> +}
> +
> +static void vxhs_complete_aio(VXHSAIOCB *acb, BDRVVXHSState *s)
> +{
> +    BlockCompletionFunc *cb = acb->common.cb;
> +    void *opaque = acb->common.opaque;
> +    int ret = 0;
> +
> +    if (acb->err != 0) {
> +        trace_vxhs_complete_aio(acb, acb->err);
> +        /*
> +         * We mask all the IO errors generically as EIO for upper layers
> +         * Right now our IO Manager uses non standard error codes. Instead
> +         * of confusing upper layers with incorrect interpretation we are
> +         * doing this workaround.
> +         */
> +        ret = (-EIO);
> +    }
> +
> +    qemu_aio_unref(acb);
> +    cb(opaque, ret);
> +}
> +
> +/*
> + * This is the HyperScale event handler registered to QEMU.
> + * It is invoked when any IO gets completed and written on pipe
> + * by callback called from QNIO thread context. Then it marks
> + * the AIO as completed, and releases HyperScale AIO callbacks.
> + */ > +static void vxhs_aio_event_reader(void *opaque) > +{ > + BDRVVXHSState *s = opaque; > + char *p; > + ssize_t ret; > + > + do { > + p = (char *)&s->qnio_event_acb; > + ret = read(s->fds[VDISK_FD_READ], p + s->event_reader_pos, > + sizeof(s->qnio_event_acb) - s->event_reader_pos); > + if (ret > 0) { > + s->event_reader_pos += ret; > + if (s->event_reader_pos == sizeof(s->qnio_event_acb)) { > + s->event_reader_pos = 0; > + vxhs_complete_aio(s->qnio_event_acb, s); > + } > + } > + } while (ret < 0 && errno == EINTR); > +} > + > +static QemuOptsList runtime_opts = { > + .name = "vxhs", > + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), > + .desc = { > + { > + .name = VXHS_OPT_FILENAME, > + .type = QEMU_OPT_STRING, > + .help = "URI to the Veritas HyperScale image", > + }, > + { > + .name = VXHS_OPT_VDISK_ID, > + .type = QEMU_OPT_STRING, > + .help = "UUID of the VxHS vdisk", > + }, > + { /* end of list */ } > + }, > +}; > + > +static QemuOptsList runtime_tcp_opts = { > + .name = "vxhs_tcp", > + .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head), > + .desc = { > + { > + .name = VXHS_OPT_HOST, > + .type = QEMU_OPT_STRING, > + .help = "host address (ipv4 addresses)", > + }, > + { > + .name = VXHS_OPT_PORT, > + .type = QEMU_OPT_NUMBER, > + .help = "port number on which VxHSD is listening (default 9999)", > + .def_value_str = "9999" > + }, > + { /* end of list */ } > + }, > +}; > + > +/* > + * Parse the incoming URI and populate *options with the host information. > + * URI syntax has the limitation of supporting only one host info. > + * To pass multiple host information, use the JSON syntax. > + */ > +static int vxhs_parse_uri(const char *filename, QDict *options) > +{ > + URI *uri = NULL; > + char *hoststr, *portstr; > + char *port; > + int ret = 0; > + > + trace_vxhs_parse_uri_filename(filename); > + uri = uri_parse(filename); > + if (!uri || !uri->server || !uri->path) { > + uri_free(uri); > + return -EINVAL; > + } > + > + hoststr = g_strdup(VXHS_OPT_SERVER".host"); > + qdict_put(options, hoststr, qstring_from_str(uri->server)); > + g_free(hoststr); > + > + portstr = g_strdup(VXHS_OPT_SERVER".port"); > + if (uri->port) { > + port = g_strdup_printf("%d", uri->port); > + qdict_put(options, portstr, qstring_from_str(port)); > + g_free(port); > + } > + g_free(portstr); > + > + if (strstr(uri->path, "vxhs") == NULL) { > + qdict_put(options, "vdisk-id", qstring_from_str(uri->path)); > + } > + > + trace_vxhs_parse_uri_hostinfo(1, uri->server, uri->port); > + uri_free(uri); > + > + return ret; > +} > + > +static void vxhs_parse_filename(const char *filename, QDict *options, > + Error **errp) > +{ > + if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, > "server")) { > + error_setg(errp, "vdisk-id/server and a file name may not be > specified " > + "at the same time"); > + return; > + } > + > + if (strstr(filename, "://")) { > + int ret = vxhs_parse_uri(filename, options); > + if (ret < 0) { > + error_setg(errp, "Invalid URI. 
URI should be of the form " > + " vxhs://<host_ip>:<port>/{<vdisk-id>}"); > + } > + } > +} > + > +static int vxhs_qemu_init(QDict *options, BDRVVXHSState *s, > + int *cfd, int *rfd, Error **errp) > +{ > + QDict *backing_options = NULL; > + QemuOpts *opts, *tcp_opts; > + const char *vxhs_filename; > + char *of_vsa_addr = NULL; > + Error *local_err = NULL; > + const char *vdisk_id_opt; > + const char *server_host_opt; > + char *file_name = NULL; > + char *str = NULL; > + int ret = 0; > + > + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); > + qemu_opts_absorb_qdict(opts, options, &local_err); > + if (local_err) { > + ret = -EINVAL; > + goto out; > + } > + > + vxhs_filename = qemu_opt_get(opts, VXHS_OPT_FILENAME); > + if (vxhs_filename) { > + trace_vxhs_qemu_init_filename(vxhs_filename); > + } > + > + vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID); > + if (!vdisk_id_opt) { > + error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID); > + ret = -EINVAL; > + goto out; > + } > + s->vdisk_guid = g_strdup(vdisk_id_opt); > + trace_vxhs_qemu_init_vdisk(vdisk_id_opt); > + > + str = g_strdup_printf(VXHS_OPT_SERVER"."); > + qdict_extract_subqdict(options, &backing_options, str); > + > + /* Create opts info from runtime_tcp_opts list */ > + tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort); > + qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err); > + if (local_err) { > + qdict_del(backing_options, str); > + qemu_opts_del(tcp_opts); > + ret = -EINVAL; > + goto out; > + } > + > + server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST); > + if (!server_host_opt) { > + error_setg(&local_err, QERR_MISSING_PARAMETER, > + VXHS_OPT_SERVER"."VXHS_OPT_HOST); > + ret = -EINVAL; > + goto out; > + } > + > + s->vdisk_hostinfo.hostip = g_strdup(server_host_opt); > + > + s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts, > + VXHS_OPT_PORT), > + NULL, 0); > + > + s->vdisk_hostinfo.qnio_cfd = -1; > + s->vdisk_hostinfo.vdisk_rfd = -1; > + trace_vxhs_qemu_init(s->vdisk_hostinfo.hostip, > + s->vdisk_hostinfo.port); > + > + qdict_del(backing_options, str); > + qemu_opts_del(tcp_opts); > + > + file_name = g_strdup_printf("%s%s", vdisk_prefix, s->vdisk_guid); > + of_vsa_addr = g_strdup_printf("of://%s:%d", > + s->vdisk_hostinfo.hostip, > + s->vdisk_hostinfo.port); > + > + ret = vxhs_qnio_iio_open(cfd, of_vsa_addr, rfd, file_name); > + if (ret) { > + error_setg(&local_err, "Failed qnio_iio_open"); > + ret = -EIO; > + } > + > +out: > + g_free(str); > + g_free(file_name); > + g_free(of_vsa_addr); > + qemu_opts_del(opts); > + > + if (ret < 0) { > + error_propagate(errp, local_err); > + g_free(s->vdisk_hostinfo.hostip); > + g_free(s->vdisk_guid); > + s->vdisk_guid = NULL; > + errno = -ret; > + } > + > + return ret; > +} > + > +static int vxhs_open(BlockDriverState *bs, QDict *options, > + int bdrv_flags, Error **errp) > +{ > + BDRVVXHSState *s = bs->opaque; > + AioContext *aio_context; > + int qemu_qnio_cfd = -1; > + int qemu_rfd = -1; > + int ret = 0; > + > + ret = vxhs_qemu_init(options, s, &qemu_qnio_cfd, &qemu_rfd, errp); > + if (ret < 0) { > + trace_vxhs_open_fail(ret); > + return ret; > + } > + > + s->vdisk_hostinfo.qnio_cfd = qemu_qnio_cfd; > + s->vdisk_hostinfo.vdisk_rfd = qemu_rfd; > + s->vdisk_size = -1; > + > + /* > + * Create a pipe for communicating between two threads in different > + * context. Set handler for read event, which gets triggered when > + * IO completion is done by non-QEMU context. 
> + */ > + ret = qemu_pipe(s->fds); > + if (ret < 0) { > + trace_vxhs_open_epipe(ret); > + ret = -errno; > + goto errout; > + } > + fcntl(s->fds[VDISK_FD_READ], F_SETFL, O_NONBLOCK); > + > + aio_context = bdrv_get_aio_context(bs); > + aio_set_fd_handler(aio_context, s->fds[VDISK_FD_READ], > + false, vxhs_aio_event_reader, NULL, s); > + return 0; > + > +errout: > + /* > + * Close remote vDisk device if it was opened earlier > + */ > + vxhs_qnio_iio_close(s); > + trace_vxhs_open_fail(ret); > + return ret; > +} > + > +static const AIOCBInfo vxhs_aiocb_info = { > + .aiocb_size = sizeof(VXHSAIOCB) > +}; > + > +/* > + * This allocates QEMU-VXHS callback for each IO > + * and is passed to QNIO. When QNIO completes the work, > + * it will be passed back through the callback. > + */ > +static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num, > + QEMUIOVector *qiov, int nb_sectors, > + BlockCompletionFunc *cb, void *opaque, int > iodir) > +{ > + VXHSAIOCB *acb = NULL; > + BDRVVXHSState *s = bs->opaque; > + size_t size; > + uint64_t offset; > + int iio_flags = 0; > + int ret = 0; > + uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd; > + > + offset = sector_num * BDRV_SECTOR_SIZE; > + size = nb_sectors * BDRV_SECTOR_SIZE; > + acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque); > + /* > + * Setup or initialize VXHSAIOCB. > + * Every single field should be initialized since > + * acb will be picked up from the slab without > + * initializing with zero. > + */ > + acb->io_offset = offset; > + acb->size = size; > + acb->err = 0; > + acb->qiov = qiov; > + acb->direction = iodir; > + > + iio_flags = (IIO_FLAG_DONE | IIO_FLAG_ASYNC); > + > + switch (iodir) { > + case VDISK_AIO_WRITE: > + ret = iio_writev(qniolib.context, rfd, qiov->iov, qiov->niov, > + offset, (uint64_t)size, (void *)acb, iio_flags); > + break; > + case VDISK_AIO_READ: > + ret = iio_readv(qniolib.context, rfd, qiov->iov, qiov->niov, > + offset, (uint64_t)size, (void *)acb, iio_flags); > + break; > + default: > + trace_vxhs_aio_rw_invalid(iodir); > + goto errout; > + } > + > + if (ret != 0) { > + trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset, > + acb, ret, errno); > + goto errout; > + } > + return &acb->common; > + > +errout: > + qemu_aio_unref(acb); > + return NULL; > +} > + > +static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs, > + int64_t sector_num, QEMUIOVector *qiov, > + int nb_sectors, > + BlockCompletionFunc *cb, void *opaque) > +{ > + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb, > + opaque, VDISK_AIO_READ); > +} > + > +static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs, > + int64_t sector_num, QEMUIOVector *qiov, > + int nb_sectors, > + BlockCompletionFunc *cb, void *opaque) > +{ > + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, > + cb, opaque, VDISK_AIO_WRITE); > +} > + > +static void vxhs_close(BlockDriverState *bs) > +{ > + BDRVVXHSState *s = bs->opaque; > + > + trace_vxhs_close(s->vdisk_guid); > + close(s->fds[VDISK_FD_READ]); > + close(s->fds[VDISK_FD_WRITE]); > + > + /* > + * Clearing all the event handlers for oflame registered to QEMU > + */ > + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ], > + false, NULL, NULL, NULL); > + g_free(s->vdisk_guid); > + s->vdisk_guid = NULL; > + vxhs_qnio_iio_close(s); > + > + /* > + * Free the dynamically allocated hostip string > + */ > + g_free(s->vdisk_hostinfo.hostip); > + s->vdisk_hostinfo.hostip = NULL; > + s->vdisk_hostinfo.port = 0; > +} > + > +static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) > +{ > + int64_t 
vdisk_size = -1; > + int ret = 0; > + uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd; > + > + ret = iio_ioctl(qniolib.context, rfd, IOR_VDISK_STAT, &vdisk_size, NULL, > 0); > + if (ret < 0) { > + trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); > + return -EIO; > + } > + > + trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size); > + return vdisk_size; > +} > + > +/* > + * Returns the size of vDisk in bytes. This is required > + * by QEMU block upper block layer so that it is visible > + * to guest. > + */ > +static int64_t vxhs_getlength(BlockDriverState *bs) > +{ > + BDRVVXHSState *s = bs->opaque; > + int64_t vdisk_size; > + > + if (s->vdisk_size <= 0) { > + vdisk_size = vxhs_get_vdisk_stat(s); > + if (vdisk_size < 0) { > + return -EIO; > + } > + s->vdisk_size = vdisk_size; > + } I'm curious regarding the caching of vdisk_size. Is this a performance optimization? I'm not seeing the practical benefit of it, and if you ever support resizing images it may be an issue. > + > + return s->vdisk_size; > +} > + > +static void vxhs_detach_aio_context(BlockDriverState *bs) > +{ > + BDRVVXHSState *s = bs->opaque; > + > + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ], > + false, NULL, NULL, NULL); > + > +} > + > +static void vxhs_attach_aio_context(BlockDriverState *bs, > + AioContext *new_context) > +{ > + BDRVVXHSState *s = bs->opaque; > + > + aio_set_fd_handler(new_context, s->fds[VDISK_FD_READ], > + false, vxhs_aio_event_reader, NULL, s); > +} > + > +static BlockDriver bdrv_vxhs = { > + .format_name = "vxhs", > + .protocol_name = "vxhs", > + .instance_size = sizeof(BDRVVXHSState), > + .bdrv_file_open = vxhs_open, > + .bdrv_parse_filename = vxhs_parse_filename, > + .bdrv_close = vxhs_close, > + .bdrv_getlength = vxhs_getlength, > + .bdrv_aio_readv = vxhs_aio_readv, > + .bdrv_aio_writev = vxhs_aio_writev, > + .bdrv_detach_aio_context = vxhs_detach_aio_context, > + .bdrv_attach_aio_context = vxhs_attach_aio_context, > +}; > + > +static void bdrv_vxhs_init(void) > +{ > + qniolib.refcnt = 0; > + qniolib.context = NULL; Once qniolib is static, it will be zero initialized, so this can be dropped. 
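Taken together with the earlier comments (static, zero-initialized qniolib and no mutex), the whole function should collapse to just:

static void bdrv_vxhs_init(void)
{
    bdrv_register(&bdrv_vxhs);
}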
> + qemu_mutex_init(&qniolib.lock); (and of course this isn't needed once the lock is removed) > + bdrv_register(&bdrv_vxhs); > +} > + > +block_init(bdrv_vxhs_init); > diff --git a/configure b/configure > index f83cdf8..053358d 100755 > --- a/configure > +++ b/configure > @@ -321,6 +321,7 @@ numa="" > tcmalloc="no" > jemalloc="no" > replication="yes" > +vxhs="" > > # parse CC options first > for opt do > @@ -1162,6 +1163,11 @@ for opt do > ;; > --enable-replication) replication="yes" > ;; > + --disable-vxhs) vxhs="no" > + ;; > + --enable-vxhs) vxhs="yes" > + ;; > + > *) > echo "ERROR: unknown option $opt" > echo "Try '$0 --help' for more information" > @@ -1394,6 +1400,7 @@ disabled with --disable-FEATURE, default is enabled if > available: > tcmalloc tcmalloc support > jemalloc jemalloc support > replication replication support > + vxhs Veritas HyperScale vDisk backend support > > NOTE: The object files are built at the place where configure is launched > EOF > @@ -4688,6 +4695,33 @@ if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO > $TMPO; then > fi > > ########################################## > +# Veritas HyperScale block driver VxHS > +# Check if libqnio is installed > + > +if test "$vxhs" != "no" ; then > + cat > $TMPC <<EOF > +#include <stdint.h> > +#include <qnio/qnio_api.h> > + > +void *vxhs_callback; > + > +int main(void) { > + iio_init(vxhs_callback); > + return 0; > +} > +EOF > + vxhs_libs="-lqnio" > + if compile_prog "" "$vxhs_libs" ; then > + vxhs=yes > + else > + if test "$vxhs" = "yes" ; then > + feature_not_found "vxhs block device" "Install libqnio. See github" > + fi > + vxhs=no > + fi > +fi > + > +########################################## > # End of CC checks > # After here, no more $cc or $ld runs > > @@ -5053,6 +5087,7 @@ echo "tcmalloc support $tcmalloc" > echo "jemalloc support $jemalloc" > echo "avx2 optimization $avx2_opt" > echo "replication support $replication" > +echo "VxHS block device $vxhs" > > if test "$sdl_too_old" = "yes"; then > echo "-> Your SDL version is too old - please upgrade to have SDL support" > @@ -5661,6 +5696,12 @@ if test "$pthread_setname_np" = "yes" ; then > echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak > fi > > +if test "$vxhs" = "yes" ; then > + echo "CONFIG_VXHS=y" >> $config_host_mak > + echo "VXHS_CFLAGS=$vxhs_cflags" >> $config_host_mak > + echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak > +fi > + > if test "$tcg_interpreter" = "yes"; then > QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES" > elif test "$ARCH" = "sparc64" ; then > diff --git a/qapi/block-core.json b/qapi/block-core.json > index cd1fa7b..1c4a7af 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -1704,12 +1704,13 @@ > # @host_device, @host_cdrom: Since 2.1 > # @gluster: Since 2.7 > # @nbd: Since 2.8 > +# @vxhs: Since 2.8 > # > # Since: 2.0 > ## > { 'enum': 'BlockdevDriver', > 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', > - 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', > + 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs', > 'host_device', 'http', 'https', 'luks', 'nbd', 'null-aio', > 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', > 'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } > @@ -2238,6 +2239,20 @@ > '*export': 'str', > '*tls-creds': 'str' } } > > +# @BlockdevOptionsVxHS > +# > +# Driver specific block device options for VxHS > +# > +# @vdisk-id: UUID of VxHS volume > +# > +# @server: vxhs server IP, port > +# > +# Since: 2.8 > +## > +{ 'struct': 
'BlockdevOptionsVxHS', > + 'data': { 'vdisk-id': 'str', > + 'server': 'InetSocketAddress' } } > + > ## > # @BlockdevOptions > # > @@ -2302,7 +2317,8 @@ > 'vhdx': 'BlockdevOptionsGenericFormat', > 'vmdk': 'BlockdevOptionsGenericCOWFormat', > 'vpc': 'BlockdevOptionsGenericFormat', > - 'vvfat': 'BlockdevOptionsVVFAT' > + 'vvfat': 'BlockdevOptionsVVFAT', > + 'vxhs': 'BlockdevOptionsVxHS' > } } > > ## > -- > 2.5.5 >
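One thought for the qemu-iotest TODO: besides the URI and JSON spellings in the commit message, the schema above should make the driver reachable through flat options as well. Untested, and assuming I'm reading the option parsing right, something like:

  qemu-system-x86_64 ... -drive driver=vxhs,vdisk-id=c6718f6b-0401-441d-a8c3-1f0064d75ee0,server.host=192.168.0.1,server.port=9999

It would be good for the test to exercise all three variants.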