Am 17.09.2012 17:26, schrieb Bharata B Rao: > block: Support GlusterFS as a QEMU block backend. > > From: Bharata B Rao <bhar...@linux.vnet.ibm.com> > > This patch adds gluster as the new block backend in QEMU. This gives > QEMU the ability to boot VM images from gluster volumes. Its already > possible to boot from VM images on gluster volumes using FUSE mount, but > this patchset provides the ability to boot VM images from gluster volumes > by by-passing the FUSE layer in gluster. This is made possible by > using libgfapi routines to perform IO on gluster volumes directly. > > VM Image on gluster volume is specified like this: > > file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] > > 'gluster' is the protocol. > > 'transport' specifies the transport type used to connect to gluster > management daemon (glusterd). Valid transport types are > tcp, unix and rdma. If the transport type isn't specified, then tcp > type is assumed. > > 'server' specifies the server where the volume file specification for > the given volume resides. This can be either hostname or ipv4 address > or ipv6 address. ipv6 address needs to be with in square brackets [ ]. > If transport type is 'unix', then server field is ignored, but the > 'socket' field needs to be populated with the path to unix domain > socket. > > 'port' is the port number on which glusterd is listening. This is optional > and if not specified, QEMU will send 0 which will make gluster to use the > default port. port is ignored for unix type of transport. > > 'volname' is the name of the gluster volume which contains the VM image. > > 'image' is the path to the actual VM image that resides on gluster volume. 
> > Examples: > > file=gluster://1.2.3.4/testvol/a.img > file=gluster+tcp://1.2.3.4/testvol/a.img > file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img > file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img > file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img > file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img > file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket > file=gluster+rdma://1.2.3.4:24007/testvol/a.img > > Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> > --- > > block/Makefile.objs | 1 > block/gluster.c | 694 > +++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 695 insertions(+), 0 deletions(-) > create mode 100644 block/gluster.c > > > diff --git a/block/Makefile.objs b/block/Makefile.objs > index b5754d3..a1ae67f 100644 > --- a/block/Makefile.objs > +++ b/block/Makefile.objs > @@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o > block-obj-$(CONFIG_LIBISCSI) += iscsi.o > block-obj-$(CONFIG_CURL) += curl.o > block-obj-$(CONFIG_RBD) += rbd.o > +block-obj-$(CONFIG_GLUSTERFS) += gluster.o > diff --git a/block/gluster.c b/block/gluster.c > new file mode 100644 > index 0000000..0de3286 > --- /dev/null > +++ b/block/gluster.c > @@ -0,0 +1,694 @@ > +/* > + * GlusterFS backend for QEMU > + * > + * Copyright (C) 2012 Bharata B Rao <bhar...@linux.vnet.ibm.com> > + * > + * Pipe handling mechanism in AIO implementation is derived from > + * block/rbd.c. Hence, > + * > + * Copyright (C) 2010-2011 Christian Brunner <c...@muc.de>, > + * Josh Durgin <josh.dur...@dreamhost.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + * Contributions after 2012-01-13 are licensed under the terms of the > + * GNU GPL, version 2 or (at your option) any later version. 
> + */ > +#include <glusterfs/api/glfs.h> > +#include "block_int.h" > +#include "qemu_socket.h" > + > +typedef struct GlusterAIOCB { > + BlockDriverAIOCB common; > + int64_t size; > + int ret; > + bool *finished; > + QEMUBH *bh; > +} GlusterAIOCB; > + > +typedef struct BDRVGlusterState { > + struct glfs *glfs; > + int fds[2]; > + struct glfs_fd *fd; > + int qemu_aio_count; > + int event_reader_pos; > + GlusterAIOCB *event_acb; > +} BDRVGlusterState; > + > +#define GLUSTER_FD_READ 0 > +#define GLUSTER_FD_WRITE 1 > + > +#define GLUSTER_TRANSPORT_DEFAULT "gluster://" > +#define GLUSTER_TRANSPORT_DEFAULT_SZ strlen(GLUSTER_TRANSPORT_DEFAULT) > +#define GLUSTER_TRANSPORT_TCP "gluster+tcp://" > +#define GLUSTER_TRANSPORT_TCP_SZ strlen(GLUSTER_TRANSPORT_TCP) > +#define GLUSTER_TRANSPORT_UNIX "gluster+unix://" > +#define GLUSTER_TRANSPORT_UNIX_SZ strlen(GLUSTER_TRANSPORT_UNIX) > +#define GLUSTER_TRANSPORT_RDMA "gluster+rdma://" > +#define GLUSTER_TRANSPORT_RDMA_SZ strlen(GLUSTER_TRANSPORT_RDMA) > + > +typedef struct GlusterURI { > + char *server; > + int port; > + char *volname; > + char *image; > + char *transport; > + bool is_unix; > +} GlusterURI; > + > +static void qemu_gluster_uri_free(GlusterURI *uri) > +{ > + g_free(uri->server); > + g_free(uri->volname); > + g_free(uri->image); > + g_free(uri->transport); > + g_free(uri); > +} > + > +static int parse_socket(GlusterURI *uri, char *socket) > +{ > + char *token, *saveptr; > + > + if (!socket) { > + return 0; > + } > + token = strtok_r(socket, "=", &saveptr); > + if (!token || strcmp(token, "socket")) { > + return -EINVAL; > + } > + token = strtok_r(NULL, "=", &saveptr); > + if (!token) { > + return -EINVAL; > + } > + uri->server = g_strdup(token); > + uri->is_unix = true; > + return 0; > +} > + > +static int parse_gluster_spec(GlusterURI *uri, char *spec) > +{ > + char *token, *saveptr; > + int ret; > + QemuOpts *opts; > + char *p, *q; > + > + /* transport */ > + p = spec; > + if (!strncmp(p, GLUSTER_TRANSPORT_DEFAULT, 
> GLUSTER_TRANSPORT_DEFAULT_SZ)) { > + uri->transport = g_strdup("tcp"); > + p += GLUSTER_TRANSPORT_DEFAULT_SZ; > + } else if (!strncmp(p, GLUSTER_TRANSPORT_TCP, GLUSTER_TRANSPORT_TCP_SZ)) > { > + uri->transport = g_strdup("tcp"); > + p += GLUSTER_TRANSPORT_TCP_SZ; > + } else if (!strncmp(p, GLUSTER_TRANSPORT_UNIX, > GLUSTER_TRANSPORT_UNIX_SZ)) { > + uri->transport = g_strdup("unix"); > + p += GLUSTER_TRANSPORT_UNIX_SZ; > + } else if (!strncmp(p, GLUSTER_TRANSPORT_RDMA, > GLUSTER_TRANSPORT_RDMA_SZ)) {
Would look a bit nicer with strstart() from cutils.c instead of strncmp(). > + uri->transport = g_strdup("rdma"); > + p += GLUSTER_TRANSPORT_RDMA_SZ; > + } else { > + return -EINVAL; > + } > + q = p; > + > + /* server */ > + if (!strcmp(uri->transport, "unix")) { > + if (!uri->is_unix) { > + return -EINVAL; > + } > + } else { > + if (uri->is_unix) { > + return -EINVAL; > + } > + p = strchr(p, '/'); > + if (!p) { > + return -EINVAL; > + } > + *p++ = '\0'; > + opts = qemu_opts_create(qemu_find_opts("inet"), NULL, 0, NULL); > + ret = inet_parse(opts, q); > + if (!ret) { > + uri->server = g_strdup(qemu_opt_get(opts, "host")); > + uri->port = strtoul(qemu_opt_get(opts, "port"), NULL, 0); > + if (uri->port < 0) { > + ret = -EINVAL; > + } > + } > + qemu_opts_del(opts); > + if (ret < 0) { > + return -EINVAL; > + } > + } > + > + /* volname */ > + token = strtok_r(p, "/", &saveptr); > + if (!token) { > + return -EINVAL; > + } > + uri->volname = g_strdup(token); > + > + /* image */ > + token = strtok_r(NULL, "?", &saveptr); > + if (!token) { > + return -EINVAL; > + } > + uri->image = g_strdup(token); > + return 0; > +} > + > +/* > + * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] > + * > + * 'gluster' is the protocol. > + * > + * 'transport' specifies the transport type used to connect to gluster > + * management daemon (glusterd). Valid transport types are > + * tcp, unix and rdma. If the transport type isn't specified, then tcp > + * type is assumed. > + * > + * 'server' specifies the server where the volume file specification for > + * the given volume resides. This can be either hostname or ipv4 address > + * or ipv6 address. ipv6 address needs to be with in square brackets [ ]. > + * If transport type is 'unix', then server field is ignored, but the > + * 'socket' field needs to be populated with the path to unix domain > + * socket. > + * > + * 'port' is the port number on which glusterd is listening. 
This is optional > + * and if not specified, QEMU will send 0 which will make gluster to use the > + * default port. port is ignored for unix type of transport. > + * > + * 'volname' is the name of the gluster volume which contains the VM image. > + * > + * 'image' is the path to the actual VM image that resides on gluster volume. > + * > + * Examples: > + * > + * file=gluster://1.2.3.4/testvol/a.img > + * file=gluster+tcp://1.2.3.4/testvol/a.img > + * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img > + * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img > + * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img > + * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img > + * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket > + * file=gluster+rdma://1.2.3.4:24007/testvol/a.img > + */ > +static int qemu_gluster_parseuri(GlusterURI *uri, const char *filename) > +{ > + char *token, *saveptr; > + char *p, *q, *gluster_spec = NULL; > + int ret = -EINVAL; > + > + p = q = g_strdup(filename); Neither p nor q is changed, so one variable would be enough. > + > + /* Extract server, volname and image */ > + token = strtok_r(p, "?", &saveptr); > + if (!token) { > + goto out; > + } > + gluster_spec = g_strdup(token); > + > + /* socket */ > + token = strtok_r(NULL, "?", &saveptr); > + ret = parse_socket(uri, token); > + if (ret < 0) { > + goto out; > + } The is_unix thing feels a bit backwards. You set it whenever an option is present, and fail if a protocol other than gluster+unix:// is used. Wouldn't it make more sense to set it depending on the protocol and then check in the option parser if is_unix == true when there is a 'socket' option? Otherwise adding non-unix options later is going to become hard. The rest looks good now. Kevin