block: gluster as block backend

From: Bharata B Rao <bhar...@linux.vnet.ibm.com>

This patch adds gluster as a new block backend in QEMU. This gives QEMU
the ability to boot VM images stored on gluster volumes.

Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com>
---

 Makefile.objs   |    2 
 block/gluster.c |  435 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 436 insertions(+), 1 deletions(-)
 create mode 100644 block/gluster.c


diff --git a/Makefile.objs b/Makefile.objs
index 25190ba..859b88a 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -59,7 +59,7 @@ block-nested-$(CONFIG_POSIX) += raw-posix.o
 block-nested-$(CONFIG_LIBISCSI) += iscsi.o
 block-nested-$(CONFIG_CURL) += curl.o
 block-nested-$(CONFIG_RBD) += rbd.o
-block-nested-$(CONFIG_GLUSTERFS) += gluster-helpers.o
+block-nested-$(CONFIG_GLUSTERFS) += gluster-helpers.o gluster.o
 
 block-obj-y +=  $(addprefix block/, $(block-nested-y))
 
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 0000000..1566cb7
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,435 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * (AIO implementation is derived from block/rbd.c)
+ *
+ * Copyright (C) 2012 Bharata B Rao <bhar...@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "block_int.h"
+#include "gluster-helpers.h"
+
+typedef void *gluster_file_t; /* handle returned by gluster_open()/gluster_creat() */
+
+typedef struct glusterConf { /* fields parsed out of "protocol:volfile:image" */
+    char volfile[PATH_MAX]; /* second ':' field; handed to gluster_init() */
+    char image[PATH_MAX]; /* third ':' field; handed to gluster_open()/gluster_creat() */
+} glusterConf;
+
+typedef struct BDRVGlusterState { /* per-image driver state (bs->opaque) */
+    int fds[2]; /* pipe from qemu_pipe(); indexed by GLUSTER_FD_READ/GLUSTER_FD_WRITE */
+    int open_flags; /* O_* flags passed to gluster_open() */
+    gluster_file_t fd; /* image handle from gluster_open(); NULL after close */
+    glusterfs_ctx_t *ctx; /* value returned by gluster_init() */
+    int qemu_aio_count; /* requests submitted and not yet consumed by the event reader */
+    int event_reader_pos; /* bytes of event_gaiocb received from the pipe so far */
+    gluster_aiocb_t *event_gaiocb; /* pointer value being reassembled from the pipe */
+} BDRVGlusterState;
+
+typedef struct glusterAIOCB { /* one in-flight read or write request */
+    BlockDriverAIOCB common; /* generic part; carries the completion cb and its opaque */
+    QEMUBH *bh; /* bottom half that runs gluster_aio_bh_cb(); NULL when none is pending */
+    QEMUIOVector *qiov; /* caller's I/O vector */
+    int ret; /* result for the callback; positive values are reported as 0 */
+    int write; /* non-zero for a write request, zero for a read */
+    char *bounce; /* linear buffer from qemu_blockalign() used for the transfer */
+    BDRVGlusterState *s; /* driver state the request belongs to */
+    int cancelled; /* checked on completion; nothing in this file sets it */
+    int error; /* set to 1 once a failure has been recorded in ret */
+} glusterAIOCB;
+
+#define GLUSTER_FD_READ 0 /* index into BDRVGlusterState.fds used with read() */
+#define GLUSTER_FD_WRITE 1 /* index into BDRVGlusterState.fds used with write() */
+
+/*
+ * Copy the NUL-terminated string @src into the @dst_size byte buffer @dst.
+ *
+ * Returns 0 on success, or -ENAMETOOLONG (leaving @dst untouched) if @src
+ * including its terminating NUL does not fit.
+ */
+static int qemu_gluster_copy_field(char *dst, size_t dst_size, const char *src)
+{
+    size_t len = strlen(src);
+
+    if (len >= dst_size) {
+        return -ENAMETOOLONG;
+    }
+    /* len + 1 so that the terminating NUL is copied as well */
+    memcpy(dst, src, len + 1);
+    return 0;
+}
+
+/*
+ * Split a filename of the form
+ *
+ *     protocol:volfile:image
+ *
+ * into its volfile and image components and store them in @c.  The leading
+ * protocol field is discarded.  Fields are separated with strtok_r(), so
+ * consecutive ':' characters are treated as a single separator.
+ *
+ * Returns 0 on success, -EINVAL if any of the three fields is missing, or
+ * -ENAMETOOLONG if volfile or image does not fit in its PATH_MAX byte
+ * buffer.  On failure the contents of @c are unspecified.
+ */
+static int qemu_gluster_parsename(glusterConf *c, const char *filename)
+{
+    char *file = g_strdup(filename);
+    char *token, *saveptr;
+    int ret = 0;
+
+    /* Discard the protocol */
+    token = strtok_r(file, ":", &saveptr);
+    if (!token) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* volfile */
+    token = strtok_r(NULL, ":", &saveptr);
+    if (!token) {
+        ret = -EINVAL;
+        goto out;
+    }
+    ret = qemu_gluster_copy_field(c->volfile, sizeof(c->volfile), token);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* image */
+    token = strtok_r(NULL, ":", &saveptr);
+    if (!token) {
+        ret = -EINVAL;
+        goto out;
+    }
+    ret = qemu_gluster_copy_field(c->image, sizeof(c->image), token);
+out:
+    g_free(file);
+    return ret;
+}
+
+/*
+ * Bottom half that finishes a request.
+ *
+ * For reads the bounce buffer is copied back into the caller's I/O vector.
+ * The bounce buffer is then freed, the completion callback is invoked (a
+ * positive result is reported as 0, anything else is passed through), and
+ * finally the bottom half and the AIOCB are released.
+ */
+static void gluster_aio_bh_cb(void *opaque)
+{
+    glusterAIOCB *req = opaque;
+    int status;
+
+    if (req->write == 0) {
+        qemu_iovec_from_buffer(req->qiov, req->bounce, req->qiov->size);
+    }
+    qemu_vfree(req->bounce);
+
+    status = req->ret;
+    if (status > 0) {
+        status = 0;
+    }
+    req->common.cb(req->common.opaque, status);
+
+    qemu_bh_delete(req->bh);
+    req->bh = NULL;
+    qemu_aio_release(req);
+}
+
+/*
+ * Record the outcome of a finished gluster request in its AIOCB and schedule
+ * the bottom half that reports it.  @gaiocb is freed before returning.
+ *
+ * A cancelled request is torn down here without scheduling a bottom half.
+ * A negative result is stored as is and marks the request as failed; a
+ * failed read also has its whole buffer zeroed.  A short read has the
+ * unread tail zeroed and is reported as a full-size transfer.
+ */
+static void qemu_gluster_complete_aio(gluster_aiocb_t *gaiocb)
+{
+    glusterAIOCB *req = (glusterAIOCB *)gaiocb->opaque;
+    int64_t result;
+
+    if (req->cancelled) {
+        qemu_vfree(req->bounce);
+        qemu_aio_release(req);
+        g_free(gaiocb);
+        return;
+    }
+
+    result = gaiocb->ret;
+
+    if (result < 0) {
+        /* Failure: a read must not expose stale buffer contents */
+        if (!req->write) {
+            memset(gaiocb->buf, 0, gaiocb->size);
+        }
+        req->ret = result;
+        req->error = 1;
+    } else if (!req->write && result < gaiocb->size) {
+        /* Short read: zero-fill the part that was not read */
+        memset(gaiocb->buf + result, 0, gaiocb->size - result);
+        if (!req->error) {
+            req->ret = gaiocb->size;
+        }
+    } else if (!req->error) {
+        if (req->write) {
+            req->ret = gaiocb->size;
+        } else {
+            req->ret = result;
+        }
+    }
+
+    req->bh = qemu_bh_new(gluster_aio_bh_cb, req);
+    qemu_bh_schedule(req->bh);
+    g_free(gaiocb);
+}
+
+/*
+ * fd handler for the read end of the completion pipe.
+ *
+ * Each completion arrives as the raw bytes of a gluster_aiocb_t pointer.
+ * The bytes are accumulated in s->event_gaiocb across calls; once a whole
+ * pointer has been received the request is completed and the in-flight
+ * counter is decremented.  A read interrupted by a signal is retried.
+ */
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *state = opaque;
+    const int pipe_rd = state->fds[GLUSTER_FD_READ];
+    ssize_t nread;
+
+    for (;;) {
+        char *dst = (char *)&state->event_gaiocb + state->event_reader_pos;
+
+        nread = read(pipe_rd, dst,
+                     sizeof(state->event_gaiocb) - state->event_reader_pos);
+        if (nread > 0) {
+            state->event_reader_pos += nread;
+            if (state->event_reader_pos == sizeof(state->event_gaiocb)) {
+                state->event_reader_pos = 0;
+                qemu_gluster_complete_aio(state->event_gaiocb);
+                state->qemu_aio_count--;
+            }
+            return;
+        }
+        /* Retry only when the read was interrupted by a signal */
+        if (nread == 0 || errno != EINTR) {
+            return;
+        }
+    }
+}
+
+/*
+ * Flush callback registered together with the event reader: returns 1 while
+ * at least one request is still counted as in flight, 0 otherwise.
+ */
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *state = opaque;
+    int in_flight = state->qemu_aio_count;
+
+    return in_flight > 0;
+}
+
+/*
+ * Open the image named by @filename ("protocol:volfile:image").
+ *
+ * Initialises gluster from the volfile, opens the image with flags derived
+ * from @bdrv_flags, then creates the non-blocking pipe used to deliver
+ * completions and registers its read end with the AIO fd handlers.
+ *
+ * Returns 0 on success and a negative value on failure.  On failure any
+ * image handle that was opened is closed again and s->fd is reset to NULL.
+ */
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+    int bdrv_flags)
+{
+    BDRVGlusterState *s = bs->opaque;
+    glusterConf *c = g_malloc(sizeof(glusterConf));
+    int ret = -1;
+
+    if (qemu_gluster_parsename(c, filename)) {
+        goto out;
+    }
+
+    s->ctx = gluster_init(c->volfile);
+    if (!s->ctx) {
+        goto out;
+    }
+
+    /* FIX: Server client handshake takes time */
+    sleep(1);
+
+    s->open_flags |= O_BINARY;
+    s->open_flags &= ~O_ACCMODE;
+    if (bdrv_flags & BDRV_O_RDWR) {
+        s->open_flags |= O_RDWR;
+    } else {
+        s->open_flags |= O_RDONLY;
+    }
+
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if (bdrv_flags & BDRV_O_NOCACHE) {
+        s->open_flags |= O_DIRECT;
+    }
+    if (!(bdrv_flags & BDRV_O_CACHE_WB)) {
+        s->open_flags |= O_DSYNC;
+    }
+
+    s->fd = gluster_open(c->image, s->open_flags, 0);
+    if (!s->fd) {
+        goto out;
+    }
+
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        goto out;
+    }
+    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
+    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+out:
+    g_free(c);
+    /*
+     * Only close a handle that was actually opened, and clear it so that
+     * no later caller sees a stale pointer.
+     */
+    if (ret < 0 && s->fd) {
+        gluster_close(s->fd);
+        s->fd = NULL;
+    }
+    return ret;
+}
+
+/*
+ * Create the image named by @filename ("protocol:volfile:image") and size it
+ * according to the BLOCK_OPT_SIZE entry of @options, rounded down to a whole
+ * number of sectors.
+ *
+ * Returns 0 on success, the error from qemu_gluster_parsename() if the name
+ * is malformed, -1 if gluster_init() fails, or -errno if creating, resizing
+ * or closing the image fails.  When several steps fail, the first error is
+ * the one reported.
+ */
+static int qemu_gluster_create(const char *filename,
+        QEMUOptionParameter *options)
+{
+    glusterConf *c = g_malloc(sizeof(glusterConf));
+    int ret = 0;
+    gluster_file_t fd;
+    int64_t total_size = 0;
+
+    ret = qemu_gluster_parsename(c, filename);
+    if (ret) {
+        goto out;
+    }
+
+    if (!gluster_init(c->volfile)) {
+        ret = -1;
+        goto out;
+    }
+
+    /* FIX: Server client handshake takes time */
+    sleep(1);
+
+    /* Read out options */
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        }
+        options++;
+    }
+
+    fd = gluster_creat(c->image, 0644);
+    if (!fd) {
+        ret = -errno;
+        goto out;
+    }
+
+    if (gluster_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+        ret = -errno;
+    }
+    /*
+     * Always close the handle, but do not let a close failure overwrite an
+     * earlier truncate error.
+     */
+    if (gluster_close(fd) != 0 && ret == 0) {
+        ret = -errno;
+    }
+out:
+    g_free(c);
+    return ret;
+}
+
+static AIOPool gluster_aio_pool = { /* pool passed to qemu_aio_get() in qemu_gluster_aio_rw() */
+    .aiocb_size = sizeof(glusterAIOCB), /* every AIOCB from this pool is a glusterAIOCB */
+};
+
+/*
+ * Hand @gaiocb to the event reader by writing the pointer value itself into
+ * the write end of the completion pipe.
+ *
+ * A write interrupted by a signal is restarted.  If the pipe is full
+ * (EAGAIN) the function waits in select() and then tries the write again.
+ * Any other write error ends the attempt.
+ *
+ * Returns the non-negative result of write() on success, or the negative
+ * result of write() on an error other than EINTR/EAGAIN.
+ */
+static int qemu_gluster_send_pipe(BDRVGlusterState *s, gluster_aiocb_t *gaiocb)
+{
+    const int pipe_wr = s->fds[GLUSTER_FD_WRITE];
+    int rc;
+
+    for (;;) {
+        fd_set writable;
+
+        rc = write(pipe_wr, (void *)&gaiocb, sizeof(gaiocb));
+        if (rc >= 0) {
+            return rc;
+        }
+
+        switch (errno) {
+        case EINTR:
+            continue;
+        case EAGAIN:
+            break;
+        default:
+            return rc;
+        }
+
+        /* Pipe is full: wait until select() returns without EINTR */
+        FD_ZERO(&writable);
+        FD_SET(pipe_wr, &writable);
+        do {
+            rc = select(pipe_wr + 1, NULL, &writable, NULL, NULL);
+        } while (rc < 0 && errno == EINTR);
+    }
+}
+
+/*
+ * Completion function installed in every gluster_aiocb_t by
+ * qemu_gluster_aio_rw().  It forwards the finished request to the event
+ * reader through the pipe; if that fails, the request descriptor is freed
+ * here.
+ */
+static void gluster_finish_aiocb(void *arg)
+{
+    gluster_aiocb_t *gaiocb = (gluster_aiocb_t *)arg;
+    glusterAIOCB *req = (glusterAIOCB *)gaiocb->opaque;
+
+    if (qemu_gluster_send_pipe(req->s, gaiocb) < 0) {
+        g_free(gaiocb);
+    }
+}
+
+/*
+ * Common implementation of asynchronous reads and writes.
+ *
+ * The transfer goes through a linear bounce buffer: for a write the
+ * caller's I/O vector is copied into it before submission, for a read it is
+ * copied back in the completion bottom half.  @write selects the direction
+ * (non-zero for a write).
+ *
+ * Returns the AIOCB of the submitted request, or NULL if gluster rejected
+ * the submission; in that case everything allocated here is released and
+ * @cb is never called.
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+    int ret;
+    glusterAIOCB *acb;
+    gluster_aiocb_t *gaiocb;
+    BDRVGlusterState *s = bs->opaque;
+    char *buf;
+    size_t size;
+    off_t offset;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->write = write;
+    acb->qiov = qiov;
+    acb->bounce = qemu_blockalign(bs, qiov->size);
+    acb->ret = 0;
+    acb->bh = NULL;
+    acb->s = s;
+    /*
+     * The completion path reads both flags, so they must start out clear
+     * rather than holding whatever the AIOCB memory contained before.
+     */
+    acb->cancelled = 0;
+    acb->error = 0;
+
+    if (write) {
+        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+    }
+
+    buf = acb->bounce;
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    s->qemu_aio_count++;
+
+    gaiocb = g_malloc(sizeof(gluster_aiocb_t));
+    gaiocb->opaque = acb;
+    gaiocb->buf = buf;
+    gaiocb->offset = offset;
+    gaiocb->size = size;
+    gaiocb->completion_fn = &gluster_finish_aiocb;
+
+    if (write) {
+        ret = gluster_aio_writev(s->fd, gaiocb);
+    } else {
+        ret = gluster_aio_readv(s->fd, gaiocb);
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    g_free(gaiocb);
+    s->qemu_aio_count--;
+    /* No completion will run for this request, so free the bounce buffer */
+    qemu_vfree(acb->bounce);
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+/*
+ * .bdrv_aio_readv entry point: submit an asynchronous read by calling
+ * qemu_gluster_aio_rw() with write == 0.
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                               0);
+}
+
+/*
+ * .bdrv_aio_writev entry point: submit an asynchronous write by calling
+ * qemu_gluster_aio_rw() with write == 1.
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                               1);
+}
+
+/*
+ * Return the size of the open image in bytes as reported by gluster_fstat(),
+ * or a negative errno value if the stat fails.
+ */
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    struct stat st;
+
+    if (gluster_fstat(s->fd, &st) < 0) {
+        /*
+         * Report the failure as -errno like qemu_gluster_create() does.
+         * Fall back to -EIO if errno was left at 0 so that the result can
+         * never be mistaken for a valid (zero) length.
+         */
+        return errno ? -errno : -EIO;
+    }
+    return st.st_size;
+}
+
+/*
+ * Release everything qemu_gluster_open() set up for this image: the AIO fd
+ * handler, both ends of the completion pipe and the gluster image handle.
+ *
+ * NOTE(review): this assumes close is only reached after a successful open,
+ * i.e. that a non-NULL s->fd implies the pipe exists -- confirm against the
+ * block layer.
+ */
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    if (s->fd) {
+        /*
+         * Unregister the handler before closing the descriptors; its opaque
+         * pointer is the driver state, which must not be used after close.
+         */
+        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
+                                NULL);
+        close(s->fds[GLUSTER_FD_READ]);
+        close(s->fds[GLUSTER_FD_WRITE]);
+
+        gluster_close(s->fd);
+        s->fd = NULL;
+    }
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = { /* referenced by bdrv_gluster.create_options */
+    {
+        .name = BLOCK_OPT_SIZE, /* the only option qemu_gluster_create() looks at */
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL } /* terminator: qemu_gluster_create() stops at a NULL name */
+};
+
+static BlockDriver bdrv_gluster = { /* driver table registered by bdrv_gluster_init() */
+    .format_name = "gluster",
+    .protocol_name = "gluster", /* NOTE(review): presumably the "gluster:" filename prefix -- confirm */
+    .instance_size = sizeof(BDRVGlusterState), /* size of the state used as bs->opaque */
+    .bdrv_file_open = qemu_gluster_open,
+    .bdrv_close = qemu_gluster_close,
+    .bdrv_create = qemu_gluster_create,
+    .bdrv_getlength = qemu_gluster_getlength,
+
+    .bdrv_aio_readv = qemu_gluster_aio_readv, /* both AIO hooks share qemu_gluster_aio_rw() */
+    .bdrv_aio_writev = qemu_gluster_aio_writev,
+
+    .create_options = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void) /* registers the gluster driver table */
+{
+    bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init); /* hooks bdrv_gluster_init() into block-layer initialisation */


Reply via email to