On 2013-10-02 14:39, Benoît Canet wrote:
Example of command line:
-drive if=virtio,file.driver=quorum,\
file.children.0.file.filename=1.qcow2,\
file.children.1.file.filename=2.qcow2,\
file.children.2.file.filename=3.qcow2,\
file.vote_threshold=3
file.blkverify=on with file.vote_threshold=2 and two file can be passed to
*two files
emulated blkverify.
Signed-off-by: Benoit Canet <ben...@irqsave.net>
---
block/quorum.c | 338 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 338 insertions(+)
Okay, in general: Bear in mind that the singular of "children" is "child".
Second, error_setg is a shorthand to error_set(…,
ERROR_CLASS_GENERIC_ERROR, …), so I'd propose you might make use of it –
if you're so inclined. ;)
Third, wouldn't it be better to use children[i] instead of children.i
for addressing them on the command line? I guess, a QMP interface would
represent them as an array, so using the standard array index notation
seems better to me.
Fourth, I don't like the parsing functions. They look reasonable for
what they're supposed to do, but they just do so much hard-coded parsing
(e.g., expect this prefix and that suffix). This is okay for now, since
they're just used by this driver, but there's my problem: Is there no
other, general way of parsing the command line options? (I truly don't
know) I guess not, since blockdev.c fetches whole arguments such as
"cache.direct" as well.
So, these parsing functions seem necessary for now, but I don't like
them. ;)
diff --git a/block/quorum.c b/block/quorum.c
index 0a28ab1..3cfc67e 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -17,8 +17,13 @@
#include <gnutls/crypto.h>
#include "block/block_int.h"
#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/types.h"
+#include "qemu-common.h"
#define HASH_LENGTH 32
+#define KEY_PREFIX "children."
+#define KEY_SUFFIX "."
+#define KEY_FILENAME_SUFFIX ".file.filename"
/* This union hold a vote hash value */
typedef union QuorumVoteValue {
@@ -673,12 +678,342 @@ static coroutine_fn int quorum_co_flush(BlockDriverState
*bs)
return result;
}
+static int quorum_match_key(const char *key,
+ char **key_prefix,
+ bool clone_key_prefix)
+{
+ const char *start;
+ char *next;
+ unsigned long long idx;
+ int ret;
+
+ *key_prefix = NULL;
+
+ /* the following code is a translation of the following pseudo code:
+ * match = key.match('(^children\.(\d+)\.)file\.filename)
+ * if not match:
+ * return -1;
+ * key_prefix = match.outer_match()
+ * idx = match.inner_match()
+ *
+ * note: we also match the .file suffix to avoid futher checkings
+ */
+
+ /* this key is not a children */
+ if (strncmp(key, KEY_PREFIX, strlen(KEY_PREFIX))) {
You could use sizeof instead of strlen here (and for every other string
constant as well), though I don't know whether this is acceptable coding
style for qemu. ;)
+ return -1;
+ }
+
+ /* take first char after prefix */
+ start = key + strlen(KEY_PREFIX);
+
+ /* if the string end here -> scan fail */
+ if (start[0] == '\0') {
+ return -1;
+ }
+
+ /* try to retrieve the index */
+ ret = parse_uint(start, &idx, &next, 10);
+
+ /* no int found -> scan fail */
+ if (ret < 0) {
+ return -1;
+ }
+
+ /* match the ".file.filename" suffix to avoid matching the same idx
+ * multiple times and be required to do more checks later
+ */
+ if (strncmp(next, KEY_FILENAME_SUFFIX, strlen(KEY_FILENAME_SUFFIX))) {
+ return -1;
+ }
+
+ if (clone_key_prefix) {
+ /* include '.' */
+ int len = next + strlen(KEY_SUFFIX) - key;
+ *key_prefix = g_strndup(key, len);
+ }
+
+ return idx;
+}
+
+static QDict *quorum_get_children_idx(const QDict *options)
+{
+ const QDictEntry *ent;
+ QDict *result;
+ char *key_prefix;
+ int idx;
+
+ result = qdict_new();
+
+ for (ent = qdict_first(options); ent; ent = qdict_next(options, ent)) {
+ const char *key = qdict_entry_key(ent);
+ idx = quorum_match_key(key, &key_prefix, true);
+
+ /* if the result is -1 or any negative index value skip this key */
+ if (idx < 0) {
+ continue;
+ }
+
+ qdict_put(result, key_prefix, qint_from_int(idx));
+ }
+
+ return result;
+}
+
+static int quorum_fill_validation_array(bool *array,
+ const QDict *dict,
+ int total,
+ Error **errp)
+{
+ const QDictEntry *ent;
+
+ /* fill the checking array with children indexes */
+ for (ent = qdict_first(dict); ent; ent = qdict_next(dict, ent)) {
+ const char *key = qdict_entry_key(ent);
+ int idx = qdict_get_int(dict, key);
+
+ if (idx < 0 || idx >= total) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "Children index must be between 0 and children count
-1");
I'd prefer "- 1" instead of "-1" (or even "Child index must be an
unsigned integer smaller than the children count").
+ return -ERANGE;
+ }
+
+ array[idx] = true;
+ }
+
+ return 0;
+}
+
+static int quorum_valid_indexes(const bool *array, int total, Error **errp)
+{
+ int i;
+
+ for (i = 0; i < total; i++) {
+ if (array[i] == true) {
+ continue;
+ }
+
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "All children indexes between 0 and children count -1 must "
+ " be used");
Again, I'd prefer "- 1"; then, there are two spaces both left and right
of must – is this intentional?
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static int quorum_valid_children_indexes(const QDict *dict,
+ int total,
+ Error **errp)
+{
+ bool *array;
+ int ret = 0;;
+
+ /* allocate indexes checking array and put false in it */
+ array = g_new0(bool, total);
+
+ ret = quorum_fill_validation_array(array, dict, total, errp);
+ if (ret < 0) {
+ goto free_exit;
+ }
+
+ ret = quorum_valid_indexes(array, total, errp);
+free_exit:
+ g_free(array);
+ return ret;
+}
+
+static int quorum_valid_threshold(int threshold,
+ int total,
+ Error **errp)
+{
+
+ if (threshold < 1) {
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+ "vote_threshold", "value >= 1");
+ return -ERANGE;
+ }
+
+ if (threshold > total) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "threshold <= children count must be true");
Well, while this might technically be true, I'd rather go for "threshold
may not exceed children count" instead. ;)
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static int quorum_bdrv_open_sub(BlockDriverState *bs,
+ QDict *bs_dict,
+ int flags,
+ Error **errp)
+{
+ const char *format_name = NULL;
+ const char *filename = NULL;
+ BlockDriver *drv = NULL;
+ int ret = 0;
+
+ format_name = qdict_get_try_str(bs_dict, "file.driver");
+
+ if (!format_name) {
+ filename = qdict_get_try_str(bs_dict, "file.filename");
+ qdict_del(bs_dict, "file.filename");
+ } else {
+ drv = bdrv_find_format(format_name);
+ if (!drv) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "invalid format name");
+ return -EINVAL;
+ }
+ }
+
+ QINCREF(bs_dict);
+ ret = bdrv_open(bs,
+ filename,
+ bs_dict,
+ flags,
+ drv,
+ errp);
Why don't you write this on a single line?
+ QDECREF(bs_dict);
+ return ret;
+}
+
+static int quorum_open(BlockDriverState *bs,
+ QDict *options,
+ int flags,
+ Error **errp)
+{
+ BDRVQuorumState *s = bs->opaque;
+ Error *local_err = NULL;
+ const QDictEntry *ent;
+ QDict *idx_dict;
+ bool *opened;
+ const char *value;
+ char *next;
+ int i;
+ int ret = 0;
+
+ /* get a dict of children indexes for validation */
+ idx_dict = quorum_get_children_idx(options);
+
+ /* count how many different children indexes are present and validate */
+ s->total = qdict_size(idx_dict);
+ if (s->total < 2) {
+ error_set(&local_err, ERROR_CLASS_GENERIC_ERROR,
+ "Number of provided children must be greater than 1");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* validate that the set of index is coherent */
+ ret = quorum_valid_children_indexes(idx_dict, s->total, &local_err);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ /* retrieve the threshold option from the command line */
+ value = qdict_get_try_str(options, "vote_threshold");
+ if (!value) {
+ error_set(&local_err, ERROR_CLASS_GENERIC_ERROR,
+ "vote_threshold must be provided");
+ ret = -EINVAL;
+ goto exit;
+ }
+ qdict_del(options, "vote_threshold");
+
+ ret = parse_uint(value, (unsigned long long *) &s->threshold, &next, 10);
+
+ /* no int found -> scan fail */
+ if (ret < 0) {
+ error_set(&local_err, ERROR_CLASS_GENERIC_ERROR,
+ "invalid vote_threshold specified");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* and validate it againts s->total */
*against
+ ret = quorum_valid_threshold(s->threshold, s->total, &local_err);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ /* is the driver in blkverify mode */
+ value = qdict_get_try_str(options, "blkverify");
+ if (value && !strcmp(value, "on") &&
+ s->total == 2 && s->threshold == 2) {
+ s->is_blkverify = true;
+ }
If the user specifies anything different from "on" or if he does and
s->total or s->threshold is not 2, quorum will silently work in
non-blkverify mode without telling the user. So I'd emit a message here
if blkverify has been specified and its value is different from "off"
but s->is_blkverify remains false (i.e., “else if (value &&
strcmp(value, "off")) { fprintf(stderr, "[Some warning]"); }”).
+ qdict_del(options, "blkverify");
+
+ /* allocate the children BlockDriverState array */
+ s->bs = g_new0(BlockDriverState, s->total);
+ opened = g_new0(bool, s->total);
+
+ /* open children bs */
+ for (ent = qdict_first(idx_dict);
+ ent; ent = qdict_next(idx_dict, ent)) {
+ const char *key = qdict_entry_key(ent);
+ int idx = qdict_get_int(idx_dict, key);
+ int ret = 0;
Remove this declaration, it shadows the outer ret. This results in that
outer ret being 0 even if quorum_bdrv_open_sub returns an error which
makes this whole function return 0 (instead of propagating the return
value of quorum_bdrv_open_sub).
+ QDict *bs_dict; /* dict used to open child */
+ qdict_extract_subqdict(options, &bs_dict, key);
+ ret = quorum_bdrv_open_sub(&s->bs[idx],
+ bs_dict,
+ flags,
+ &local_err);
+ if (ret < 0) {
+ QDECREF(bs_dict);
I think you should remove this QDECREF. I don't really know why, but I
think basically bdrv_open takes ownership of it. Anyway, if this QDECREF
stays, qemu crashes when a child could not be opened due to memory
corruption, so it's probably not correct. ;)
+ goto close_exit;
+ }
+ opened[idx] = true;
+ }
+
+ g_free(opened);
+ goto exit;
+
+close_exit:
+ /* cleanup on error */
+ for (i = 0; i < s->total; i++) {
+ if (!opened[i]) {
+ continue;
+ }
+ bdrv_close(&s->bs[i]);
+ }
+ g_free(s->bs);
+ g_free(opened);
+exit:
+ /* propagate error */
+ QDECREF(idx_dict);
I think you should swap these two lines.
Max
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ }
+ return ret;
+}
+
+static void quorum_close(BlockDriverState *bs)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->total; i++) {
+ /* Ensure writes reach stable storage */
+ bdrv_flush(&s->bs[i]);
+ /* close manually because we'll free s->bs */
+ bdrv_close(&s->bs[i]);
+ }
+
+ g_free(s->bs);
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
.instance_size = sizeof(BDRVQuorumState),
+ .bdrv_file_open = quorum_open,
+ .bdrv_close = quorum_close,
+
.bdrv_co_flush_to_disk = quorum_co_flush,
.bdrv_getlength = quorum_getlength,
@@ -687,6 +1022,9 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_writev = quorum_aio_writev,
.bdrv_invalidate_cache = quorum_invalidate_cache,
.bdrv_co_get_block_status = quorum_co_get_block_status,
+
+ /* disable external snapshots while waiting for block filters */
+ .bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden,
};
static void bdrv_quorum_init(void)