From: Benoît Canet ben...@irqsave.net
This patchset enables the core of the quorum mechanism.
The num_children reads are compared to find the majority version; if this
version occurs more than threshold times, the guest won't see any error at all.
If a block is corrupted, if an error occurs during an I/O, or if the quorum
cannot be established, QMP events are used to report it to the management layer.
Use gnutls's SHA-256 to compare versions.
--enable-quorum must be used to enable the feature.
Signed-off-by: Benoit Canet ben...@irqsave.net
---
block/Makefile.objs | 2 +-
block/quorum.c| 391 +-
configure | 36 +
docs/qmp/qmp-events.txt | 36 +
include/monitor/monitor.h | 2 +
monitor.c | 2 +
6 files changed, 467 insertions(+), 2 deletions(-)
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 716556f..425d7fb 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o
qcow2-snapshot.o qcow2-c
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
-block-obj-y += quorum.o
+block-obj-$(CONFIG_QUORUM) += quorum.o
block-obj-y += parallels.o blkdebug.o blkverify.o
block-obj-y += snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
diff --git a/block/quorum.c b/block/quorum.c
index fd19662..7acaa97 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -13,7 +13,43 @@
* See the COPYING file in the top-level directory.
*/
+#include gnutls/gnutls.h
+#include gnutls/crypto.h
#include block/block_int.h
+#include qapi/qmp/qjson.h
+
+#define HASH_LENGTH 32
+
+/* This union holds a vote hash value; h and l share the same storage */
+typedef union QuorumVoteValue {
+char h[HASH_LENGTH]; /* SHA-256 hash, HASH_LENGTH (32) bytes */
+int64_t l; /* simpler 64 bits hash, see quorum_64bits_compare() */
+} QuorumVoteValue;
+
+/* A vote item: one element of a QuorumVoteVersion's items list */
+typedef struct QuorumVoteItem {
+int index; /* presumably the index of the voting child -- TODO confirm */
+QLIST_ENTRY(QuorumVoteItem) next; /* list linkage */
+} QuorumVoteItem;
+
+/* This structure is a vote version. A version is the set of votes sharing the
+ * same vote value.
+ * The set of votes is tracked with the items field and its cardinality is
+ * vote_count.
+ */
+typedef struct QuorumVoteVersion {
+QuorumVoteValue value; /* vote value shared by every vote in this version */
+int index; /* NOTE(review): meaning not visible in this hunk -- confirm */
+int vote_count; /* number of entries in items */
+QLIST_HEAD(, QuorumVoteItem) items; /* the votes belonging to this version */
+QLIST_ENTRY(QuorumVoteVersion) next; /* linkage in QuorumVotes.vote_list */
+} QuorumVoteVersion;
+
+/* This structure holds a group of vote versions together */
+typedef struct QuorumVotes {
+QLIST_HEAD(, QuorumVoteVersion) vote_list; /* list of QuorumVoteVersion */
+bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b); /* value equality */
+} QuorumVotes;
/* the following structure holds the state of one quorum instance */
typedef struct BDRVQuorumState {
@@ -65,10 +101,14 @@ struct QuorumAIOCB {
int count; /* number of completed AIOCB */
int success_count; /* number of successfully completed AIOCB */
+QuorumVotes votes;
+
bool is_read;
int vote_ret;
};
+static void quorum_vote(QuorumAIOCB *acb);
+
static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
{
QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
@@ -94,6 +134,10 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
BDRVQuorumState *s = acb-common.bs-opaque;
int i, ret = 0;
+if (acb-vote_ret) {
+ret = acb-vote_ret;
+}
+
acb-common.cb(acb-common.opaque, ret);
if (acb-is_read) {
@@ -107,6 +151,16 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
qemu_aio_release(acb);
}
+static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
+{
+    return !memcmp(a->h, b->h, HASH_LENGTH); /* true iff digests are equal */
+}
+
+static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
+{
+    return a->l == b->l; /* true iff the 64-bit hashes are equal */
+}
+
static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
BlockDriverState *bs,
QEMUIOVector *qiov,
@@ -125,6 +179,8 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
acb-qcrs = g_new0(QuorumChildRequest, s-num_children);
acb-count = 0;
acb-success_count = 0;
+acb-votes.compare = quorum_sha256_compare;
+QLIST_INIT(acb-votes.vote_list);
acb-is_read = false;
acb-vote_ret = 0;
@@ -137,6 +193,48 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
return acb;
}
+static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+{
+QObject *data;
+assert(node_name);
+data = qobject_from_jsonf({ 'ret': %i
+ , 'node-name': \%s\
+ , 'sector-num': % PRId64
+ , 'sectors-count': %i },
+ ret, node_name, acb-sector_num,
acb-nb_sectors);
+monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
+qobject_decref(data);