Le Friday 04 Oct 2013 à 16:48:12 (+0200), Max Reitz a écrit : > On 2013-10-02 14:39, Benoît Canet wrote: > >Use gnutls's SHA-256 to compare versions. > Wouldn't CRC32 suffice? (I don't really oppose using SHA, but taking > in gnutls as a dependency just for comparing several memory areas > seems a bit much to me) > > >Signed-off-by: Benoit Canet <ben...@irqsave.net> > >--- > > block/Makefile.objs | 2 +- > > block/quorum.c | 321 > > +++++++++++++++++++++++++++++++++++++++++++++- > > configure | 36 ++++++ > > include/monitor/monitor.h | 2 + > > monitor.c | 2 + > > 5 files changed, 361 insertions(+), 2 deletions(-) > > > >diff --git a/block/Makefile.objs b/block/Makefile.objs > >index 05a65c2..adcdc21 100644 > >--- a/block/Makefile.objs > >+++ b/block/Makefile.objs > >@@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o > >qcow2-snapshot.o qcow2-c > > block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o > > block-obj-y += qed-check.o > > block-obj-y += vhdx.o > >-block-obj-y += quorum.o > >+block-obj-$(CONFIG_QUORUM) += quorum.o > > block-obj-y += parallels.o blkdebug.o blkverify.o > > block-obj-y += snapshot.o qapi.o > > block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o > >diff --git a/block/quorum.c b/block/quorum.c > >index f0fc0e9..e235ac1 100644 > >--- a/block/quorum.c > >+++ b/block/quorum.c > >@@ -13,7 +13,43 @@ > > * See the COPYING file in the top-level directory. > > */ > >+#include <gnutls/gnutls.h> > >+#include <gnutls/crypto.h> > > #include "block/block_int.h" > >+#include "qapi/qmp/qjson.h" > >+ > >+#define HASH_LENGTH 32 > >+ > >+/* This union hold a vote hash value */ > *holds > > >+typedef union QuorumVoteValue { > >+ char h[HASH_LENGTH]; /* SHA-256 hash */ > >+ int64_t l; /* simpler 64 bits hash */ > >+} QuorumVoteValue; > >+ > >+/* A vote item */ > >+typedef struct QuorumVoteItem { > >+ int index; > >+ QLIST_ENTRY(QuorumVoteItem) next; > >+} QuorumVoteItem; > >+ > >+/* this structure is a vote version. A version is the set of vote sharing > >the > *set of votes > > >+ * same vote value. > >+ * The set of vote will be tracked with the items field and it's count is > *set of votes or *vote set; also s/it's count/its cardinality/ or > something like that > > >+ * vote_count. > >+ */ > >+typedef struct QuorumVoteVersion { > >+ QuorumVoteValue value; > >+ int index; > >+ int vote_count; > >+ QLIST_HEAD(, QuorumVoteItem) items; > >+ QLIST_ENTRY(QuorumVoteVersion) next; > >+} QuorumVoteVersion; > >+ > >+/* this structure hold a group of vote versions together */ > *holds > > >+typedef struct QuorumVotes { > >+ QLIST_HEAD(, QuorumVoteVersion) vote_list; > >+ int (*compare)(QuorumVoteValue *a, QuorumVoteValue *b); > >+} QuorumVotes; > > /* the following structure hold the state of one quorum instance */ > > typedef struct { > >@@ -60,10 +96,14 @@ struct QuorumAIOCB { > > int success_count; /* number of successfully completed AIOCB > > */ > > bool *finished; /* completion signal for cancel */ > >+ QuorumVotes votes; > >+ > > bool is_read; > > int vote_ret; > > }; > >+static void quorum_vote(QuorumAIOCB *acb); > >+ > > static void quorum_aio_cancel(BlockDriverAIOCB *blockacb) > > { > > QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common); > >@@ -111,6 +151,10 @@ static void quorum_aio_finalize(QuorumAIOCB *acb) > > acb->aios[i].ret = 0; > > } > >+ if (acb->vote_ret) { > >+ ret = acb->vote_ret; > >+ } > >+ > > acb->common.cb(acb->common.opaque, ret); > > if (acb->finished) { > > *acb->finished = true; > >@@ -122,6 +166,11 @@ static void quorum_aio_finalize(QuorumAIOCB *acb) > > qemu_aio_release(acb); > > } > >+static int quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b) > >+{ > >+ return memcmp(a->h, b->h, HASH_LENGTH); > >+} > >+ > > static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, > > BlockDriverState *bs, > > QEMUIOVector *qiov, > >@@ -141,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, > > acb->count = 0; > > acb->success_count = 0; > > acb->finished = NULL; > >+ acb->votes.compare = quorum_sha256_compare; > > acb->is_read = false; > > acb->vote_ret = 0; > >@@ -170,9 +220,278 @@ static void quorum_aio_cb(void *opaque, int ret) > > return; > > } > >+ /* Do the vote on read */ > >+ if (acb->is_read) { > >+ quorum_vote(acb); > >+ } > >+ > > quorum_aio_finalize(acb); > > } > >+static void quorum_report_bad(QuorumAIOCB *acb, int index) > >+{ > >+ QObject *data; > >+ data = qobject_from_jsonf("{ 'children-index': %i" > I'd prefer child-index. Generally, remember that the singular of > "children" is "child". > > >+ ", 'sector-num': %" PRId64 > >+ ", 'sectors-count': %i }", > >+ index, > >+ acb->sector_num, > >+ acb->nb_sectors); > >+ monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data); > How about decrementing the refcount of data? > > >+} > >+ > >+static void quorum_report_failure(QuorumAIOCB *acb) > >+{ > >+ QObject *data; > >+ data = qobject_from_jsonf("{ 'sector-num': %" PRId64 > >+ ", 'sectors-count': %i }", > >+ acb->sector_num, > >+ acb->nb_sectors); > >+ monitor_protocol_event(QEVENT_QUORUM_FAILURE, data); > Same here. > > >+} > >+ > >+static void quorum_report_bad_versions(QuorumAIOCB *acb, > >+ QuorumVoteValue *value) > >+{ > >+ QuorumVoteVersion *version; > >+ QuorumVoteItem *item; > >+ > >+ QLIST_FOREACH(version, &acb->votes.vote_list, next) { > >+ if (!acb->votes.compare(&version->value, value)) { > >+ continue; > >+ } > >+ QLIST_FOREACH(item, &version->items, next) { > >+ quorum_report_bad(acb, item->index); > >+ } > >+ } > >+} > >+ > >+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) > >+{ > >+ int i; > >+ assert(dest->niov == source->niov); > >+ assert(dest->size == source->size); > >+ for (i = 0; i < source->niov; i++) { > >+ assert(dest->iov[i].iov_len == source->iov[i].iov_len); > >+ memcpy(dest->iov[i].iov_base, > >+ source->iov[i].iov_base, > >+ source->iov[i].iov_len); > >+ } > >+} > >+ > >+static void quorum_count_vote(QuorumVotes *votes, > >+ QuorumVoteValue *value, > >+ int index) > >+{ > >+ QuorumVoteVersion *v = NULL, *version = NULL; > >+ QuorumVoteItem *item; > >+ > >+ /* look if we have something with this hash */ > >+ QLIST_FOREACH(v, &votes->vote_list, next) { > >+ if (!votes->compare(&v->value, value)) { > >+ version = v; > >+ break; > >+ } > >+ } > >+ > >+ /* It's a version not yet in the list add it */ > >+ if (!version) { > >+ version = g_new0(QuorumVoteVersion, 1); > >+ QLIST_INIT(&version->items); > >+ memcpy(&version->value, value, sizeof(version->value)); > >+ version->index = index; > >+ version->vote_count = 0; > >+ QLIST_INSERT_HEAD(&votes->vote_list, version, next); > >+ } > >+ > >+ version->vote_count++; > >+ > >+ item = g_new0(QuorumVoteItem, 1); > >+ item->index = index; > >+ QLIST_INSERT_HEAD(&version->items, item, next); > >+} > >+ > >+static void quorum_free_vote_list(QuorumVotes *votes) > >+{ > >+ QuorumVoteVersion *version, *next_version; > >+ QuorumVoteItem *item, *next_item; > >+ > >+ QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) { > >+ QLIST_REMOVE(version, next); > >+ QLIST_FOREACH_SAFE(item, &version->items, next, next_item) { > >+ QLIST_REMOVE(item, next); > >+ g_free(item); > >+ } > >+ g_free(version); > >+ } > >+} > >+ > >+static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue > >*hash) > >+{ > >+ int j, ret; > >+ gnutls_hash_hd_t dig; > >+ QEMUIOVector *qiov = &acb->aios[i].qiov; > >+ > >+ ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256); > >+ > >+ if (ret < 0) { > >+ return ret; > >+ } > >+ > >+ for (j = 0; j < qiov->niov; j++) { > >+ ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len); > >+ if (ret < 0) { > >+ return ret; > gnutls_hash_deinit? > > >+ } > >+ } > >+ > >+ gnutls_hash_deinit(dig, (void *) hash); > >+ > >+ return 0; > >+} > >+ > >+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes) > >+{ > >+ int i = 0; > >+ QuorumVoteVersion *candidate, *winner = NULL; > >+ > >+ QLIST_FOREACH(candidate, &votes->vote_list, next) { > >+ if (candidate->vote_count > i) { > >+ i = candidate->vote_count; > >+ winner = candidate; > >+ } > >+ } > >+ > >+ return winner; > >+} > >+ > >+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b) > >+{ > >+ int i; > >+ int result; > >+ > >+ assert(a->niov == b->niov); > >+ for (i = 0; i < a->niov; i++) { > >+ assert(a->iov[i].iov_len == b->iov[i].iov_len); > >+ result = memcmp(a->iov[i].iov_base, > >+ b->iov[i].iov_base, > >+ a->iov[i].iov_len); > >+ if (result) { > >+ return false; > >+ } > >+ } > >+ > >+ return true; > >+} > >+ > >+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb, > >+ const char *fmt, ...) > >+{ > >+ va_list ap; > >+ > >+ va_start(ap, fmt); > >+ fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ", > >+ acb->sector_num, acb->nb_sectors); > >+ vfprintf(stderr, fmt, ap); > >+ fprintf(stderr, "\n"); > >+ va_end(ap); > >+ exit(1); > >+} > >+ > >+static bool quorum_compare(QuorumAIOCB *acb, > >+ QEMUIOVector *a, > >+ QEMUIOVector *b) > >+{ > >+ BDRVQuorumState *s = acb->bqs; > >+ ssize_t offset; > >+ > >+ /* This driver will replace blkverify in this particular case */ > >+ if (s->is_blkverify) { > >+ offset = qemu_iovec_compare(a, b); > >+ if (offset != -1) { > >+ quorum_err(acb, "contents mismatch in sector %" PRId64, > >+ acb->sector_num + > >+ (uint64_t)(offset / BDRV_SECTOR_SIZE)); > >+ } > >+ return true; > >+ } > >+ > >+ return quorum_iovec_compare(a, b); > >+} > >+ > >+static void quorum_vote(QuorumAIOCB *acb) > >+{ > >+ bool quorum = true; > >+ int i, j, ret; > >+ QuorumVoteValue hash; > >+ BDRVQuorumState *s = acb->bqs; > >+ QuorumVoteVersion *winner; > >+ > >+ /* get the index of the first successful read */ > >+ for (i = 0; i < s->total; i++) { > >+ if (!acb->aios[i].ret) { > >+ break; > >+ } > >+ } > >+ > >+ /* compare this read with all other successful read looking for quorum > >*/ > >+ for (j = i + 1; j < s->total; j++) { > >+ if (acb->aios[j].ret) { > >+ continue; > >+ } > >+ quorum = quorum_compare(acb, &acb->aios[i].qiov, > >&acb->aios[j].qiov); > >+ if (!quorum) { > >+ break; > >+ } > >+ } > >+ > >+ /* Every successful read agrees -> Quorum */ > >+ if (quorum) { > >+ quorum_copy_qiov(acb->qiov, &acb->aios[i].qiov); > If no reads were successful at all, quorum will be true and i will > be s->total. Therefore, this will result in an array access with an > index out of bounds here. Thanks a lot for spotting this.
> > Generally, why is this function executed at all if any read failed? > If a read fails, the quorum read function will return an error, thus > the caller will ignore the result anyway. I should if there is a quorum between the successfull reads; if all the success ful reads agrees and their count is greater than threshold. > > >+ return; > >+ } > >+ > >+ /* compute hashs for each successful read, also store indexes */ > >+ for (i = 0; i < s->total; i++) { > >+ if (acb->aios[i].ret) { > >+ continue; > >+ } > >+ ret = quorum_compute_hash(acb, i, &hash); > >+ /* if ever the hash computation failed */ > >+ if (ret < 0) { > >+ acb->vote_ret = ret; > >+ goto free_exit; > >+ } > >+ quorum_count_vote(&acb->votes, &hash, i); > >+ } > >+ > >+ /* vote to select the most represented version */ > >+ winner = quorum_get_vote_winner(&acb->votes); > >+ /* every vote version are differents -> error */ > >+ if (!winner) { > >+ quorum_report_failure(acb); > >+ acb->vote_ret = -EIO; > >+ goto free_exit; > >+ } > >+ > >+ /* if the winner count is smaller then threshold read fail */ > s/then/than/, s/ read fail/, the read fails/ > > >+ if (winner->vote_count < s->threshold) { > >+ quorum_report_failure(acb); > >+ acb->vote_ret = -EIO; > >+ goto free_exit; > >+ } > >+ > >+ /* we have a winner: copy it */ > >+ quorum_copy_qiov(acb->qiov, &acb->aios[winner->index].qiov); > >+ > >+ /* some versions are bad print them */ > >+ quorum_report_bad_versions(acb, &winner->value); > >+ > >+free_exit: > >+ /* free lists */ > >+ quorum_free_vote_list(&acb->votes); > >+} > >+ > > static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs, > > int64_t sector_num, > > QEMUIOVector *qiov, > >@@ -194,7 +513,7 @@ static BlockDriverAIOCB > >*quorum_aio_readv(BlockDriverState *bs, > > } > > for (i = 0; i < s->total; i++) { > >- bdrv_aio_readv(&s->bs[i], sector_num, qiov, nb_sectors, > >+ bdrv_aio_readv(&s->bs[i], sector_num, &acb->aios[i].qiov, > >nb_sectors, > > quorum_aio_cb, &acb->aios[i]); > > } > >diff --git a/configure b/configure > >index 23dbaaf..efbe7d9 100755 > >--- a/configure > >+++ b/configure > >@@ -246,6 +246,7 @@ gtk="" > > gtkabi="2.0" > > tpm="no" > > libssh2="" > >+quorum="no" > > # parse CC options first > > for opt do > >@@ -972,6 +973,10 @@ for opt do > > ;; > > --enable-libssh2) libssh2="yes" > > ;; > >+ --disable-quorum) quorum="no" > >+ ;; > >+ --enable-quorum) quorum="yes" > >+ ;; > > *) echo "ERROR: unknown option $opt"; show_help="yes" > > ;; > > esac > >@@ -1203,6 +1208,8 @@ echo " --gcov=GCOV use specified gcov > >[$gcov_tool]" > > echo " --enable-tpm enable TPM support" > > echo " --disable-libssh2 disable ssh block device support" > > echo " --enable-libssh2 enable ssh block device support" > >+echo " --disable-quorum disable quorum block filter support" > >+echo " --enable-quorum enable quorum block filter support" > > echo "" > > echo "NOTE: The object files are built at the place where configure is > > launched" > > exit 1 > >@@ -1895,6 +1902,30 @@ EOF > > fi > > ########################################## > >+# Quorum probe (check for gnutls) > >+if test "$quorum" != "no" ; then > >+cat > $TMPC <<EOF > >+#include <gnutls/gnutls.h> > >+#include <gnutls/crypto.h> > >+int main(void) {char data[4096], digest[32]; > >+gnutls_hash_fast(GNUTLS_DIG_SHA256, data, 4096, digest); > >+return 0; > >+} > >+EOF > >+quorum_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null` > >+quorum_tls_libs=`$pkg_config --libs gnutls 2> /dev/null` > >+if compile_prog "$quorum_tls_cflags" "$quorum_tls_libs" ; then > >+ qcow_tls=yes > >+ libs_softmmu="$quorum_tls_libs $libs_softmmu" > >+ libs_tools="$quorum_tls_libs $libs_softmmu" > >+ QEMU_CFLAGS="$QEMU_CFLAGS $quorum_tls_cflags" > >+else > >+ echo "gnutls > 2.10.0 required to compile Quorum" > >+ exit 1 > >+fi > >+fi > >+ > >+########################################## > > # VNC SASL detection > > if test "$vnc" = "yes" -a "$vnc_sasl" != "no" ; then > > cat > $TMPC <<EOF > >@@ -3758,6 +3789,7 @@ echo "TPM support $tpm" > > echo "libssh2 support $libssh2" > > echo "TPM passthrough $tpm_passthrough" > > echo "QOM debugging $qom_cast_debug" > >+echo "Quorum $quorum" > > if test "$sdl_too_old" = "yes"; then > > echo "-> Your SDL version is too old - please upgrade to have SDL support" > >@@ -4156,6 +4188,10 @@ if test "$libssh2" = "yes" ; then > > echo "CONFIG_LIBSSH2=y" >> $config_host_mak > > fi > >+if test "$quorum" = "yes" ; then > >+ echo "CONFIG_QUORUM=y" >> $config_host_mak > >+fi > >+ > > if test "$virtio_blk_data_plane" = "yes" ; then > > echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak > > fi > >diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h > >index 10fa0e3..51902ed 100644 > >--- a/include/monitor/monitor.h > >+++ b/include/monitor/monitor.h > >@@ -49,6 +49,8 @@ typedef enum MonitorEvent { > > QEVENT_SPICE_MIGRATE_COMPLETED, > > QEVENT_GUEST_PANICKED, > > QEVENT_BLOCK_IMAGE_CORRUPTED, > >+ QEVENT_QUORUM_FAILURE, > >+ QEVENT_QUORUM_REPORT_BAD, > > /* Add to 'monitor_event_names' array in monitor.c when > > * defining new events here */ > >diff --git a/monitor.c b/monitor.c > >index 74f3f1b..89d139f 100644 > >--- a/monitor.c > >+++ b/monitor.c > >@@ -507,6 +507,8 @@ static const char *monitor_event_names[] = { > > [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED", > > [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED", > > [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED", > >+ [QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE", > >+ [QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD", > Could you document these events in docs/qmp/qmp-events.txt? > > Max > > > }; > > QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX) > >