> -----Original Message-----
> From: Lukas Straub <lukasstra...@web.de>
> Sent: Monday, May 4, 2020 6:28 PM
> To: qemu-devel <qemu-devel@nongnu.org>
> Cc: Zhang, Chen <chen.zh...@intel.com>; Li Zhijian
> <lizhij...@cn.fujitsu.com>; Jason Wang <jasow...@redhat.com>; Marc-
> André Lureau <marcandre.lur...@redhat.com>; Paolo Bonzini
> <pbonz...@redhat.com>
> Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that colo-
> compare is active
>
> If the colo-compare object is removed before failover and a checkpoint
> happens, qemu crashes because it tries to lock the destroyed event_mtx in
> colo_notify_compares_event.
>
> Fix this by checking if everything is initialized by introducing a new
> variable
> colo_compare_active which is protected by a new mutex
> colo_compare_mutex. The new mutex also protects against concurrent
> access of the net_compares list and makes sure that
> colo_notify_compares_event isn't active while we destroy event_mtx and
> event_complete_cond.
>
> With this it also is again possible to use colo without colo-compare (periodic
> mode) and to use multiple colo-compare for multiple network interfaces.
>
Hi Lukas,
For this case I think we don't need to touch vl.c code, we can solve this issue
from another perspective:
How to remove colo-compare?
User will use qemu-monitor or QMP command to disable an object, so we just need
return operation failed
When user try to remove colo-compare object while COLO is running.
Thanks
Zhang Chen
> Signed-off-by: Lukas Straub <lukasstra...@web.de>
> ---
> net/colo-compare.c | 35 +++++++++++++++++++++++++++++------
> net/colo-compare.h | 1 +
> softmmu/vl.c | 2 ++
> 3 files changed, 32 insertions(+), 6 deletions(-)
>
> diff --git a/net/colo-compare.c b/net/colo-compare.c index
> 56db3d3bfc..c7572d75e9 100644
> --- a/net/colo-compare.c
> +++ b/net/colo-compare.c
> @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers = #define
> REGULAR_PACKET_CHECK_MS 3000 #define DEFAULT_TIME_OUT_MS 3000
>
> +static QemuMutex colo_compare_mutex;
> +static bool colo_compare_active;
> static QemuMutex event_mtx;
> static QemuCond event_complete_cond;
> static int event_unhandled_count;
> @@ -906,6 +908,12 @@ static void check_old_packet_regular(void *opaque)
> void colo_notify_compares_event(void *opaque, int event, Error **errp) {
> CompareState *s;
> + qemu_mutex_lock(&colo_compare_mutex);
> +
> + if (!colo_compare_active) {
> + qemu_mutex_unlock(&colo_compare_mutex);
> + return;
> + }
>
> qemu_mutex_lock(&event_mtx);
> QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7 @@ void
> colo_notify_compares_event(void *opaque, int event, Error **errp)
> }
>
> qemu_mutex_unlock(&event_mtx);
> + qemu_mutex_unlock(&colo_compare_mutex);
> }
>
> static void colo_compare_timer_init(CompareState *s) @@ -1274,7 +1283,14
> @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
> s->vnet_hdr);
> }
>
> + qemu_mutex_lock(&colo_compare_mutex);
> + if (!colo_compare_active) {
> + qemu_mutex_init(&event_mtx);
> + qemu_cond_init(&event_complete_cond);
> + colo_compare_active = true;
> + }
> QTAILQ_INSERT_TAIL(&net_compares, s, next);
> + qemu_mutex_unlock(&colo_compare_mutex);
>
> s->out_sendco.s = s;
> s->out_sendco.chr = &s->chr_out;
> @@ -1290,9 +1306,6 @@ static void colo_compare_complete(UserCreatable
> *uc, Error **errp)
>
> g_queue_init(&s->conn_list);
>
> - qemu_mutex_init(&event_mtx);
> - qemu_cond_init(&event_complete_cond);
> -
> s->connection_track_table =
> g_hash_table_new_full(connection_key_hash,
> connection_key_equal,
> g_free, @@ -1384,12
> +1397,19 @@ static void
> colo_compare_finalize(Object *obj)
>
> qemu_bh_delete(s->event_bh);
>
> + qemu_mutex_lock(&colo_compare_mutex);
> QTAILQ_FOREACH(tmp, &net_compares, next) {
> if (tmp == s) {
> QTAILQ_REMOVE(&net_compares, s, next);
> break;
> }
> }
> + if (QTAILQ_EMPTY(&net_compares)) {
> + colo_compare_active = false;
> + qemu_mutex_destroy(&event_mtx);
> + qemu_cond_destroy(&event_complete_cond);
> + }
> + qemu_mutex_unlock(&colo_compare_mutex);
>
> AioContext *ctx = iothread_get_aio_context(s->iothread);
> aio_context_acquire(ctx);
> @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj)
> object_unref(OBJECT(s->iothread));
> }
>
> - qemu_mutex_destroy(&event_mtx);
> - qemu_cond_destroy(&event_complete_cond);
> -
> g_free(s->pri_indev);
> g_free(s->sec_indev);
> g_free(s->outdev);
> g_free(s->notify_dev);
> }
>
> +void colo_compare_init_globals(void)
> +{
> + colo_compare_active = false;
> + qemu_mutex_init(&colo_compare_mutex);
> +}
> +
> static const TypeInfo colo_compare_info = {
> .name = TYPE_COLO_COMPARE,
> .parent = TYPE_OBJECT,
> diff --git a/net/colo-compare.h b/net/colo-compare.h index
> 22ddd512e2..eb483ac586 100644
> --- a/net/colo-compare.h
> +++ b/net/colo-compare.h
> @@ -17,6 +17,7 @@
> #ifndef QEMU_COLO_COMPARE_H
> #define QEMU_COLO_COMPARE_H
>
> +void colo_compare_init_globals(void);
> void colo_notify_compares_event(void *opaque, int event, Error **errp);
> void colo_compare_register_notifier(Notifier *notify); void
> colo_compare_unregister_notifier(Notifier *notify); diff --git a/softmmu/vl.c
> b/softmmu/vl.c index 32c0047889..a913ed5469 100644
> --- a/softmmu/vl.c
> +++ b/softmmu/vl.c
> @@ -112,6 +112,7 @@
> #include "qapi/qmp/qerror.h"
> #include "sysemu/iothread.h"
> #include "qemu/guest-random.h"
> +#include "net/colo-compare.h"
>
> #define MAX_VIRTIO_CONSOLES 1
>
> @@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char **envp)
> precopy_infrastructure_init();
> postcopy_infrastructure_init();
> monitor_init_globals();
> + colo_compare_init_globals();
>
> if (qcrypto_init(&err) < 0) {
> error_reportf_err(err, "cannot initialize crypto: ");
> --
> 2.20.1