Hi

On Wed, Feb 5, 2020 at 10:57 AM Felipe Franciosi <fel...@nutanix.com> wrote:
>
> This introduces a self-fence mechanism to Qemu, causing it to die if a
> heartbeat condition is not met. Currently, a file-based heartbeat is
> available and can be configured as follows:
>
> -object file-fence,id=ff0,file=/foo,qtimeout=20,ktimeout=25,signal=kill
>
> Qemu will watch 'file' for attribute changes. Touching the file works as
> a heartbeat. This parameter is mandatory.
>
> Fencing happens after 'qtimeout' or 'ktimeout' seconds elapse without a
> heartbeat. At least one of these must be specified. Both may be used, in
> which case 'ktimeout' must be greater than 'qtimeout'. Setting either to
> zero has no effect (as if they weren't specified).
>
> When using 'qtimeout', an internal Qemu timer is used. Fencing with this
> method gives Qemu a chance to write a log message indicating which file
> caused the event. If Qemu's main loop is hung for whatever reason, this
> method won't successfully kill Qemu.
>
> When using 'ktimeout', a kernel timer is used. In this case, 'signal'
> can be 'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using
> SIGQUIT may be preferred for obtaining core dumps. If Qemu is hung
> (e.g. uninterruptible sleep), this method won't successfully kill Qemu.
>
> It is worth noting that even successfully killing Qemu may not be
> sufficient to completely fence a VM as certain operations like network
> packets or block commands may be pending in the kernel. If that is a
> concern, systems should consider using further fencing mechanisms like
> hardware watchdogs either instead of or in conjunction with this for
> additional protection.
>
> Signed-off-by: Felipe Franciosi <fel...@nutanix.com>
> ---
>  backends/Makefile.objs |   2 +
>  backends/file-fence.c  | 374 +++++++++++++++++++++++++++++++++++++++++
>  qemu-options.hx        |  27 ++-
>  3 files changed, 402 insertions(+), 1 deletion(-)
>  create mode 100644 backends/file-fence.c
>
> Changelog:
> v1->v2:
> - Publish patch in https://github.com/franciozzy/qemu/tree/filefence
> - Rename file_fence to file-fence and move to backends/
> - Use error_printf() instead of printf() when fencing
> - Replace a check already done by filemonitor-inotify with assert
> - Add return value to _setup() functions to simplify error logic
> - Use g_ascii_strcasecmp() to simplify logic in _set_signal()
> - Use glib memory allocation helpers in _set_file()
> - Fix bug to allow using qtimeout without ktimeout
> - Clarify usage of q/k timeouts in commit message
> - Clarify usage of hardware watchdogs in commit message
>
> diff --git a/backends/Makefile.objs b/backends/Makefile.objs
> index 28a847cd57..da2a589bdf 100644
> --- a/backends/Makefile.objs
> +++ b/backends/Makefile.objs
> @@ -9,6 +9,8 @@ common-obj-$(CONFIG_POSIX) += hostmem-file.o
>  common-obj-y += cryptodev.o
>  common-obj-y += cryptodev-builtin.o
>
> +common-obj-y += file-fence.o
> +
>  ifeq ($(CONFIG_VIRTIO_CRYPTO),y)
>  common-obj-y += cryptodev-vhost.o
>  common-obj-$(CONFIG_VHOST_CRYPTO) += cryptodev-vhost-user.o
> diff --git a/backends/file-fence.c b/backends/file-fence.c
> new file mode 100644
> index 0000000000..3dbbed7325
> --- /dev/null
> +++ b/backends/file-fence.c
> @@ -0,0 +1,374 @@
> +/*
> + * QEMU file-based self-fence mechanism
> + *
> + * Copyright (c) 2019 Nutanix Inc. All rights reserved.
> + *
> + * Authors:
> + *    Felipe Franciosi <fel...@nutanix.com>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qom/object_interfaces.h"
> +#include "qemu/error-report.h"
> +#include "qemu/filemonitor.h"
> +#include "qemu/timer.h"
> +
> +#include <time.h>
> +
> +#define TYPE_FILE_FENCE "file-fence"
> +
> +typedef struct FileFence {
> +    Object parent_obj;
> +
> +    gchar *dir;
> +    gchar *file;
> +    uint32_t qtimeout;
> +    uint32_t ktimeout;
> +    int signal;
> +
> +    timer_t ktimer;
> +    QEMUTimer *qtimer;
> +
> +    QFileMonitor *fm;
> +    uint64_t id;
> +} FileFence;
> +
> +#define FILE_FENCE(obj) \
> +    OBJECT_CHECK(FileFence, (obj), TYPE_FILE_FENCE)
> +
> +static void
> +timer_update(FileFence *ff)
> +{
> +    struct itimerspec its = {
> +        .it_value.tv_sec = ff->ktimeout,
> +    };
> +    int err;
> +
> +    if (ff->qtimeout) {
> +        timer_mod(ff->qtimer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
> +                              ff->qtimeout * 1000);
> +    }
> +
> +    if (ff->ktimeout) {
> +        err = timer_settime(ff->ktimer, 0, &its, NULL);
> +        g_assert(err == 0);
> +    }
> +}
> +
> +static void
> +file_fence_abort_cb(void *opaque)
> +{
> +    FileFence *ff = opaque;
> +    error_printf("Fencing after %u seconds on '%s'\n",
> +                 ff->qtimeout, g_strconcat(ff->dir, "/", ff->file, NULL));
> +    abort();
> +}
> +
> +static void
> +file_fence_watch_cb(int64_t id, QFileMonitorEvent ev, const char *file,
> +                    void *opaque)
> +{
> +    FileFence *ff = opaque;
> +
> +    if (ev != QFILE_MONITOR_EVENT_ATTRIBUTES) {
> +        return;
> +    }
> +
> +    g_assert(g_str_equal(file, ff->file));
> +
> +    timer_update(ff);
> +}
> +
> +static void
> +ktimer_tear(FileFence *ff)
> +{
> +    int err;
> +
> +    if (ff->ktimer) {
> +        err = timer_delete(ff->ktimer);
> +        g_assert(err == 0);
> +        ff->ktimer = NULL;
> +    }
> +}
> +
> +static gboolean
> +ktimer_setup(FileFence *ff, Error **errp)
> +{
> +    int err;
> +
> +    struct sigevent sev = {
> +        .sigev_notify = SIGEV_SIGNAL,
> +        .sigev_signo = ff->signal ? ff->signal : SIGKILL,
> +    };
> +
> +    if (ff->ktimeout == 0) {
> +        return TRUE;
> +    }
> +
> +    err = timer_create(CLOCK_MONOTONIC, &sev, &ff->ktimer);
> +    if (err == -1) {
> +        error_setg(errp, "Error creating kernel timer: %m");
> +        return FALSE;
> +    }
> +
> +    return TRUE;
> +}
> +
> +static void
> +qtimer_tear(FileFence *ff)
> +{
> +    if (ff->qtimer) {
> +        timer_del(ff->qtimer);
> +        timer_free(ff->qtimer);
> +    }
> +    ff->qtimer = NULL;
> +}
> +
> +static gboolean
> +qtimer_setup(FileFence *ff, Error **errp)
> +{
> +    QEMUTimer *qtimer;
> +
> +    if (ff->qtimeout == 0) {
> +        return TRUE;
> +    }
> +
> +    qtimer = timer_new_ms(QEMU_CLOCK_REALTIME, file_fence_abort_cb, ff);
> +    if (qtimer == NULL) {
> +        error_setg(errp, "Error creating Qemu timer");
> +        return FALSE;
> +    }
> +
> +    ff->qtimer = qtimer;
> +
> +    return TRUE;
> +}
> +
> +static void
> +watch_tear(FileFence *ff)
> +{
> +    if (ff->fm) {
> +        qemu_file_monitor_remove_watch(ff->fm, ff->dir, ff->id);
> +        qemu_file_monitor_free(ff->fm);
> +        ff->fm = NULL;
> +        ff->id = 0;
> +    }
> +}
> +
> +static gboolean
> +watch_setup(FileFence *ff, Error **errp)
> +{
> +    QFileMonitor *fm;
> +    int64_t id;
> +
> +    fm = qemu_file_monitor_new(errp);
> +    if (!fm) {
> +        return FALSE;
> +    }
> +
> +    id = qemu_file_monitor_add_watch(fm, ff->dir, ff->file,
> +                                     file_fence_watch_cb, ff, errp);
> +    if (id < 0) {
> +        qemu_file_monitor_free(fm);
> +        return FALSE;
> +    }
> +
> +    ff->fm = fm;
> +    ff->id = id;
> +
> +    return TRUE;
> +}
> +
> +static void
> +file_fence_complete(UserCreatable *obj, Error **errp)
> +{
> +    FileFence *ff = FILE_FENCE(obj);
> +
> +    if (ff->dir == NULL) {
> +        error_setg(errp, "A 'file' must be set");
> +        return;
> +    }
> +
> +    if (ff->signal != 0 && ff->ktimeout == 0) {
> +        error_setg(errp, "Using 'signal' requires 'ktimeout' to be set");
> +        return;
> +    }
> +
> +    if (ff->ktimeout == 0 && ff->qtimeout == 0) {
> +        error_setg(errp, "One or both of 'ktimeout' or 'qtimeout' must be 
> set");
> +        return;
> +    }
> +
> +    if (ff->qtimeout >= ff->ktimeout && ff->ktimeout != 0) {
> +        error_setg(errp, "Using 'qtimeout' >= 'ktimeout' doesn't make 
> sense");
> +        return;
> +    }
> +
> +    if (!watch_setup(ff, errp) ||
> +        !qtimer_setup(ff, errp) ||
> +        !ktimer_setup(ff, errp)) {
> +        return;
> +    }
> +
> +    timer_update(ff);
> +
> +    return;
> +}
> +
> +static void
> +file_fence_set_signal(Object *obj, const char *value, Error **errp)
> +{
> +    FileFence *ff = FILE_FENCE(obj);
> +
> +    if (ff->signal) {
> +        error_setg(errp, "Signal property already set");
> +        return;
> +    }
> +
> +    if (value == NULL) {
> +        goto err;
> +    }
> +
> +    if (g_ascii_strcasecmp(value, "QUIT") == 0) {
> +        ff->signal = SIGQUIT;
> +        return;
> +    }
> +
> +    if (g_ascii_strcasecmp(value, "KILL") == 0) {
> +        ff->signal = SIGKILL;
> +        return;
> +    }
> +
> +err:
> +    error_setg(errp, "Invalid signal. Must be 'quit' or 'kill'");
> +}
> +
> +static char *
> +file_fence_get_signal(Object *obj, Error **errp)
> +{
> +    FileFence *ff = FILE_FENCE(obj);
> +
> +    switch (ff->signal) {
> +    case SIGKILL:
> +        return g_strdup("kill");
> +    case SIGQUIT:
> +        return g_strdup("quit");
> +    }
> +
> +    /* Unreachable */
> +    abort();
> +}
> +
> +static void
> +file_fence_set_file(Object *obj, const char *value, Error **errp)
> +{
> +    FileFence *ff = FILE_FENCE(obj);
> +    g_autofree gchar *dir = NULL, *file = NULL;
> +
> +    if (ff->dir) {
> +        error_setg(errp, "File property already set");
> +        return;
> +    }
> +
> +    dir = g_path_get_dirname(value);
> +    if (g_str_equal(dir, ".")) {
> +        error_setg(errp, "Path for file-fence must be absolute");

Use g_path_is_absolute()? Also, why such a limitation?

> +        return;
> +    }
> +
> +    file = g_path_get_basename(value);
> +    if (g_str_equal(file, ".")) {
> +        error_setg(errp, "Path for file-fence must be a file");

I think you would get "." if value is "".

I am not sure you need extra error handling here, since watch_setup()
will fail if it can't open the file.

> +        return;
> +    }
> +
> +    ff->dir = g_steal_pointer(&dir);
> +    ff->file = g_steal_pointer(&file);
> +}
> +
> +static char *
> +file_fence_get_file(Object *obj, Error **errp)
> +{
> +    FileFence *ff = FILE_FENCE(obj);
> +
> +    if (ff->file) {
> +        return g_build_filename(ff->dir, ff->file, NULL);
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +file_fence_instance_finalize(Object *obj)
> +{
> +    FileFence *ff = FILE_FENCE(obj);
> +
> +    ktimer_tear(ff);
> +    qtimer_tear(ff);
> +    watch_tear(ff);
> +
> +    g_free(ff->file);
> +    g_free(ff->dir);
> +}
> +
> +static void
> +file_fence_instance_init(Object *obj)
> +{
> +    FileFence *ff = FILE_FENCE(obj);
> +
> +    object_property_add_str(obj, "file",
> +                            file_fence_get_file,
> +                            file_fence_set_file,
> +                            &error_abort);
> +    object_property_add_str(obj, "signal",
> +                            file_fence_get_signal,
> +                            file_fence_set_signal,
> +                            &error_abort);
> +    object_property_add_uint32_ptr(obj, "qtimeout", &ff->qtimeout,
> +                                   OBJ_PROP_FLAG_READWRITE, &error_abort);
> +    object_property_add_uint32_ptr(obj, "ktimeout", &ff->ktimeout,
> +                                   OBJ_PROP_FLAG_READWRITE, &error_abort);

You could make them all class properties, right?

> +}
> +
> +static void
> +file_fence_class_init(ObjectClass *klass, void *class_data)
> +{
> +    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
> +    ucc->complete = file_fence_complete;
> +}
> +
> +static const TypeInfo file_fence_info = {
> +    .name = TYPE_FILE_FENCE,
> +    .parent = TYPE_OBJECT,
> +    .class_init = file_fence_class_init,
> +    .instance_size = sizeof(FileFence),
> +    .instance_init = file_fence_instance_init,
> +    .instance_finalize = file_fence_instance_finalize,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_USER_CREATABLE },
> +        { }
> +    }
> +};
> +
> +static void
> +register_types(void)
> +{
> +    type_register_static(&file_fence_info);
> +}
> +
> +type_init(register_types);
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 224a8e8712..5ea94b37af 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4974,8 +4974,33 @@ The polling parameters can be modified at run-time 
> using the @code{qom-set} comm
>
>  @end table
>
> -ETEXI
> +@item -object file-fence,id=@var{id},file=@var{file},qtimeout=@var{qtimeout},ktimeout=@var{ktimeout},signal=@var{signal}
> +
> +Self-fence Qemu if @var{file} is not modified within a given timeout.
> +
> +Qemu will watch @var{file} for attribute changes. Touching the file works as a
> +heartbeat. This parameter is mandatory.
> +
> +Fencing happens after @var{qtimeout} or @var{ktimeout} seconds elapse
> +without a heartbeat. At least one of these must be specified. Both may be used.
>
> +When using @var{qtimeout}, an internal Qemu timer is used. Fencing with
> +this method gives Qemu a chance to write a log message indicating which file
> +caused the event. If Qemu's main loop is hung for whatever reason, this method
> +won't successfully kill Qemu.
> +
> +When using @var{ktimeout}, a kernel timer is used. In this case, @var{signal}
> +can be 'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using SIGQUIT may
> +be preferred for obtaining core dumps. If Qemu is hung (e.g. uninterruptible
> +sleep), this method won't successfully kill Qemu.
> +
> +It is worth noting that even successfully killing Qemu may not be sufficient to
> +completely fence a VM as certain operations like network packets or block
> +commands may be pending in the kernel. If that is a concern, systems should
> +consider using further fencing mechanisms like hardware watchdogs either
> +instead of or in conjunction with this feature for additional protection.
> +
> +ETEXI
>
>  HXCOMM This is the last statement. Insert new options before this line!
>  STEXI
> --
> 2.20.1
>


-- 
Marc-André Lureau

Reply via email to