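Implement the callback function that handles unrecoverable AER errors, and also the public APIs that can be used to register/unregister the handler (libxl_reg_aer_events_handler and libxl_unreg_aer_events_handler). Registration places a xenstore watch on the aerFailedSBDF node of the guest's PCI backend. When the watch fires for an unrecoverable AER error, the handler reads the failed device's SBDF from that node and forcibly removes the erring PCIe device from the guest.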
Signed-off-by: Venu Busireddy <venu.busire...@oracle.com>
---
 tools/libxl/libxl.h          |   7 ++
 tools/libxl/libxl_event.h    |   8 ++
 tools/libxl/libxl_internal.h |   8 ++
 tools/libxl/libxl_pci.c      | 172 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 195 insertions(+)

diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index eca0ea2c50..99a3c8ae1f 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1120,6 +1120,13 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, const libxl_mac *src);
  */
 #define LIBXL_HAVE_PV_SHIM 1
 
+/* LIBXL_HAVE_AER_EVENTS_HANDLER
+ *
+ * If this is defined, libxl has the library functions called
+ * libxl_reg_aer_events_handler and libxl_unreg_aer_events_handler.
+ */
+#define LIBXL_HAVE_AER_EVENTS_HANDLER 1
+
 typedef char **libxl_string_list;
 void libxl_string_list_dispose(libxl_string_list *sl);
 int libxl_string_list_length(const libxl_string_list *sl);
diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
index 1ea789e231..63c29ae800 100644
--- a/tools/libxl/libxl_event.h
+++ b/tools/libxl/libxl_event.h
@@ -184,6 +184,14 @@ void libxl_evdisable_domain_death(libxl_ctx *ctx, libxl_evgen_domain_death*);
  * may generate only a DEATH event.
  */
 
+typedef struct libxl__aer_watch libxl_aer_watch;
+int libxl_reg_aer_events_handler(libxl_ctx *ctx, uint32_t domid);
+  /*
+   * Registers a handler to handle the occurrence of unrecoverable AER errors
+   * in domain domid; libxl_unreg_aer_events_handler removes it again.
+   */
+void libxl_unreg_aer_events_handler(libxl_ctx *ctx, uint32_t domid);
+
 typedef struct libxl__evgen_disk_eject libxl_evgen_disk_eject;
 int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t domid, const char *vdev,
                         libxl_ev_user, libxl_evgen_disk_eject **evgen_out);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 506687fbe9..7972490050 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -356,6 +356,14 @@ struct libxl__ev_child {
     LIBXL_LIST_ENTRY(struct libxl__ev_child) entry;
 };
 
+/*
+ * Per-domain state for AER event handling, kept on a singly linked list.
+ */
+struct libxl__aer_watch {
+    uint32_t domid;                 /* domain the watch was registered for */
+    libxl__ev_xswatch watch;        /* xenstore watch on aerFailedSBDF */
+    struct libxl__aer_watch *next;  /* next entry on the watch list */
+};
 
 /*
  * evgen structures, which are the state we use for generating
diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
index 4755a0c93c..c121c9f8cc 100644
--- a/tools/libxl/libxl_pci.c
+++ b/tools/libxl/libxl_pci.c
@@ -1686,6 +1686,178 @@ static int libxl_device_pci_compare(libxl_device_pci *d1,
     return COMPARE_PCI(d1, d2);
 }
 
+/*
+ * xenstore watch callback for unrecoverable AER errors.
+ *
+ * Events whose last path component is not "aerFailedSBDF" are ignored.
+ * Otherwise the node is read, parsed as a hexadecimal dom:bus:dev.fn tuple,
+ * and that PCI device is forcibly removed from the domain recorded in the
+ * enclosing libxl_aer_watch.
+ */
+static void aer_backend_watch_callback(libxl__egc *egc,
+                                       libxl__ev_xswatch *watch,
+                                       const char *watch_path,
+                                       const char *event_path)
+{
+    EGC_GC;
+    libxl_aer_watch *aer_ws = CONTAINER_OF(watch, *aer_ws, watch);
+    int rc;
+    unsigned int dom, bus, dev, fn;
+    uint32_t domid = aer_ws->domid;
+    char *p, *path;
+    const char *aerFailedSBDF;
+    libxl_device_pci pcidev;
+
+    /* Extract the backend directory. */
+    path = libxl__strdup(gc, event_path);
+    p = strrchr(path, '/');
+    if ((p == NULL) || (strcmp(p, "/aerFailedSBDF") != 0))
+        return;
+    /* Truncate the string so it points to the backend directory. */
+    *p = '\0';
+
+    /* Fetch the value of the failed PCI device. */
+    rc = libxl__xs_read_checked(gc, XBT_NULL,
+            GCSPRINTF("%s/aerFailedSBDF", path), &aerFailedSBDF);
+    if (rc || !aerFailedSBDF)
+        return;
+    LOGD(ERROR, domid, " aerFailedSBDF = %s", aerFailedSBDF);
+
+    /* All four fields must parse; otherwise they would be used unset. */
+    if (sscanf(aerFailedSBDF, "%x:%x:%x.%x", &dom, &bus, &dev, &fn) != 4) {
+        LOGD(ERROR, domid, " malformed aerFailedSBDF value");
+        return;
+    }
+
+    libxl_device_pci_init(&pcidev);
+    pcidev_struct_fill(&pcidev, dom, bus, dev, fn, 0);
+    /* Forcibly remove the device from the guest */
+    rc = libxl__device_pci_remove_common(gc, domid, &pcidev, 1);
+    if (rc)
+        LOGD(ERROR, domid, " libxl__device_pci_remove_common() failed, rc=x%x",
+                (unsigned int)rc);
+
+    return;
+}
+
+/*
+ * Single access point for the list of registered AER watches, which lives
+ * in a function-local static.
+ *
+ * in != NULL: push 'in' onto the head of the list and return it; 'domid' is
+ *             ignored.
+ * in == NULL: unlink and return the first entry whose domid matches, or
+ *             NULL if there is none.
+ *
+ * NOTE(review): the list is process-wide (not per libxl_ctx) and nothing
+ * here serialises access to it.
+ */
+static libxl_aer_watch *manage_aer_ws_list(libxl_aer_watch *in, uint32_t domid)
+{
+    static libxl_aer_watch *aer_ws = NULL;
+    libxl_aer_watch *iter, *prev = NULL;
+
+    if (in) {
+        if (aer_ws)
+            in->next = aer_ws;
+        iter = aer_ws = in;
+    } else {
+        iter = aer_ws;
+        while (iter) {
+            if (iter->domid == domid) {
+                if (prev)
+                    prev->next = iter->next;
+                else
+                    aer_ws = iter->next;
+                break;
+            }
+            prev = iter;
+            iter = iter->next;
+        }
+    }
+    return iter;
+}
+
+/* Adds aer_ws to the list of registered AER watches. */
+static void store_aer_ws(libxl_aer_watch *aer_ws)
+{
+    manage_aer_ws_list(aer_ws, 0);
+    return;
+}
+
+/* Unlinks and returns the list entry for domid, or NULL if there is none. */
+static libxl_aer_watch *retrieve_aer_ws(uint32_t domid)
+{
+    return manage_aer_ws_list(NULL, domid);
+}
+
+/*
+ * Registers a xenstore watch on the aerFailedSBDF node of domid's PCI
+ * backend, with aer_backend_watch_callback() as its callback.
+ *
+ * Returns 0 on success, or the error from libxl__get_domid() or
+ * libxl__ev_xswatch_register().  On failure nothing is left on the list.
+ */
+int libxl_reg_aer_events_handler(libxl_ctx *ctx, uint32_t domid)
+{
+    int rc = 0;
+    char *be_path;
+    uint32_t pciback_domid;
+    libxl_aer_watch *aer_ws;
+    GC_INIT(ctx);
+
+    rc = libxl__get_domid(gc, &pciback_domid);
+    if (rc) {
+        LOGD(ERROR, domid, " libxl__get_domid() failed, rc = %d", rc);
+        goto out;
+    }
+
+    aer_ws = libxl__calloc(NOGC, 1, sizeof(*aer_ws));
+    aer_ws->domid = domid;
+    aer_ws->next = NULL;
+    /*
+     * NOTE(review): pciback_domid also fills the path component that
+     * follows domid; confirm that this is the intended value.
+     */
+    be_path = GCSPRINTF("/local/domain/%u/backend/pci/%u/%u/%s",
+            pciback_domid, domid, pciback_domid, "aerFailedSBDF");
+    rc = libxl__ev_xswatch_register(gc, &aer_ws->watch,
+            aer_backend_watch_callback, be_path);
+    if (rc) {
+        /* Not on the list yet, so this is the only chance to free it. */
+        free(aer_ws);
+        goto out;
+    }
+
+    /* List only watches that were registered successfully. */
+    store_aer_ws(aer_ws);
+
+out:
+    GC_FREE;
+    return rc;
+}
+
+/*
+ * Deregisters and frees the AER watch registered for domid.  Does nothing
+ * if no watch is registered for that domain.
+ */
+void libxl_unreg_aer_events_handler(libxl_ctx *ctx, uint32_t domid)
+{
+    GC_INIT(ctx);
+    libxl_aer_watch *aer_ws;
+
+    aer_ws = retrieve_aer_ws(domid);
+    if (!aer_ws)
+        goto out;
+
+    libxl__ev_xswatch_deregister(gc, &aer_ws->watch);
+    free(aer_ws);
+
+out:
+    GC_FREE;
+    return;
+}
+
 #define libxl__device_pci_update_devid NULL
 
 DEFINE_DEVICE_TYPE_STRUCT_X(pcidev, pci, PCI);
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel