The virtio device/driver (e.g., vhost-scsi) may hang due to a lost IRQ or a lost doorbell register kick, e.g.,
https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg01711.html This patch adds a new debug interface 'DeviceEvent' to DeviceClass to help narrow down whether the issue is due to a lost irq/kick. So far the new interface handles only two events: 'call' and 'kick'. Any device (e.g., e1000e or vhost-scsi) may implement it (e.g., via eventfd, MSI-X or legacy IRQ). The 'call' event lets the admin deliberately inject an irq for a specific device (e.g., vhost-scsi) from QEMU/host into the VM, while the 'kick' event lets the admin deliberately kick the doorbell from the QEMU/host side for a specific device. Signed-off-by: Dongli Zhang <dongli.zh...@oracle.com> --- hmp-commands.hx | 14 ++++++++++++++ include/hw/qdev-core.h | 6 ++++++ include/monitor/hmp.h | 1 + qapi/qdev.json | 30 ++++++++++++++++++++++++++++++ softmmu/qdev-monitor.c | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+) diff --git a/hmp-commands.hx b/hmp-commands.hx index 73e0832ea1..0fbb72568f 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1867,3 +1867,17 @@ ERST .flags = "p", }, + { + .name = "x-debug-device-event", + .args_type = "dev:s,event:s,queue:l", + .params = "dev event queue", + .help = "generate device event for a specific device queue", + .cmd = hmp_x_debug_device_event, + .flags = "p", + }, + +SRST +``x-debug-device-event`` *dev* *event* *queue* + Generate device event *event* for specific *queue* of *dev* +ERST + diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index bafc311bfa..83df3bab89 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -29,9 +29,14 @@ typedef enum DeviceCategory { DEVICE_CATEGORY_MAX } DeviceCategory; +#define DEVICE_EVENT_CALL 1 +#define DEVICE_EVENT_KICK 2 + typedef void (*DeviceRealize)(DeviceState *dev, Error **errp); typedef void (*DeviceUnrealize)(DeviceState *dev); typedef void (*DeviceReset)(DeviceState *dev); +typedef void (*DeviceEvent)(DeviceState *dev, int event, int queue, + Error **errp); typedef void (*BusRealize)(BusState 
*bus, Error **errp); typedef void (*BusUnrealize)(BusState *bus); @@ -132,6 +137,7 @@ struct DeviceClass { DeviceReset reset; DeviceRealize realize; DeviceUnrealize unrealize; + DeviceEvent event; /* device state */ const VMStateDescription *vmsd; diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index ed2913fd18..ffb48fce06 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -133,5 +133,6 @@ void hmp_info_replay(Monitor *mon, const QDict *qdict); void hmp_replay_break(Monitor *mon, const QDict *qdict); void hmp_replay_delete_break(Monitor *mon, const QDict *qdict); void hmp_replay_seek(Monitor *mon, const QDict *qdict); +void hmp_x_debug_device_event(Monitor *mon, const QDict *qdict); #endif diff --git a/qapi/qdev.json b/qapi/qdev.json index b83178220b..6fc7a5bfc1 100644 --- a/qapi/qdev.json +++ b/qapi/qdev.json @@ -124,3 +124,33 @@ ## { 'event': 'DEVICE_DELETED', 'data': { '*device': 'str', 'path': 'str' } } + +## +# @x-debug-device-event: +# +# Generate device event for a specific device queue +# +# @dev: device path +# +# @event: event (e.g., kick or call) to trigger +# +# @queue: queue id +# +# Returns: Nothing on success +# +# Since: 5.3 +# +# Notes: This is used to debug VM driver hang issue. The 'kick' event is to +# send notification to QEMU/vhost while the 'call' event is to +# interrupt VM on purpose. 
+# +# Example: +# +# -> { "execute": "x-debug-device-event", +# "arguments": { "dev": "/machine/peripheral/vscsi0", "event": "kick", +# "queue": 1 } } +# <- { "return": {} } +# +## +{ 'command': 'x-debug-device-event', + 'data': {'dev': 'str', 'event': 'str', 'queue': 'int'} } diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index 8dc656becc..63dee5f1a6 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -915,6 +915,47 @@ void hmp_device_del(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, err); } +void qmp_x_debug_device_event(const char *dev, const char *event, + int64_t queue, Error **errp) +{ + DeviceState *device = find_device_state(dev, NULL); + DeviceClass *dc; + int evt; + + if (!device) { + error_setg(errp, "Device %s not found", dev); + return; + } + + dc = DEVICE_GET_CLASS(device); + if (!dc->event) { + error_setg(errp, "device_event is not supported"); + return; + } + + if (!strcmp(event, "kick")) + evt = DEVICE_EVENT_KICK; + else if (!strcmp(event, "call")) + evt = DEVICE_EVENT_CALL; + else { + error_setg(errp, "Unsupported event %s", event); + return; + } + + dc->event(device, evt, queue, errp); +} + +void hmp_x_debug_device_event(Monitor *mon, const QDict *qdict) +{ + const char *dev = qdict_get_str(qdict, "dev"); + const char *event = qdict_get_str(qdict, "event"); + int queue = qdict_get_try_int(qdict, "queue", -1); + Error *err = NULL; + + qmp_x_debug_device_event(dev, event, queue, &err); + hmp_handle_error(mon, err); +} + BlockBackend *blk_by_qdev_id(const char *id, Error **errp) { DeviceState *dev; -- 2.17.1