On 2/5/2025 12:13 PM, Cédric Le Goater wrote:
On 1/29/25 15:43, Steve Sistare wrote:
Preserve vfio INTx state across cpr-transfer. Preserve VFIOINTx fields as
follows:
pin : Recover this from the vfio config in kernel space
interrupt : Preserve its eventfd descriptor across exec.
unmask : Ditto
route.irq : This could perhaps be recovered in vfio_pci_post_load by
calling pci_device_route_intx_to_irq(pin), whose implementation reads
config space for a bridge device such as ich9. However, there is no
guarantee that the bridge vmstate is read before vfio vmstate. Rather
than fiddling with MigrationPriority for vmstate handlers, explicitly
save route.irq in vfio vmstate.
pending : save in vfio vmstate.
mmap_timeout, mmap_timer : Re-initialize
bool kvm_accel : Re-initialize
In vfio_realize, defer calling vfio_intx_enable until the vmstate
is available, in vfio_pci_post_load. Modify vfio_intx_enable and
vfio_intx_kvm_enable to skip vfio initialization, but still perform
kvm initialization.
Signed-off-by: Steve Sistare <steven.sist...@oracle.com>
---
hw/vfio/pci.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 47 insertions(+), 4 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index df6e298..c50dbef 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -184,12 +184,17 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev,
Error **errp)
return true;
}
+ if (vdev->vbasedev.reused) {
1 x vdev->vbasedev.reused
+ goto skip_state;
+ }
+
/* Get to a known interrupt state */
qemu_set_fd_handler(irq_fd, NULL, NULL, vdev);
vfio_mask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
vdev->intx.pending = false;
pci_irq_deassert(&vdev->pdev);
+skip_state:
hmm, this skip_state label and ...
/* Get an eventfd for resample/unmask */
if (vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0)) {
error_setg(errp, "vfio_notifier_init intx-unmask failed");
@@ -204,6 +209,10 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev,
Error **errp)
goto fail_irqfd;
}
+ if (vdev->vbasedev.reused) {
2 x vdev->vbasedev.reused
+ goto skip_irq;
+ }
+
if (!vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_UNMASK,
event_notifier_get_fd(&vdev->intx.unmask),
@@ -214,6 +223,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error
**errp)
/* Let'em rip */
vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
+skip_irq:
... this skip_irq label are one "very quick" way to get things done :)
I chose to use gotos and skip labels for your benefit as a reviewer: they
reduce the diffs, so you can see that the non-cpr code is not changed. They
were not a quick way to get this done. But if you prefer, I can use
conditional blocks instead of gotos, and let indentation create additional
diffs:
if (reused)
goto skip;
non-cpr code;
skip:
vs
if (!reused) {
non-cpr code;
}
vdev->intx.kvm_accel = true;
trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
@@ -329,7 +339,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error
**errp)
return true;
}
- vfio_disable_interrupts(vdev);
+ /*
+ * Do not alter interrupt state during vfio_realize and cpr load. The
+ * reused flag is cleared thereafter.
+ */
+ if (!vdev->vbasedev.reused) {
3 x vdev->vbasedev.reused
+ vfio_disable_interrupts(vdev);
+ }
vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
pci_config_set_interrupt_pin(vdev->pdev.config, pin);
@@ -351,7 +367,8 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error
**errp)
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
- if (!vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
+ if (!vdev->vbasedev.reused &&
4 x vdev->vbasedev.reused
+ !vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vdev);
vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt",
0);
@@ -3256,7 +3273,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_intx_routing_notifier);
vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
- if (!vfio_intx_enable(vdev, errp)) {
+ /* Wait until cpr load reads intx routing data to enable */
+ if (!vdev->vbasedev.reused && !vfio_intx_enable(vdev, errp)) {
5 x vdev->vbasedev.reused
This patch already adds a test on vdev->vbasedev.reused at the top of
vfio_intx_enable(). This one seems redundant.
This test is necessary. I will expand the comment to be more explicit:
/*
* During CPR, do not call vfio_intx_enable at this time. Instead,
* call it from vfio_pci_post_load after the intx routing data has
* been loaded from vmstate.
*/
if (!vdev->vbasedev.reused && !vfio_intx_enable(vdev, errp)) {
Please duplicate the whole vfio_intx_enable() routine and move it
under a cpr file.
Do you just mean vfio_intx_enable? Or also vfio_intx_enable_kvm? The
occurrences of vdev->vbasedev.reused that you flag occur in both.
I coded with reused conditionals and "skip" labels for a good reason. By
keeping the common logic inline with the cpr conditionals, I minimize the
chance that changes in the common logic will break cpr. Conversely,
outlining cpr specific versions of these functions and duplicating common
code creates the very real possibility that changes in vfio core code will
not be made in the cpr copies, and break cpr.
goto out_deregister;
}
}
@@ -3578,12 +3596,36 @@ static int vfio_pci_post_load(void *opaque, int
version_id)
vfio_claim_vectors(vdev, nr_vectors, false);
} else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
- g_assert_not_reached(); /* completed in a subsequent patch */
+ Error *err = NULL;
+ if (!vfio_intx_enable(vdev, &err)) {
+ error_report_err(err);
+ return -1;
+ }
}
return 0;
}
+static const VMStateDescription vfio_intx_vmstate = {
+ .name = "vfio-intx",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(pending, VFIOINTx),
+ VMSTATE_UINT32(route.mode, VFIOINTx),
+ VMSTATE_INT32(route.irq, VFIOINTx),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_VFIO_INTX(_field, _state) { \
+ .name = (stringify(_field)), \
+ .size = sizeof(VFIOINTx), \
+ .vmsd = &vfio_intx_vmstate, \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, VFIOINTx), \
+}
+
move these to cpr file please.
OK.
- Steve
static const VMStateDescription vfio_pci_vmstate = {
.name = "vfio-pci",
.version_id = 0,
@@ -3594,6 +3636,7 @@ static const VMStateDescription vfio_pci_vmstate = {
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
+ VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
VMSTATE_END_OF_LIST()
}
};