Re: [PATCH V3 22/42] vfio-pci: preserve MSI

2025-06-01 Thread Cédric Le Goater

On 5/28/25 19:44, Steven Sistare wrote:

Hi Cedric,
   Do you have any comments on this before I send V4?
Ditto for patch "vfio-pci: preserve INTx".
In both, I made the changes you requested in V2.
And I will change all "reused" tests to cpr_is_incoming as we
discussed elsewhere.


I saw. Thanks for the changes.


You mentioned these possibly conflict with vfio-user, but it would
help to get your stylistic and correctness comments on these from a
cpr-only point of view before I send the next version.


OK. I will try to do that on v4 after the PR (with part 1) is sent.


And as I mentioned, I propose to block CPR when vfio-user is used,
at least initially, so you can ignore vfio-user in the cpr load paths below.


Thanks,

C.





- Steve

On 5/12/2025 11:32 AM, Steve Sistare wrote:

Save the MSI message area as part of vfio-pci vmstate, and preserve the
interrupt and notifier eventfd's.  migrate_incoming loads the MSI data,
then the vfio-pci post_load handler finds the eventfds in CPR state,
rebuilds vector data structures, and attaches the interrupts to the new
KVM instance.

Signed-off-by: Steve Sistare 
---
  hw/vfio/cpr.c  | 91 ++
  hw/vfio/pci.c  | 40 ++--
  include/hw/vfio/vfio-cpr.h |  8 
  3 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 6ea8e9f..be132fa 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -9,6 +9,8 @@
  #include "hw/vfio/vfio-device.h"
  #include "hw/vfio/vfio-cpr.h"
  #include "hw/vfio/pci.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/msi.h"
  #include "migration/cpr.h"
  #include "qapi/error.h"
  #include "system/runstate.h"
@@ -40,6 +42,69 @@ void vfio_cpr_unregister_container(VFIOContainerBase 
*bcontainer)
  migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
  }
+#define STRDUP_VECTOR_FD_NAME(vdev, name)   \
+    g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))
+
+void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
+ int fd)
+{
+    g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+    cpr_save_fd(fdname, nr, fd);
+}
+
+int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+    g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+    return cpr_find_fd(fdname, nr);
+}
+
+void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+    g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+    cpr_delete_fd(fdname, nr);
+}
+
+static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
+   bool msix)
+{
+    int i, fd;
+    bool pending = false;
+    PCIDevice *pdev = &vdev->pdev;
+
+    vdev->nr_vectors = nr_vectors;
+    vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
+    vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+
+    vfio_prepare_kvm_msi_virq_batch(vdev);
+
+    for (i = 0; i < nr_vectors; i++) {
+    VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+    fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
+    if (fd >= 0) {
+    vfio_vector_init(vdev, i);
+    qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector);
+    }
+
+    if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
+    vfio_add_kvm_msi_virq(vdev, vector, i, msix);
+    } else {
+    vdev->msi_vectors[i].virq = -1;
+    }
+
+    if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
+    set_bit(i, vdev->msix->pending);
+    pending = true;
+    }
+    }
+
+    vfio_commit_kvm_msi_virq_batch(vdev);
+
+    if (msix) {
+    memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
+    }
+}
+
  /*
   * The kernel may change non-emulated config bits.  Exclude them from the
   * changed-bits check in get_pci_config_device.
@@ -58,13 +123,39 @@ static int vfio_cpr_pci_pre_load(void *opaque)
  return 0;
  }
+static int vfio_cpr_pci_post_load(void *opaque, int version_id)
+{
+    VFIOPCIDevice *vdev = opaque;
+    PCIDevice *pdev = &vdev->pdev;
+    int nr_vectors;
+
+    if (msix_enabled(pdev)) {
+    msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
+   vfio_msix_vector_release, NULL);
+    nr_vectors = vdev->msix->entries;
+    vfio_cpr_claim_vectors(vdev, nr_vectors, true);
+
+    } else if (msi_enabled(pdev)) {
+    nr_vectors = msi_nr_vectors_allocated(pdev);
+    vfio_cpr_claim_vectors(vdev, nr_vectors, false);
+
+    } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
+    g_assert_not_reached();  /* completed in a subsequent patch */
+    }
+
+    return 0;
+}
+
  const VMStateDescription vfio_cpr_pci_vmstate = {
  .name = "vfio-cpr-pci",
  .version_id = 0,
  .minimum_version_id = 0,
  .pre_load = vfio_cpr_pci_pre_load,
+    .post_load = vfio_cpr_pci_post_l

Re: [PATCH V3 22/42] vfio-pci: preserve MSI

2025-05-28 Thread Steven Sistare

Hi Cedric,
  Do you have any comments on this before I send V4?
Ditto for patch "vfio-pci: preserve INTx".
In both, I made the changes you requested in V2.
And I will change all "reused" tests to cpr_is_incoming as we
discussed elsewhere.

You mentioned these possibly conflict with vfio-user, but it would
help to get your stylistic and correctness comments on these from a
cpr-only point of view before I send the next version.

And as I mentioned, I propose to block CPR when vfio-user is used,
at least initially, so you can ignore vfio-user in the cpr load paths below.

- Steve

On 5/12/2025 11:32 AM, Steve Sistare wrote:

Save the MSI message area as part of vfio-pci vmstate, and preserve the
interrupt and notifier eventfd's.  migrate_incoming loads the MSI data,
then the vfio-pci post_load handler finds the eventfds in CPR state,
rebuilds vector data structures, and attaches the interrupts to the new
KVM instance.

Signed-off-by: Steve Sistare 
---
  hw/vfio/cpr.c  | 91 ++
  hw/vfio/pci.c  | 40 ++--
  include/hw/vfio/vfio-cpr.h |  8 
  3 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 6ea8e9f..be132fa 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -9,6 +9,8 @@
  #include "hw/vfio/vfio-device.h"
  #include "hw/vfio/vfio-cpr.h"
  #include "hw/vfio/pci.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/msi.h"
  #include "migration/cpr.h"
  #include "qapi/error.h"
  #include "system/runstate.h"
@@ -40,6 +42,69 @@ void vfio_cpr_unregister_container(VFIOContainerBase 
*bcontainer)
  migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
  }
  
+#define STRDUP_VECTOR_FD_NAME(vdev, name)   \

+g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))
+
+void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
+ int fd)
+{
+g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+cpr_save_fd(fdname, nr, fd);
+}
+
+int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+return cpr_find_fd(fdname, nr);
+}
+
+void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+cpr_delete_fd(fdname, nr);
+}
+
+static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
+   bool msix)
+{
+int i, fd;
+bool pending = false;
+PCIDevice *pdev = &vdev->pdev;
+
+vdev->nr_vectors = nr_vectors;
+vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
+vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+
+vfio_prepare_kvm_msi_virq_batch(vdev);
+
+for (i = 0; i < nr_vectors; i++) {
+VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
+if (fd >= 0) {
+vfio_vector_init(vdev, i);
+qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector);
+}
+
+if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
+vfio_add_kvm_msi_virq(vdev, vector, i, msix);
+} else {
+vdev->msi_vectors[i].virq = -1;
+}
+
+if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
+set_bit(i, vdev->msix->pending);
+pending = true;
+}
+}
+
+vfio_commit_kvm_msi_virq_batch(vdev);
+
+if (msix) {
+memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
+}
+}
+
  /*
   * The kernel may change non-emulated config bits.  Exclude them from the
   * changed-bits check in get_pci_config_device.
@@ -58,13 +123,39 @@ static int vfio_cpr_pci_pre_load(void *opaque)
  return 0;
  }
  
+static int vfio_cpr_pci_post_load(void *opaque, int version_id)

+{
+VFIOPCIDevice *vdev = opaque;
+PCIDevice *pdev = &vdev->pdev;
+int nr_vectors;
+
+if (msix_enabled(pdev)) {
+msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
+   vfio_msix_vector_release, NULL);
+nr_vectors = vdev->msix->entries;
+vfio_cpr_claim_vectors(vdev, nr_vectors, true);
+
+} else if (msi_enabled(pdev)) {
+nr_vectors = msi_nr_vectors_allocated(pdev);
+vfio_cpr_claim_vectors(vdev, nr_vectors, false);
+
+} else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
+g_assert_not_reached();  /* completed in a subsequent patch */
+}
+
+return 0;
+}
+
  const VMStateDescription vfio_cpr_pci_vmstate = {
  .name = "vfio-cpr-pci",
  .version_id = 0,
  .minimum_version_id = 0,
  .pre_load = vfio_cpr_pci_pre_load,
+.post_load = vfio_cpr_pci_post_load,
  .needed = cpr_needed_for_reuse,
  .fields = (VMStateField[]) {
+VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
+VMSTATE_MSIX_TEST(pd