RE: [PATCH V5 04/38] vfio/container: preserve descriptors

2025-07-02 Thread Duan, Zhenzhong


>-Original Message-
>From: Steven Sistare 
>Subject: Re: [PATCH V5 04/38] vfio/container: preserve descriptors
>
>On 6/23/2025 5:07 AM, Duan, Zhenzhong wrote:
>>> -Original Message-
>>> From: Steve Sistare 
>>> Subject: [PATCH V5 04/38] vfio/container: preserve descriptors
>>>
>>> At vfio creation time, save the value of vfio container, group, and device
>>> descriptors in CPR state.  On qemu restart, vfio_realize() finds and uses
>>> the saved descriptors.
>>>
>>> During reuse, device and iommu state is already configured, so operations
>>> in vfio_realize that would modify the configuration, such as vfio ioctl's,
>>> are skipped.  The result is that vfio_realize constructs qemu data
>>> structures that reflect the current state of the device.
>>>
>>> Signed-off-by: Steve Sistare 
>>> Reviewed-by: Cédric Le Goater 
>>> Reviewed-by: Zhenzhong Duan 
>>> ---
>>> include/hw/vfio/vfio-cpr.h |  6 +
>>> hw/vfio/container.c| 67
>+++--
>>> -
>>> hw/vfio/cpr-legacy.c   | 42 +
>>> 3 files changed, 100 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
>>> index d4e0bd5..5a2e5f6 100644
>>> --- a/include/hw/vfio/vfio-cpr.h
>>> +++ b/include/hw/vfio/vfio-cpr.h
>>> @@ -13,6 +13,7 @@
>>>
>>> struct VFIOContainer;
>>> struct VFIOContainerBase;
>>> +struct VFIOGroup;
>>>
>>> typedef struct VFIOContainerCPR {
>>>  Error *blocker;
>>> @@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct
>VFIOContainerBase
>>> *bcontainer,
>>>   Error **errp);
>>> void vfio_cpr_unregister_container(struct VFIOContainerBase
>*bcontainer);
>>>
>>> +int vfio_cpr_group_get_device_fd(int d, const char *name);
>>> +
>>> +bool vfio_cpr_container_match(struct VFIOContainer *container,
>>> +  struct VFIOGroup *group, int fd);
>>> +
>>> #endif /* HW_VFIO_VFIO_CPR_H */
>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>> index 93cdf80..5caae4c 100644
>>> --- a/hw/vfio/container.c
>>> +++ b/hw/vfio/container.c
>>> @@ -31,6 +31,8 @@
>>> #include "system/reset.h"
>>> #include "trace.h"
>>> #include "qapi/error.h"
>>> +#include "migration/cpr.h"
>>> +#include "migration/blocker.h"
>>> #include "pci.h"
>>> #include "hw/vfio/vfio-container.h"
>>> #include "vfio-helpers.h"
>>> @@ -425,7 +427,12 @@ static VFIOContainer *vfio_create_container(int
>fd,
>>> VFIOGroup *group,
>>>  return NULL;
>>>  }
>>>
>>> -if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
>>> +/*
>>> + * During CPR, just set the container type and skip the ioctls, as the
>>> + * container and group are already configured in the kernel.
>>> + */
>>> +if (!cpr_is_incoming() &&
>>> +!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
>>>  return NULL;
>>>  }
>>>
>>> @@ -592,6 +599,11 @@ static bool
>vfio_container_group_add(VFIOContainer
>>> *container, VFIOGroup *group,
>>>  group->container = container;
>>>  QLIST_INSERT_HEAD(&container->group_list, group,
>container_next);
>>>  vfio_group_add_kvm_device(group);
>>> +/*
>>> + * Remember the container fd for each group, so we can attach to
>the same
>>> + * container after CPR.
>>> + */
>>> +cpr_resave_fd("vfio_container_for_group", group->groupid,
>container->fd);
>>
>> I know this is already merged. Just out of curious, It looks cpr_save_fd is
>enough?
>
>vfio_container_group_add is called from multiple places.  In some, we know
>that the fd
>is being saved for the first time, in others we do not know.  resave avoids
>creating
>a duplicate entry.

IIUC, vfio_container_group_add is called only once for each group. But it's 
harmless to call cpr_resave_fd() here. I'm fine to leave it as is.

Thanks
Zhenzhong


Re: [PATCH V5 04/38] vfio/container: preserve descriptors

2025-07-01 Thread Steven Sistare

On 6/23/2025 5:07 AM, Duan, Zhenzhong wrote:

-Original Message-
From: Steve Sistare 
Subject: [PATCH V5 04/38] vfio/container: preserve descriptors

At vfio creation time, save the value of vfio container, group, and device
descriptors in CPR state.  On qemu restart, vfio_realize() finds and uses
the saved descriptors.

During reuse, device and iommu state is already configured, so operations
in vfio_realize that would modify the configuration, such as vfio ioctl's,
are skipped.  The result is that vfio_realize constructs qemu data
structures that reflect the current state of the device.

Signed-off-by: Steve Sistare 
Reviewed-by: Cédric Le Goater 
Reviewed-by: Zhenzhong Duan 
---
include/hw/vfio/vfio-cpr.h |  6 +
hw/vfio/container.c| 67 +++--
-
hw/vfio/cpr-legacy.c   | 42 +
3 files changed, 100 insertions(+), 15 deletions(-)

diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index d4e0bd5..5a2e5f6 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -13,6 +13,7 @@

struct VFIOContainer;
struct VFIOContainerBase;
+struct VFIOGroup;

typedef struct VFIOContainerCPR {
 Error *blocker;
@@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct VFIOContainerBase
*bcontainer,
  Error **errp);
void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);

+int vfio_cpr_group_get_device_fd(int d, const char *name);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+  struct VFIOGroup *group, int fd);
+
#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 93cdf80..5caae4c 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -31,6 +31,8 @@
#include "system/reset.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
#include "vfio-helpers.h"
@@ -425,7 +427,12 @@ static VFIOContainer *vfio_create_container(int fd,
VFIOGroup *group,
 return NULL;
 }

-if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+/*
+ * During CPR, just set the container type and skip the ioctls, as the
+ * container and group are already configured in the kernel.
+ */
+if (!cpr_is_incoming() &&
+!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
 return NULL;
 }

@@ -592,6 +599,11 @@ static bool vfio_container_group_add(VFIOContainer
*container, VFIOGroup *group,
 group->container = container;
 QLIST_INSERT_HEAD(&container->group_list, group, container_next);
 vfio_group_add_kvm_device(group);
+/*
+ * Remember the container fd for each group, so we can attach to the same
+ * container after CPR.
+ */
+cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);


I know this is already merged. Just out of curious, It looks cpr_save_fd is 
enough?


vfio_container_group_add is called from multiple places.  In some, we know that 
the fd
is being saved for the first time, in others we do not know.  resave avoids 
creating
a duplicate entry.

- Steve




RE: [PATCH V5 04/38] vfio/container: preserve descriptors

2025-06-23 Thread Duan, Zhenzhong


>-Original Message-
>From: Steve Sistare 
>Subject: [PATCH V5 04/38] vfio/container: preserve descriptors
>
>At vfio creation time, save the value of vfio container, group, and device
>descriptors in CPR state.  On qemu restart, vfio_realize() finds and uses
>the saved descriptors.
>
>During reuse, device and iommu state is already configured, so operations
>in vfio_realize that would modify the configuration, such as vfio ioctl's,
>are skipped.  The result is that vfio_realize constructs qemu data
>structures that reflect the current state of the device.
>
>Signed-off-by: Steve Sistare 
>Reviewed-by: Cédric Le Goater 
>Reviewed-by: Zhenzhong Duan 
>---
> include/hw/vfio/vfio-cpr.h |  6 +
> hw/vfio/container.c| 67 +++--
>-
> hw/vfio/cpr-legacy.c   | 42 +
> 3 files changed, 100 insertions(+), 15 deletions(-)
>
>diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
>index d4e0bd5..5a2e5f6 100644
>--- a/include/hw/vfio/vfio-cpr.h
>+++ b/include/hw/vfio/vfio-cpr.h
>@@ -13,6 +13,7 @@
>
> struct VFIOContainer;
> struct VFIOContainerBase;
>+struct VFIOGroup;
>
> typedef struct VFIOContainerCPR {
> Error *blocker;
>@@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct VFIOContainerBase
>*bcontainer,
>  Error **errp);
> void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
>
>+int vfio_cpr_group_get_device_fd(int d, const char *name);
>+
>+bool vfio_cpr_container_match(struct VFIOContainer *container,
>+  struct VFIOGroup *group, int fd);
>+
> #endif /* HW_VFIO_VFIO_CPR_H */
>diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>index 93cdf80..5caae4c 100644
>--- a/hw/vfio/container.c
>+++ b/hw/vfio/container.c
>@@ -31,6 +31,8 @@
> #include "system/reset.h"
> #include "trace.h"
> #include "qapi/error.h"
>+#include "migration/cpr.h"
>+#include "migration/blocker.h"
> #include "pci.h"
> #include "hw/vfio/vfio-container.h"
> #include "vfio-helpers.h"
>@@ -425,7 +427,12 @@ static VFIOContainer *vfio_create_container(int fd,
>VFIOGroup *group,
> return NULL;
> }
>
>-if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
>+/*
>+ * During CPR, just set the container type and skip the ioctls, as the
>+ * container and group are already configured in the kernel.
>+ */
>+if (!cpr_is_incoming() &&
>+!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
> return NULL;
> }
>
>@@ -592,6 +599,11 @@ static bool vfio_container_group_add(VFIOContainer
>*container, VFIOGroup *group,
> group->container = container;
> QLIST_INSERT_HEAD(&container->group_list, group, container_next);
> vfio_group_add_kvm_device(group);
>+/*
>+ * Remember the container fd for each group, so we can attach to the same
>+ * container after CPR.
>+ */
>+cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);

I know this is already merged. Just out of curious, It looks cpr_save_fd is 
enough?

Zhenzhong


[PATCH V5 04/38] vfio/container: preserve descriptors

2025-06-10 Thread Steve Sistare
At vfio creation time, save the value of vfio container, group, and device
descriptors in CPR state.  On qemu restart, vfio_realize() finds and uses
the saved descriptors.

During reuse, device and iommu state is already configured, so operations
in vfio_realize that would modify the configuration, such as vfio ioctl's,
are skipped.  The result is that vfio_realize constructs qemu data
structures that reflect the current state of the device.

Signed-off-by: Steve Sistare 
Reviewed-by: Cédric Le Goater 
Reviewed-by: Zhenzhong Duan 
---
 include/hw/vfio/vfio-cpr.h |  6 +
 hw/vfio/container.c| 67 +++---
 hw/vfio/cpr-legacy.c   | 42 +
 3 files changed, 100 insertions(+), 15 deletions(-)

diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index d4e0bd5..5a2e5f6 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -13,6 +13,7 @@
 
 struct VFIOContainer;
 struct VFIOContainerBase;
+struct VFIOGroup;
 
 typedef struct VFIOContainerCPR {
 Error *blocker;
@@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct VFIOContainerBase 
*bcontainer,
  Error **errp);
 void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
 
+int vfio_cpr_group_get_device_fd(int d, const char *name);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+  struct VFIOGroup *group, int fd);
+
 #endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 93cdf80..5caae4c 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -31,6 +31,8 @@
 #include "system/reset.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
 #include "pci.h"
 #include "hw/vfio/vfio-container.h"
 #include "vfio-helpers.h"
@@ -425,7 +427,12 @@ static VFIOContainer *vfio_create_container(int fd, 
VFIOGroup *group,
 return NULL;
 }
 
-if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+/*
+ * During CPR, just set the container type and skip the ioctls, as the
+ * container and group are already configured in the kernel.
+ */
+if (!cpr_is_incoming() &&
+!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
 return NULL;
 }
 
@@ -592,6 +599,11 @@ static bool vfio_container_group_add(VFIOContainer 
*container, VFIOGroup *group,
 group->container = container;
 QLIST_INSERT_HEAD(&container->group_list, group, container_next);
 vfio_group_add_kvm_device(group);
+/*
+ * Remember the container fd for each group, so we can attach to the same
+ * container after CPR.
+ */
+cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
 return true;
 }
 
@@ -601,6 +613,7 @@ static void vfio_container_group_del(VFIOContainer 
*container, VFIOGroup *group)
 group->container = NULL;
 vfio_group_del_kvm_device(group);
 vfio_ram_block_discard_disable(container, false);
+cpr_delete_fd("vfio_container_for_group", group->groupid);
 }
 
 static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
@@ -615,17 +628,34 @@ static bool vfio_container_connect(VFIOGroup *group, 
AddressSpace *as,
 bool group_was_added = false;
 
 space = vfio_address_space_get(as);
+fd = cpr_find_fd("vfio_container_for_group", group->groupid);
 
-QLIST_FOREACH(bcontainer, &space->containers, next) {
-container = container_of(bcontainer, VFIOContainer, bcontainer);
-if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
-return vfio_container_group_add(container, group, errp);
+if (!cpr_is_incoming()) {
+QLIST_FOREACH(bcontainer, &space->containers, next) {
+container = container_of(bcontainer, VFIOContainer, bcontainer);
+if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+return vfio_container_group_add(container, group, errp);
+}
 }
-}
 
-fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
-if (fd < 0) {
-goto fail;
+fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
+if (fd < 0) {
+goto fail;
+}
+} else {
+/*
+ * For incoming CPR, the group is already attached in the kernel.
+ * If a container with matching fd is found, then update the
+ * userland group list and return.  If not, then after the loop,
+ * create the container struct and group list.
+ */
+QLIST_FOREACH(bcontainer, &space->containers, next) {
+container = container_of(bcontainer, VFIOContainer, bcontainer);
+
+if (vfio_cpr_container_match(container, group, fd)) {
+return vfio_container_group_add(container, group, errp);
+}
+}
 }
 
 ret = ioctl(fd, VFIO_GET_API_VERSION);
@@ -697,6 +727,7 @@ stat