Re: [RFC Patch 09/12] IXGBEVF: Add live migration support for VF driver

2015-10-22 Thread Michael S. Tsirkin
On Thu, Oct 22, 2015 at 12:37:41AM +0800, Lan Tianyu wrote:
> To let the VF driver in the guest know the migration status, Qemu will
> fake PCI configuration registers 0xF0 and 0xF1 to show the migration
> status and get an ack from the VF driver.

I guess this works for current devices, but using the
0xF0/0xF1 registers is not architectural, is it?

So it could conflict with future devices.

Maybe it's better to just have a dedicated para-virtualized
device (PCI,ACPI,etc) for this migration-related activity.
This driver would then register with it.


> When migration starts, Qemu will set reg "0xF0" to 1, notify
> VF driver via triggering mail box msg and wait for VF driver to tell
> it's ready for migration(set reg "0xF1" to 1).

This waiting for driver is problematic: high load is one of the reasons
people migrate VMs out.  It would be much better if we could support
migration while VM is completely stopped.


> After migration, Qemu
> will set reg "0xF0" to 0 and notify VF driver by mail box irq. VF
> driver begins to restore tx/rx function after detecting status change.
> 
> When VF receives mail box irq, it will check reg "0xF0" in the service
> task function to get migration status and performs related operations
> according to its value.
> 
> Steps of restarting receive and transmit function
> 1) Restore VF status in the PF driver via sending mail event to PF driver
> 2) Write back reg values recorded by self emulation layer
> 3) Restart rx/tx ring
> 4) Recovery interrupt
> 
> Transmit/Receive descriptor head regs are read-only and can't
> be restored via writing back recording reg value directly and they
> are set to 0 during VF reset. To reuse original tx/rx rings, shift
> desc ring in order to move the desc pointed by original head reg to
> first entry of the ring and then enable tx/rx rings. VF restarts to
> receive and transmit from original head desc.
> 
> Signed-off-by: Lan Tianyu 
> ---
>  drivers/net/ethernet/intel/ixgbevf/defines.h   |   6 ++
>  drivers/net/ethernet/intel/ixgbevf/ixgbevf.h   |   7 +-
>  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  | 115 
> -
>  .../net/ethernet/intel/ixgbevf/self-emulation.c| 107 +++
>  4 files changed, 232 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h 
> b/drivers/net/ethernet/intel/ixgbevf/defines.h
> index 770e21a..113efd2 100644
> --- a/drivers/net/ethernet/intel/ixgbevf/defines.h
> +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
> @@ -239,6 +239,12 @@ struct ixgbe_adv_tx_context_desc {
>   __le32 mss_l4len_idx;
>  };
>  
> +union ixgbevf_desc {
> + union ixgbe_adv_tx_desc rx_desc;
> + union ixgbe_adv_rx_desc tx_desc;
> + struct ixgbe_adv_tx_context_desc tx_context_desc;
> +};
> +
>  /* Adv Transmit Descriptor Config Masks */
>  #define IXGBE_ADVTXD_DTYP_MASK   0x00F0 /* DTYP mask */
>  #define IXGBE_ADVTXD_DTYP_CTXT   0x0020 /* Advanced Context Desc */
> diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h 
> b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
> index c823616..6eab402e 100644
> --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
> +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
> @@ -109,7 +109,7 @@ struct ixgbevf_ring {
>   struct ixgbevf_ring *next;
>   struct net_device *netdev;
>   struct device *dev;
> - void *desc; /* descriptor ring memory */
> + union ixgbevf_desc *desc;   /* descriptor ring memory */
>   dma_addr_t dma; /* phys. address of descriptor ring */
>   unsigned int size;  /* length in bytes */
>   u16 count;  /* amount of descriptors */
> @@ -493,6 +493,11 @@ extern void ixgbevf_write_eitr(struct ixgbevf_q_vector 
> *q_vector);
>  
>  void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
>  void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
> +int ixgbevf_tx_ring_shift(struct ixgbevf_ring *r, u32 head);
> +int ixgbevf_rx_ring_shift(struct ixgbevf_ring *r, u32 head);
> +void ixgbevf_restore_state(struct ixgbevf_adapter *adapter);
> +inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter);
> +
>  
>  #ifdef DEBUG
>  char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw);
> diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c 
> b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
> index 056841c..15ec361 100644
> --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
> +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
> @@ -91,6 +91,10 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function 
> Network Driver");
>  MODULE_LICENSE("GPL");
>  MODULE_VERSION(DRV_VERSION);
>  
> +
> +#define MIGRATION_COMPLETED   0x00
> +#define MIGRATION_IN_PROGRESS 0x01
> +
>  #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
>  static int debug = -1;
>  module_param(debug, int, 0);
> @@ -221,6 +225,78 @@ static u64 

[RFC Patch 09/12] IXGBEVF: Add live migration support for VF driver

2015-10-21 Thread Lan Tianyu
To let the VF driver in the guest know the migration status, Qemu will
fake PCI configuration registers 0xF0 and 0xF1 to show the migration
status and get an ack from the VF driver.

When migration starts, Qemu will set reg "0xF0" to 1, notify
VF driver via triggering mail box msg and wait for VF driver to tell
it's ready for migration(set reg "0xF1" to 1). After migration, Qemu
will set reg "0xF0" to 0 and notify VF driver by mail box irq. VF
driver begins to restore tx/rx function after detecting status change.

When VF receives mail box irq, it will check reg "0xF0" in the service
task function to get migration status and performs related operations
according to its value.

Steps of restarting receive and transmit function
1) Restore VF status in the PF driver via sending mail event to PF driver
2) Write back reg values recorded by self emulation layer
3) Restart rx/tx ring
4) Recovery interrupt

Transmit/Receive descriptor head regs are read-only and can't
be restored via writing back recording reg value directly and they
are set to 0 during VF reset. To reuse original tx/rx rings, shift
desc ring in order to move the desc pointed by original head reg to
first entry of the ring and then enable tx/rx rings. VF restarts to
receive and transmit from original head desc.

Signed-off-by: Lan Tianyu 
---
 drivers/net/ethernet/intel/ixgbevf/defines.h   |   6 ++
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h   |   7 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  | 115 -
 .../net/ethernet/intel/ixgbevf/self-emulation.c| 107 +++
 4 files changed, 232 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h 
b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 770e21a..113efd2 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -239,6 +239,12 @@ struct ixgbe_adv_tx_context_desc {
__le32 mss_l4len_idx;
 };
 
+union ixgbevf_desc {
+   union ixgbe_adv_tx_desc rx_desc;
+   union ixgbe_adv_rx_desc tx_desc;
+   struct ixgbe_adv_tx_context_desc tx_context_desc;
+};
+
 /* Adv Transmit Descriptor Config Masks */
 #define IXGBE_ADVTXD_DTYP_MASK 0x00F0 /* DTYP mask */
 #define IXGBE_ADVTXD_DTYP_CTXT 0x0020 /* Advanced Context Desc */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h 
b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index c823616..6eab402e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -109,7 +109,7 @@ struct ixgbevf_ring {
struct ixgbevf_ring *next;
struct net_device *netdev;
struct device *dev;
-   void *desc; /* descriptor ring memory */
+   union ixgbevf_desc *desc;   /* descriptor ring memory */
dma_addr_t dma; /* phys. address of descriptor ring */
unsigned int size;  /* length in bytes */
u16 count;  /* amount of descriptors */
@@ -493,6 +493,11 @@ extern void ixgbevf_write_eitr(struct ixgbevf_q_vector 
*q_vector);
 
 void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
 void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
+int ixgbevf_tx_ring_shift(struct ixgbevf_ring *r, u32 head);
+int ixgbevf_rx_ring_shift(struct ixgbevf_ring *r, u32 head);
+void ixgbevf_restore_state(struct ixgbevf_adapter *adapter);
+inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter);
+
 
 #ifdef DEBUG
 char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c 
b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 056841c..15ec361 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -91,6 +91,10 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function 
Network Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
+
+#define MIGRATION_COMPLETED   0x00
+#define MIGRATION_IN_PROGRESS 0x01
+
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
 static int debug = -1;
 module_param(debug, int, 0);
@@ -221,6 +225,78 @@ static u64 ixgbevf_get_tx_completed(struct ixgbevf_ring 
*ring)
return ring->stats.packets;
 }
 
+int ixgbevf_tx_ring_shift(struct ixgbevf_ring *r, u32 head)
+{
+   struct ixgbevf_tx_buffer *tx_buffer = NULL;
+   static union ixgbevf_desc *tx_desc = NULL;
+
+   tx_buffer = vmalloc(sizeof(struct ixgbevf_tx_buffer) * (r->count));
+   if (!tx_buffer)
+   return -ENOMEM;
+
+   tx_desc = vmalloc(sizeof(union ixgbevf_desc) * r->count);
+   if (!tx_desc)
+   return -ENOMEM;
+
+   memcpy(tx_desc, r->desc, sizeof(union ixgbevf_desc) * r->count);
+   memcpy(r->desc, &tx_desc[head], sizeof(union ixgbevf_desc) * (r->count 
- head));
+   memcpy(&r->desc[r->count - head], tx_desc, sizeof(union ixgbevf_desc) * 
head);
+
+   

Re: [RFC Patch 09/12] IXGBEVF: Add live migration support for VF driver

2015-10-21 Thread Alexander Duyck

On 10/21/2015 09:37 AM, Lan Tianyu wrote:

To let the VF driver in the guest know the migration status, Qemu will
fake PCI configuration registers 0xF0 and 0xF1 to show the migration
status and get an ack from the VF driver.

When migration starts, Qemu will set reg "0xF0" to 1, notify
VF driver via triggering mail box msg and wait for VF driver to tell
it's ready for migration(set reg "0xF1" to 1). After migration, Qemu
will set reg "0xF0" to 0 and notify VF driver by mail box irq. VF
driver begins to restore tx/rx function after detecting status change.

When VF receives mail box irq, it will check reg "0xF0" in the service
task function to get migration status and performs related operations
according to its value.

Steps of restarting receive and transmit function
1) Restore VF status in the PF driver via sending mail event to PF driver
2) Write back reg values recorded by self emulation layer
3) Restart rx/tx ring
4) Recovery interrupt

Transmit/Receive descriptor head regs are read-only and can't
be restored via writing back recording reg value directly and they
are set to 0 during VF reset. To reuse original tx/rx rings, shift
desc ring in order to move the desc pointed by original head reg to
first entry of the ring and then enable tx/rx rings. VF restarts to
receive and transmit from original head desc.

Signed-off-by: Lan Tianyu 
---
  drivers/net/ethernet/intel/ixgbevf/defines.h   |   6 ++
  drivers/net/ethernet/intel/ixgbevf/ixgbevf.h   |   7 +-
  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  | 115 -
  .../net/ethernet/intel/ixgbevf/self-emulation.c| 107 +++
  4 files changed, 232 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h 
b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 770e21a..113efd2 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -239,6 +239,12 @@ struct ixgbe_adv_tx_context_desc {
__le32 mss_l4len_idx;
  };

+union ixgbevf_desc {
+   union ixgbe_adv_tx_desc rx_desc;
+   union ixgbe_adv_rx_desc tx_desc;
+   struct ixgbe_adv_tx_context_desc tx_context_desc;
+};
+
  /* Adv Transmit Descriptor Config Masks */
  #define IXGBE_ADVTXD_DTYP_MASK0x00F0 /* DTYP mask */
  #define IXGBE_ADVTXD_DTYP_CTXT0x0020 /* Advanced Context Desc */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h 
b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index c823616..6eab402e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -109,7 +109,7 @@ struct ixgbevf_ring {
struct ixgbevf_ring *next;
struct net_device *netdev;
struct device *dev;
-   void *desc; /* descriptor ring memory */
+   union ixgbevf_desc *desc;   /* descriptor ring memory */
dma_addr_t dma; /* phys. address of descriptor ring */
unsigned int size;  /* length in bytes */
u16 count;  /* amount of descriptors */
@@ -493,6 +493,11 @@ extern void ixgbevf_write_eitr(struct ixgbevf_q_vector 
*q_vector);

  void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
  void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
+int ixgbevf_tx_ring_shift(struct ixgbevf_ring *r, u32 head);
+int ixgbevf_rx_ring_shift(struct ixgbevf_ring *r, u32 head);
+void ixgbevf_restore_state(struct ixgbevf_adapter *adapter);
+inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter);
+

  #ifdef DEBUG
  char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c 
b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 056841c..15ec361 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -91,6 +91,10 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network 
Driver");
  MODULE_LICENSE("GPL");
  MODULE_VERSION(DRV_VERSION);

+
+#define MIGRATION_COMPLETED   0x00
+#define MIGRATION_IN_PROGRESS 0x01
+
  #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
  static int debug = -1;
  module_param(debug, int, 0);
@@ -221,6 +225,78 @@ static u64 ixgbevf_get_tx_completed(struct ixgbevf_ring 
*ring)
return ring->stats.packets;
  }

+int ixgbevf_tx_ring_shift(struct ixgbevf_ring *r, u32 head)
+{
+   struct ixgbevf_tx_buffer *tx_buffer = NULL;
+   static union ixgbevf_desc *tx_desc = NULL;
+
+   tx_buffer = vmalloc(sizeof(struct ixgbevf_tx_buffer) * (r->count));
+   if (!tx_buffer)
+   return -ENOMEM;
+
+   tx_desc = vmalloc(sizeof(union ixgbevf_desc) * r->count);
+   if (!tx_desc)
+   return -ENOMEM;
+
+   memcpy(tx_desc, r->desc, sizeof(union ixgbevf_desc) * r->count);
+   memcpy(r->desc, &tx_desc[head], sizeof(union ixgbevf_desc) * (r->count 
- head));
+