Implement the device reset function.
1. Add fake RX/TX functions that do nothing and return
   immediately.
2. The reset function stops RX/TX by replacing the real
   RX/TX functions with the fake ones and then taking the
   per-queue locks, which guarantees that any in-flight
   RX/TX has finished (see the sketch below).
3. After RX/TX has stopped, reset the VF port, then
   release the locks and restore the real RX/TX functions.

Note: the definitions of some structures are moved from the .c
file to the .h file so that the reset function can reach the
per-queue locks.
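
For reference, a hypothetical application-side use, assuming the
device reset API from the earlier patches in this series is named
rte_eth_dev_reset() (that name and the hang-check helper below are
illustrative, not defined in this patch):

    /* vf_port_hung() stands in for whatever check the application
     * uses to decide that the VF port stopped working. */
    if (vf_port_hung(port_id)) {
            if (rte_eth_dev_reset(port_id) != 0)
                    RTE_LOG(ERR, USER1,
                            "reset of port %u failed\n", port_id);
    }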

Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
---
 doc/guides/rel_notes/release_16_07.rst |   2 +-
 drivers/net/e1000/e1000_ethdev.h       | 116 ++++++++++++++++++++++++++++++
 drivers/net/e1000/igb_ethdev.c         | 104 +++++++++++++++++++++++++++
 drivers/net/e1000/igb_rxtx.c           | 128 ++++++---------------------------
 4 files changed, 243 insertions(+), 107 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index d36c4b1..a4c0cc3 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -53,7 +53,7 @@ New Features
   VF. To handle this link up/down event, add the mailbox interruption
   support to receive the message.

-* **Added device reset support for ixgbe VF.**
+* **Added device reset support for ixgbe/igb VF.**

   Added the device reset API. APP can call this API to reset the VF port
   when it's not working.
diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6a42994..4ae03ce 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -34,6 +34,7 @@
 #ifndef _E1000_ETHDEV_H_
 #define _E1000_ETHDEV_H_
 #include <rte_time.h>
+#include <rte_spinlock.h>

 /* need update link, bit flag */
 #define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
@@ -261,6 +262,113 @@ struct e1000_adapter {
        struct rte_timecounter  systime_tc;
        struct rte_timecounter  rx_tstamp_tc;
        struct rte_timecounter  tx_tstamp_tc;
+       eth_rx_burst_t rx_backup;
+       eth_tx_burst_t tx_backup;
+};
+
+/**
+ * Structure associated with each descriptor of the RX ring of a RX queue.
+ */
+struct igb_rx_entry {
+       struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
+};
+
+/**
+ * Structure associated with each descriptor of the TX ring of a TX queue.
+ */
+struct igb_tx_entry {
+       struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
+       uint16_t next_id; /**< Index of next descriptor in ring. */
+       uint16_t last_id; /**< Index of last scattered descriptor. */
+};
+
+/**
+ * Hardware context number
+ */
+enum igb_advctx_num {
+       IGB_CTX_0    = 0, /**< CTX0    */
+       IGB_CTX_1    = 1, /**< CTX1    */
+       IGB_CTX_NUM  = 2, /**< CTX_NUM */
+};
+
+/** Offload features */
+union igb_tx_offload {
+       uint64_t data;
+       struct {
+               uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+               uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+               uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier(CPU order). */
+               uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+               uint64_t tso_segsz:16; /**< TCP TSO segment size. */
+
+               /* uint64_t unused:8; */
+       };
+};
+
+/**
+ * Structure to check if a new context needs to be built
+ */
+struct igb_advctx_info {
+       uint64_t flags;           /**< ol_flags related to context build. */
+       /** tx offload: vlan, tso, l2-l3-l4 lengths. */
+       union igb_tx_offload tx_offload;
+       /** compare mask for tx offload. */
+       union igb_tx_offload tx_offload_mask;
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct igb_rx_queue {
+       struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
+       volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
+       uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
+       volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
+       volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
+       struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
+       struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+       struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
+       rte_spinlock_t      rx_lock; /**< Lock for packet reception. */
+       uint16_t            nb_rx_desc; /**< number of RX descriptors. */
+       uint16_t            rx_tail;    /**< current value of RDT register. */
+       uint16_t            nb_rx_hold; /**< number of held free RX desc. */
+       uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
+       uint16_t            queue_id;   /**< RX queue index. */
+       uint16_t            reg_idx;    /**< RX queue register index. */
+       uint8_t             port_id;    /**< Device port identifier. */
+       uint8_t             pthresh;    /**< Prefetch threshold register. */
+       uint8_t             hthresh;    /**< Host threshold register. */
+       uint8_t             wthresh;    /**< Write-back threshold register. */
+       uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
+       uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
+};
+
+/**
+ * Structure associated with each TX queue.
+ */
+struct igb_tx_queue {
+       volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
+       uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
+       struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
+       volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
+       rte_spinlock_t         tx_lock; /**< Lock for packet transmission. */
+       uint32_t               txd_type;      /**< Device-specific TXD type */
+       uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
+       uint16_t               tx_tail; /**< Current value of TDT register. */
+       uint16_t               tx_head;
+       /**< Index of first used TX descriptor. */
+       uint16_t               queue_id; /**< TX queue index. */
+       uint16_t               reg_idx;  /**< TX queue register index. */
+       uint8_t                port_id;  /**< Device port identifier. */
+       uint8_t                pthresh;  /**< Prefetch threshold register. */
+       uint8_t                hthresh;  /**< Host threshold register. */
+       uint8_t                wthresh;  /**< Write-back threshold register. */
+       uint32_t               ctx_curr;
+       /**< Current used hardware descriptor. */
+       uint32_t               ctx_start;
+       /**< Start context position for transmit queue. */
+       struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
+       /**< Hardware context history.*/
 };

 #define E1000_DEV_PRIVATE(adapter) \
@@ -316,6 +424,14 @@ uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts);

+uint16_t eth_igbvf_xmit_pkts_fake(void *txq,
+                                 struct rte_mbuf **tx_pkts,
+                                 uint16_t nb_pkts);
+
+uint16_t eth_igbvf_recv_pkts_fake(void *rxq,
+                                 struct rte_mbuf **rx_pkts,
+                                 uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_scattered_pkts(void *rxq,
                struct rte_mbuf **rx_pkts, uint16_t nb_pkts);

diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 8aad741..4b78a25 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -268,6 +268,7 @@ static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
 static void eth_igbvf_interrupt_handler(struct rte_intr_handle *handle,
                                        void *param);
 static void igbvf_mbx_process(struct rte_eth_dev *dev);
+static int igbvf_dev_reset(struct rte_eth_dev *dev);

 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -409,6 +410,7 @@ static const struct eth_dev_ops igbvf_eth_dev_ops = {
        .mac_addr_set         = igbvf_default_mac_addr_set,
        .get_reg_length       = igbvf_get_reg_length,
        .get_reg              = igbvf_get_regs,
+       .dev_reset            = igbvf_dev_reset,
 };

 /* store statistics names and its offset in stats structure */
@@ -2663,6 +2665,108 @@ void igbvf_mbx_process(struct rte_eth_dev *dev)
 }

 static int
+igbvf_dev_reset(struct rte_eth_dev *dev)
+{
+       struct e1000_adapter *adapter =
+               (struct e1000_adapter *)dev->data->dev_private;
+       struct e1000_hw *hw =
+               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       int diag = 0;
+       uint32_t eiam;
+       uint16_t i;
+       struct igb_rx_queue *rxq;
+       struct igb_tx_queue *txq;
+       /* See igbvf_intr_enable(). */
+       uint32_t eiam_mbx = 1 << E1000_VTIVAR_MISC_MAILBOX;
+
+       /* Nothing needs to be done if the device is not started. */
+       if (!dev->data->dev_started)
+               return 0;
+
+       PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+
+       /**
+        * Stop RX/TX with the fake functions and the per-queue locks.
+        * The fake functions keep new bursts out of the real RX/TX
+        * paths, so the locks can be taken quickly.
+        */
+       adapter->rx_backup = dev->rx_pkt_burst;
+       adapter->tx_backup = dev->tx_pkt_burst;
+       dev->rx_pkt_burst = eth_igbvf_recv_pkts_fake;
+       dev->tx_pkt_burst = eth_igbvf_xmit_pkts_fake;
+
+       if (dev->data->rx_queues)
+               for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                       rxq = dev->data->rx_queues[i];
+                       rte_spinlock_lock(&rxq->rx_lock);
+               }
+
+       if (dev->data->tx_queues)
+               for (i = 0; i < dev->data->nb_tx_queues; i++) {
+                       txq = dev->data->tx_queues[i];
+                       rte_spinlock_lock(&txq->tx_lock);
+               }
+
+       /* Perform the VF reset. */
+       do {
+               dev->data->dev_started = 0;
+               igbvf_dev_stop(dev);
+               if (dev->data->dev_conf.intr_conf.lsc == 0)
+                       diag = eth_igb_link_update(dev, 0);
+               if (diag) {
+                       PMD_INIT_LOG(INFO, "Igb VF reset: "
+                                    "Failed to update link.");
+               }
+               rte_delay_ms(1000);
+
+               diag = igbvf_dev_start(dev);
+               if (diag) {
+                       PMD_INIT_LOG(ERR, "Igb VF reset: "
+                                    "Failed to start device.");
+                       return diag;
+               }
+               dev->data->dev_started = 1;
+               eth_igbvf_stats_reset(dev);
+               if (dev->data->dev_conf.intr_conf.lsc == 0)
+                       diag = eth_igb_link_update(dev, 0);
+               if (diag) {
+                       PMD_INIT_LOG(INFO, "Igb VF reset: "
+                                    "Failed to update link.");
+               }
+
+               /**
+                * When the PF link is down, there is a chance
+                * that the VF cannot operate on its registers.
+                * Check whether the registers were written
+                * successfully. If not, repeat stop/start until
+                * the PF link is up, in other words, until the
+                * registers can be written.
+                */
+               eiam = E1000_READ_REG(hw, E1000_EIAM);
+       } while (!(eiam & eiam_mbx));
+
+       /**
+        * Release the locks for queues.
+        * Restore the RX/TX functions.
+        */
+       if (dev->data->rx_queues)
+               for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                       rxq = dev->data->rx_queues[i];
+                       rte_spinlock_unlock(&rxq->rx_lock);
+               }
+
+       if (dev->data->tx_queues)
+               for (i = 0; i < dev->data->nb_tx_queues; i++) {
+                       txq = dev->data->tx_queues[i];
+                       rte_spinlock_unlock(&txq->tx_lock);
+               }
+
+       dev->rx_pkt_burst = adapter->rx_backup;
+       dev->tx_pkt_burst = adapter->tx_backup;
+
+       return 0;
+}
+
+static int
 eth_igbvf_interrupt_action(struct rte_eth_dev *dev)
 {
        struct e1000_interrupt *intr =
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 7e97330..5af7173 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -67,7 +67,6 @@
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 #include <rte_string_fns.h>
-#include <rte_spinlock.h>

 #include "e1000_logs.h"
 #include "base/e1000_api.h"
@@ -80,72 +79,6 @@
                PKT_TX_L4_MASK |                 \
                PKT_TX_TCP_SEG)

-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct igb_rx_entry {
-       struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-/**
- * Structure associated with each descriptor of the TX ring of a TX queue.
- */
-struct igb_tx_entry {
-       struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
-       uint16_t next_id; /**< Index of next descriptor in ring. */
-       uint16_t last_id; /**< Index of last scattered descriptor. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct igb_rx_queue {
-       struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
-       volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
-       uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-       volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-       volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
-       struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
-       struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-       struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
-       rte_spinlock_t      rx_lock; /**< Lock for packet receiption. */
-       uint16_t            nb_rx_desc; /**< number of RX descriptors. */
-       uint16_t            rx_tail;    /**< current value of RDT register. */
-       uint16_t            nb_rx_hold; /**< number of held free RX desc. */
-       uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
-       uint16_t            queue_id;   /**< RX queue index. */
-       uint16_t            reg_idx;    /**< RX queue register index. */
-       uint8_t             port_id;    /**< Device port identifier. */
-       uint8_t             pthresh;    /**< Prefetch threshold register. */
-       uint8_t             hthresh;    /**< Host threshold register. */
-       uint8_t             wthresh;    /**< Write-back threshold register. */
-       uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
-       uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
-};
-
-/**
- * Hardware context number
- */
-enum igb_advctx_num {
-       IGB_CTX_0    = 0, /**< CTX0    */
-       IGB_CTX_1    = 1, /**< CTX1    */
-       IGB_CTX_NUM  = 2, /**< CTX_NUM */
-};
-
-/** Offload features */
-union igb_tx_offload {
-       uint64_t data;
-       struct {
-               uint64_t l3_len:9; /**< L3 (IP) Header Length. */
-               uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
-               uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier(CPU order). */
-               uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
-               uint64_t tso_segsz:16; /**< TCP TSO segment size. */
-
-               /* uint64_t unused:8; */
-       };
-};
-
 /*
  * Compare mask for igb_tx_offload.data,
  * should be in sync with igb_tx_offload layout.
@@ -158,45 +91,6 @@ union igb_tx_offload {
 #define TX_TSO_CMP_MASK        \
        (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)

-/**
- * Strucutre to check if new context need be built
- */
-struct igb_advctx_info {
-       uint64_t flags;           /**< ol_flags related to context build. */
-       /** tx offload: vlan, tso, l2-l3-l4 lengths. */
-       union igb_tx_offload tx_offload;
-       /** compare mask for tx offload. */
-       union igb_tx_offload tx_offload_mask;
-};
-
-/**
- * Structure associated with each TX queue.
- */
-struct igb_tx_queue {
-       volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
-       uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
-       struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
-       rte_spinlock_t         tx_lock; /**< Lock for packet transmission. */
-       volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
-       uint32_t               txd_type;      /**< Device-specific TXD type */
-       uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
-       uint16_t               tx_tail; /**< Current value of TDT register. */
-       uint16_t               tx_head;
-       /**< Index of first used TX descriptor. */
-       uint16_t               queue_id; /**< TX queue index. */
-       uint16_t               reg_idx;  /**< TX queue register index. */
-       uint8_t                port_id;  /**< Device port identifier. */
-       uint8_t                pthresh;  /**< Prefetch threshold register. */
-       uint8_t                hthresh;  /**< Host threshold register. */
-       uint8_t                wthresh;  /**< Write-back threshold register. */
-       uint32_t               ctx_curr;
-       /**< Current used hardware descriptor. */
-       uint32_t               ctx_start;
-       /**< Start context position for transmit queue. */
-       struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
-       /**< Hardware context history.*/
-};
-
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -2530,3 +2424,25 @@ igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
        qinfo->conf.tx_thresh.wthresh = txq->wthresh;
 }
+
+/**
+ * A fake function to stop transmission.
+ */
+uint16_t
+eth_igbvf_xmit_pkts_fake(void __rte_unused *tx_queue,
+                        struct rte_mbuf __rte_unused **tx_pkts,
+                        uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}
+
+/**
+ * A fake function to stop reception.
+ */
+uint16_t
+eth_igbvf_recv_pkts_fake(void __rte_unused *rx_queue,
+                        struct rte_mbuf __rte_unused **rx_pkts,
+                        uint16_t __rte_unused nb_pkts)
+{
+       return 0;
+}
-- 
1.9.3
