Secondary process support had been disabled for the AF_XDP PMD
because there was no logic in place to share the AF_XDP socket
file descriptors between the processes. This commit introduces
this logic using the IPC APIs.

Since AF_XDP rings are single-producer single-consumer, rx/tx
in the secondary process is disabled. However other operations
including retrieval of stats are permitted.

Signed-off-by: Ciara Loftus <ciara.lof...@intel.com>

---
RFC -> v1:
* Added newline to af_xdp.rst
* Fixed spelling errors
* Fixed potential NULL dereference in init_internals
* Fixed potential free of address-of expression in afxdp_mp_request_fds
---
 doc/guides/nics/af_xdp.rst             |  11 +-
 doc/guides/nics/features/af_xdp.ini    |   1 +
 doc/guides/rel_notes/release_22_03.rst |   4 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 210 +++++++++++++++++++++++--
 4 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index c9d0e1ad6c..9e3feee8d1 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -143,4 +143,13 @@ Limitations
   NAPI context from a watchdog timer instead of from softirqs. More information
   on this feature can be found at [1].
 
-  [1] https://lwn.net/Articles/837010/
\ No newline at end of file
+- **Secondary Processes**
+
+  Rx and Tx are not supported for secondary processes due to the 
single-producer
+  single-consumer nature of the AF_XDP rings. However other operations 
including
+  statistics retrieval are permitted.
+  The maximum number of queues permitted for PMDs operating in this model is 8
+  as this is the maximum number of fds that can be sent through the IPC APIs as
+  defined by RTE_MP_MAX_FD_NUM.
+
+  [1] https://lwn.net/Articles/837010/
diff --git a/doc/guides/nics/features/af_xdp.ini 
b/doc/guides/nics/features/af_xdp.ini
index 54b738e616..8e7e075aaf 100644
--- a/doc/guides/nics/features/af_xdp.ini
+++ b/doc/guides/nics/features/af_xdp.ini
@@ -9,4 +9,5 @@ Power mgmt address monitor = Y
 MTU update           = Y
 Promiscuous mode     = Y
 Stats per queue      = Y
+Multiprocess aware   = Y
 x86-64               = Y
diff --git a/doc/guides/rel_notes/release_22_03.rst 
b/doc/guides/rel_notes/release_22_03.rst
index 6d99d1eaa9..ec719e9738 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -55,6 +55,10 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Updated AF_XDP PMD.**
+
+  * Re-enabled secondary process support. RX/TX is not supported.
+
 
 Removed Items
 -------------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c 
b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 96c2c9d939..1c8c52ddb6 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -81,6 +81,18 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
 
 #define ETH_AF_XDP_ETH_OVERHEAD                (RTE_ETHER_HDR_LEN + 
RTE_ETHER_CRC_LEN)
 
+#define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
+
+static int afxdp_dev_count;
+
+/* Message header to synchronize fds via IPC */
+struct ipc_hdr {
+       char port_name[RTE_DEV_NAME_MAX_LEN];
+       /* The file descriptors are in the dedicated part
+        * of the Unix message to be translated by the kernel.
+        */
+};
+
 struct xsk_umem_info {
        struct xsk_umem *umem;
        struct rte_ring *buf_ring;
@@ -148,6 +160,10 @@ struct pmd_internals {
        struct pkt_tx_queue *tx_queues;
 };
 
+struct pmd_process_private {
+       int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
+};
+
 #define ETH_AF_XDP_IFACE_ARG                   "iface"
 #define ETH_AF_XDP_START_QUEUE_ARG             "start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG             "queue_count"
@@ -857,11 +873,12 @@ static int
 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
        struct pmd_internals *internals = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
        struct xdp_statistics xdp_stats;
        struct pkt_rx_queue *rxq;
        struct pkt_tx_queue *txq;
        socklen_t optlen;
-       int i, ret;
+       int i, ret, fd;
 
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                optlen = sizeof(struct xdp_statistics);
@@ -877,8 +894,9 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats 
*stats)
                stats->ibytes += stats->q_ibytes[i];
                stats->imissed += rxq->stats.rx_dropped;
                stats->oerrors += txq->stats.tx_dropped;
-               ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
-                               XDP_STATISTICS, &xdp_stats, &optlen);
+               fd = process_private->rxq_xsk_fds[i];
+               ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
+                                          &xdp_stats, &optlen) : -1;
                if (ret != 0) {
                        AF_XDP_LOG(ERR, "getsockopt() failed for 
XDP_STATISTICS.\n");
                        return -1;
@@ -945,8 +963,10 @@ eth_dev_close(struct rte_eth_dev *dev)
        struct pkt_rx_queue *rxq;
        int i;
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               rte_free(dev->process_private);
                return 0;
+       }
 
        AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
                rte_socket_id());
@@ -1351,6 +1371,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
                   struct rte_mempool *mb_pool)
 {
        struct pmd_internals *internals = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
        struct pkt_rx_queue *rxq;
        int ret;
 
@@ -1389,6 +1410,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
        rxq->fds[0].events = POLLIN;
 
+       process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
+
        dev->data->rx_queues[rx_queue_id] = rxq;
        return 0;
 
@@ -1690,6 +1713,7 @@ init_internals(struct rte_vdev_device *dev, const char 
*if_name,
 {
        const char *name = rte_vdev_device_name(dev);
        const unsigned int numa_node = dev->device.numa_node;
+       struct pmd_process_private *process_private;
        struct pmd_internals *internals;
        struct rte_eth_dev *eth_dev;
        int ret;
@@ -1755,9 +1779,17 @@ init_internals(struct rte_vdev_device *dev, const char 
*if_name,
        if (ret)
                goto err_free_tx;
 
+       process_private = (struct pmd_process_private *)
+               rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
+                                  RTE_CACHE_LINE_SIZE, numa_node);
+       if (process_private == NULL) {
+               AF_XDP_LOG(ERR, "Failed to alloc memory for process private\n");
+               goto err_free_tx;
+       }
+
        eth_dev = rte_eth_vdev_allocate(dev, 0);
        if (eth_dev == NULL)
-               goto err_free_tx;
+               goto err_free_pp;
 
        eth_dev->data->dev_private = internals;
        eth_dev->data->dev_link = pmd_link;
@@ -1766,6 +1798,10 @@ init_internals(struct rte_vdev_device *dev, const char 
*if_name,
        eth_dev->dev_ops = &ops;
        eth_dev->rx_pkt_burst = eth_af_xdp_rx;
        eth_dev->tx_pkt_burst = eth_af_xdp_tx;
+       eth_dev->process_private = process_private;
+
+       for (i = 0; i < queue_cnt; i++)
+               process_private->rxq_xsk_fds[i] = -1;
 
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
        AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
@@ -1773,6 +1809,8 @@ init_internals(struct rte_vdev_device *dev, const char 
*if_name,
 
        return eth_dev;
 
+err_free_pp:
+       rte_free(process_private);
 err_free_tx:
        rte_free(internals->tx_queues);
 err_free_rx:
@@ -1782,6 +1820,115 @@ init_internals(struct rte_vdev_device *dev, const char 
*if_name,
        return NULL;
 }
 
+/* Secondary process requests rxq fds from primary. */
+static int
+afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
+{
+       struct pmd_process_private *process_private = dev->process_private;
+       struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+       struct rte_mp_msg request, *reply;
+       struct rte_mp_reply replies;
+       struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
+       int i, ret;
+
+       /* Prepare the request */
+       memset(&request, 0, sizeof(request));
+       strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
+       strlcpy(request_param->port_name, name,
+               sizeof(request_param->port_name));
+       request.len_param = sizeof(*request_param);
+
+       /* Send the request and receive the reply */
+       AF_XDP_LOG(DEBUG, "Sending IPC request for %s\n", name);
+       ret = rte_mp_request_sync(&request, &replies, &timeout);
+       if (ret < 0 || replies.nb_received != 1) {
+               AF_XDP_LOG(ERR, "Failed to request fds from primary: %d",
+                          rte_errno);
+               return -1;
+       }
+       reply = replies.msgs;
+       AF_XDP_LOG(DEBUG, "Received IPC reply for %s\n", name);
+       if (dev->data->nb_rx_queues != reply->num_fds) {
+               AF_XDP_LOG(ERR, "Incorrect number of fds received: %d != %d\n",
+                          reply->num_fds, dev->data->nb_rx_queues);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < reply->num_fds; i++)
+               process_private->rxq_xsk_fds[i] = reply->fds[i];
+
+       free(reply);
+       return 0;
+}
+
+/* Primary process sends rxq fds to secondary. */
+static int
+afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
+{
+       struct rte_eth_dev *dev;
+       struct pmd_process_private *process_private;
+       struct rte_mp_msg reply;
+       const struct ipc_hdr *request_param =
+               (const struct ipc_hdr *)request->param;
+       struct ipc_hdr *reply_param =
+               (struct ipc_hdr *)reply.param;
+       const char *request_name = request_param->port_name;
+       uint16_t port_id;
+       int i, ret;
+
+       AF_XDP_LOG(DEBUG, "Received IPC request for %s\n", request_name);
+
+       /* Find the requested port */
+       ret = rte_eth_dev_get_port_by_name(request_name, &port_id);
+       if (ret) {
+               AF_XDP_LOG(ERR, "Failed to get port id for %s\n", request_name);
+               return -1;
+       }
+       dev = &rte_eth_devices[port_id];
+       process_private = dev->process_private;
+
+       /* Populate the reply with the xsk fd for each queue */
+       reply.num_fds = 0;
+       if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+               AF_XDP_LOG(ERR, "Number of rx queues (%d) exceeds max number of 
fds (%d)\n",
+                          dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++)
+               reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
+
+       /* Send the reply */
+       strlcpy(reply.name, request->name, sizeof(reply.name));
+       strlcpy(reply_param->port_name, request_name,
+               sizeof(reply_param->port_name));
+       reply.len_param = sizeof(*reply_param);
+       AF_XDP_LOG(DEBUG, "Sending IPC reply for %s\n", reply_param->port_name);
+       if (rte_mp_reply(&reply, peer) < 0) {
+               AF_XDP_LOG(ERR, "Failed to reply to IPC request\n");
+               return -1;
+       }
+       return 0;
+}
+
+/* Secondary process rx function. RX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_rx_noop(void *queue __rte_unused,
+               struct rte_mbuf **bufs __rte_unused,
+               uint16_t nb_pkts __rte_unused)
+{
+       return 0;
+}
+
+/* Secondary process tx function. TX is disabled because rings are SPSC */
+static uint16_t
+eth_af_xdp_tx_noop(void *queue __rte_unused,
+                       struct rte_mbuf **bufs __rte_unused,
+                       uint16_t nb_pkts __rte_unused)
+{
+       return 0;
+}
+
 static int
 rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
@@ -1791,19 +1938,39 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
        int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
        int shared_umem = 0;
        char prog_path[PATH_MAX] = {'\0'};
-       int busy_budget = -1;
+       int busy_budget = -1, ret;
        struct rte_eth_dev *eth_dev = NULL;
-       const char *name;
+       const char *name = rte_vdev_device_name(dev);
 
-       AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
-               rte_vdev_device_name(dev));
+       AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", name);
 
-       name = rte_vdev_device_name(dev);
        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-               AF_XDP_LOG(ERR, "Failed to probe %s. "
-                               "AF_XDP PMD does not support secondary 
processes.\n",
-                               name);
-               return -ENOTSUP;
+               eth_dev = rte_eth_dev_attach_secondary(name);
+               if (eth_dev == NULL) {
+                       AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
+                       return -EINVAL;
+               }
+               eth_dev->dev_ops = &ops;
+               eth_dev->device = &dev->device;
+               eth_dev->rx_pkt_burst = eth_af_xdp_rx_noop;
+               eth_dev->tx_pkt_burst = eth_af_xdp_tx_noop;
+               eth_dev->process_private = (struct pmd_process_private *)
+                       rte_zmalloc_socket(name,
+                                          sizeof(struct pmd_process_private),
+                                          RTE_CACHE_LINE_SIZE,
+                                          eth_dev->device->numa_node);
+               if (eth_dev->process_private == NULL) {
+                       AF_XDP_LOG(ERR,
+                               "Failed to alloc memory for process private\n");
+                       return -ENOMEM;
+               }
+
+               /* Obtain the xsk fds from the primary process. */
+               if (afxdp_mp_request_fds(name, eth_dev))
+                       return -1;
+
+               rte_eth_dev_probing_finish(eth_dev);
+               return 0;
        }
 
        kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
@@ -1838,6 +2005,17 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
                return -1;
        }
 
+       /* Register IPC callback which shares xsk fds from primary to secondary 
*/
+       if (!afxdp_dev_count) {
+               ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, 
afxdp_mp_send_fds);
+               if (ret < 0) {
+                       AF_XDP_LOG(ERR, "%s: Failed to register IPC callback: 
%s",
+                                  name, strerror(rte_errno));
+                       return -1;
+               }
+       }
+       afxdp_dev_count++;
+
        rte_eth_dev_probing_finish(eth_dev);
 
        return 0;
@@ -1860,6 +2038,10 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
                return 0;
 
        eth_dev_close(eth_dev);
+       rte_free(eth_dev->process_private);
+       if (afxdp_dev_count == 1)
+               rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
+       afxdp_dev_count--;
        rte_eth_dev_release_port(eth_dev);
 
 
-- 
2.17.1

Reply via email to