I backported the fix from suanmi...@nvidia.com (related to the secondary process) and it worked!
>From 1676903aea413fe8be4138b285633e01332efa17 Mon Sep 17 00:00:00 2001 From: RajeshKumar Kalidass <rajesh.kalid...@gigamon.com> Date: Tue, 20 Apr 2021 02:56:32 -0700 Subject: [PATCH] VM-16160 mlx5: secondary not able to transmit out pkt Change-Id: I647ba4f4d2534c2c97b5e23ce8a11a20eac207a3 --- diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c index baffa75..0bfaddb 100644 --- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c +++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c @@ -1183,7 +1183,7 @@ rte_errno = ENOMEM; return -rte_errno; } - ppriv->uar_table_sz = ppriv_size; + ppriv->uar_table_sz = priv->txqs_n; dev->process_private = ppriv; return 0; } @@ -1194,7 +1194,7 @@ * @param dev * Pointer to Ethernet device structure. */ -static void +void mlx5_proc_priv_uninit(struct rte_eth_dev *dev) { if (!dev->process_private) @@ -2036,24 +2036,6 @@ } return 0; } -int -mlx5_uar_table_init (struct rte_eth_dev *eth_dev) -{ - int err = 0; - int fd; - /* Receive command fd from primary process. */ - fd = mlx5_mp_req_verbs_cmd_fd(eth_dev); - if (fd < 0) { - return (rte_errno); - } - /* Remap UAR for Tx queues. */ - err = mlx5_tx_uar_init_secondary(eth_dev, fd); - if (err) { - err = rte_errno; - } - - return err; -} /** * Spawn an Ethernet device from Verbs information. 
* diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h index 0c3a90e..5230ad6 100644 --- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h +++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h @@ -793,6 +793,7 @@ int mlx5_getenv_int(const char *); int mlx5_proc_priv_init(struct rte_eth_dev *dev); +void mlx5_proc_priv_uninit(struct rte_eth_dev *dev); int64_t mlx5_get_dbr(struct rte_eth_dev *dev, struct mlx5_devx_dbr_page **dbr_page); int32_t mlx5_release_dbr(struct rte_eth_dev *dev, uint32_t umem_id, diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c index b65b019..3e8a030 100644 --- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c +++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c @@ -119,6 +119,8 @@ const struct mlx5_mp_param *param = (const struct mlx5_mp_param *)mp_msg->param; struct rte_eth_dev *dev; + struct mlx5_proc_priv *ppriv; + struct mlx5_priv *priv; int ret; assert(rte_eal_process_type() == RTE_PROC_SECONDARY); @@ -128,12 +130,28 @@ return -rte_errno; } dev = &rte_eth_devices[param->port_id]; + priv = dev->data->dev_private; switch (param->type) { case MLX5_MP_REQ_START_RXTX: DRV_LOG(INFO, "port %u starting datapath", dev->data->port_id); rte_mb(); dev->rx_pkt_burst = mlx5_select_rx_function(dev); dev->tx_pkt_burst = mlx5_select_tx_function(dev); + ppriv = (struct mlx5_proc_priv *)dev->process_private; + /* If Tx queue number changes, re-initialize UAR. 
*/ + if (ppriv->uar_table_sz != priv->txqs_n) { + mlx5_tx_uar_uninit_secondary(dev); + mlx5_proc_priv_uninit(dev); + ret = mlx5_proc_priv_init(dev); + if (ret) + return -rte_errno; + ret = mlx5_tx_uar_init_secondary(dev, mp_msg->fds[0]); + if (ret) { + mlx5_proc_priv_uninit(dev); + return -rte_errno; + } + } + mp_init_msg(dev, &mp_res, param->type); res->result = 0; ret = rte_mp_reply(&mp_res, peer); @@ -175,6 +193,7 @@ struct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0}; int ret; int i; + struct mlx5_priv *priv; assert(rte_eal_process_type() == RTE_PROC_PRIMARY); if (!mlx5_shared_data->secondary_cnt) @@ -184,7 +203,12 @@ dev->data->port_id, type); return; } + priv = dev->data->dev_private; mp_init_msg(dev, &mp_req, type); + if (type == MLX5_MP_REQ_START_RXTX) { + mp_req.num_fds = 1; + mp_req.fds[0] = ((struct ibv_context *)priv->sh->ctx)->cmd_fd; + } ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); if (ret) { if (rte_errno != ENOTSUP) diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h index e927343..d8b3220 100644 --- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h +++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h @@ -424,6 +424,7 @@ const struct rte_eth_hairpin_conf *hairpin_conf); void mlx5_tx_queue_release(void *dpdk_txq); int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); +void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); struct mlx5_txq_obj *mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx, enum mlx5_txq_obj_type type); struct mlx5_txq_obj *mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx); On Mon, Apr 19, 2021 at 7:05 PM Rajesh Kumar <rajeshthe...@gmail.com> wrote: > Hi, > > > > DPDK: 19.11 > > OS: Ubuntu 18.04 (Kernel: 5.4.0-1043-azure) > > Iface: Mellanox Technologies MT27710 Family [ConnectX-4 Lx Virtual > Function] > > > > We are bringing-up our dpdk based app on azure cloud, its multi-process > setup (primary does dev_configure & dev_start ) – 
however, no packets are > getting transmitted out (the Tx-packet count increases up to the number of descriptors and > then all further packets are txDropped) > > > > "stats": [ > > { > > "name": "rep1", > > "txPkts": 1024, <<<<<<----------------------- it > increases up to the number of tx-descriptors > > "rxPkts": 5408, > > "txBytes": 65536, > > "rxBytes": 346112, > > "txDropped": 4384, <<<<<<--------------------- All further packets > are txDropped > > "rxDropped": 96, > > "txErrors": 0, > > "rxErrors": 0 > > } > > ] > > > > However, the mlx4 driver is working perfectly fine in a multi-process setup. > Also, testpmd is working fine with mlx5. I guess the problem occurs when we try to run > in a multi-process setup? > > > > > > Thanks, > > *-Rajesh* >