Since the offending commit, mlx5 driver supports probing
representors on BlueField DPUs with Socket Direct (SD).
Such a card can be connected to 2 different CPUs on the host system.
On the DPU, the user would see the following network devices:

- p0 and p1 - physical ports
- pf0hpf and pf2hpf - PF0 on CPU 0 and CPU 1 respectively
- pf1hpf and pf3hpf - PF1 on CPU 0 and CPU 1 respectively

mlx5 driver finds the relevant netdev by matching information
provided in representor devarg to phys_port_name
reported by Linux kernel.
For the above interfaces, the phys_port_name values would be reported
and probed as:

- p0 -> p0, no need for representor devarg
- p1 -> p1, with representor=pf1
- pf0hpf -> c1pf0, with representor=c1pf0vf65535
- pf1hpf -> c1pf1, with representor=c1pf1vf65535
- pf2hpf -> c2pf0, with representor=c2pf0vf65535
- pf3hpf -> c2pf1, with representor=c2pf1vf65535

Although hot-plugging all these representors is successful,
RTE_ETH_FOREACH_MATCHING_DEV() macro would not find DPDK ports.
This is caused by information missing from what the mlx5 driver reports
through the rte_eth_representor_info_get() API.
Specifically, mlx5 driver did not report controller index for all
representor ranges.

Until now mlx5 driver used static encoding for 16-bit representor_id:

- 2 bits for representor type
- 2 bits for PF index
- 12 bits for representor index (either VF or SF number)

Controller index was not encoded. This caused the mentioned issue
and, on top of that, the static encoding:

- limits the number of PFs
- limits the number of SFs

This patch changes the mlx5 driver logic for
rte_eth_representor_info_get().
Instead of static encoding:

- representor_id's will be dynamically assigned
  to each probed representor.
- rte_eth_representor_info_get() will report N ranges:
    - N == number of probed ports on single embedded switch
    - Each range will define single representor_id
      for given controller/PF/VF/SF.

Fixes: 2f7cdd821b1b ("net/mlx5: fix probing to allow BlueField Socket Direct")
Cc: [email protected]

Signed-off-by: Dariusz Sosnowski <[email protected]>
Acked-by: Bing Zhao <[email protected]>
---
v2:
- Added missing "not" in
  "RTE_ETH_FOREACH_MATCHING_DEV() macro would not find DPDK ports"
  in the commit message.
- Fixed typo in number of bits for representor index.
  Should be 12, not 2.

 drivers/net/mlx5/linux/mlx5_os.c |   6 +-
 drivers/net/mlx5/mlx5.h          |  19 +++
 drivers/net/mlx5/mlx5_ethdev.c   | 284 +++++++++++++++++++------------
 3 files changed, 199 insertions(+), 110 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 0fc721592b..5305523c1b 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1677,9 +1677,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                err = ENOMEM;
                goto error;
        }
+       priv->port_info.type = spawn->info.name_type;
+       priv->port_info.ctrl_num = spawn->info.ctrl_num;
+       priv->port_info.pf_num = spawn->info.pf_num;
+       priv->port_info.port_num = spawn->info.port_name;
        if (priv->representor) {
                eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
-               eth_dev->data->representor_id = priv->representor_id;
+               eth_dev->data->representor_id = eth_dev->data->port_id;
                MLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {
                        struct mlx5_priv *opriv =
                                rte_eth_devices[port_id].data->dev_private;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 49a0c03544..23803b450b 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1984,6 +1984,24 @@ struct mlx5_quota_ctx {
        struct mlx5_indexed_pool *quota_ipool; /* Manage quota objects */
 };

+/* Stores info parsed from phys_port_name related to given DPDK port. */
+struct mlx5_representor_info {
+       enum mlx5_nl_phys_port_name_type type;
+       /* PCI controller index. 0 if no controller was reported in 
phys_port_name. */
+       int32_t ctrl_num;
+       /* PF index. */
+       int32_t pf_num;
+       /*
+        * Representor number:
+        *
+        * - For VF/SF - VF/SF index.
+        * - For PFHPF - -1.
+        * - For uplink - physical port index.
+        * - For others - VF representor is assumed, so VF index.
+        */
+       int32_t port_num;
+};
+
 struct mlx5_nta_sample_ctx;
 struct mlx5_priv {
        struct rte_eth_dev_data *dev_data;  /* Pointer to device data. */
@@ -2019,6 +2037,7 @@ struct mlx5_priv {
        uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */
        uint32_t vport_meta_mask; /* Used for vport index field match mask. */
        uint16_t representor_id; /* UINT16_MAX if not a representor. */
+       struct mlx5_representor_info port_info;
        int32_t pf_bond; /* >=0, representor owner PF index in bonding. */
        int32_t mpesw_owner; /* >=0, representor owner PF index in MPESW. */
        int32_t mpesw_port; /* Related port index of MPESW device. < 0 - no 
MPESW. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index a29cdeeb50..e14b7f148b 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -345,6 +345,23 @@ mlx5_dev_get_max_wq_size(struct mlx5_dev_ctx_shared *sh)
        return max_wqe;
 }

+/**
+ * Get switch port ID for given DPDK port.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @return
+ *   Switch port ID reported through rte_eth_dev_info_get().
+ */
+static uint16_t
+mlx5_dev_switch_info_port_id_get(struct rte_eth_dev *dev)
+{
+       if (rte_eth_dev_is_repr(dev))
+               return dev->data->port_id;
+
+       return UINT16_MAX;
+}
+
 /**
  * DPDK callback to get information about the device.
  *
@@ -401,7 +418,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info)
                info->dev_capa |= RTE_ETH_DEV_CAPA_RXQ_SHARE;
        info->switch_info.name = dev->data->name;
        info->switch_info.domain_id = priv->domain_id;
-       info->switch_info.port_id = priv->representor_id;
+       info->switch_info.port_id = mlx5_dev_switch_info_port_id_get(dev);
        info->switch_info.rx_domain = 0; /* No sub Rx domains. */
        if (priv->representor) {
                uint16_t port_id;
@@ -472,14 +489,162 @@ mlx5_representor_id_encode(const struct mlx5_switch_info 
*info,
        return MLX5_REPRESENTOR_ID(pf, type, repr);
 }

+static unsigned int
+mlx5_representor_info_count_one(struct mlx5_priv *priv)
+{
+       switch (priv->port_info.type) {
+       case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
+               return 2;
+       case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+               /* Only representor uplinks should be reported */
+               if (!priv->representor)
+                       return 0;
+               return 1;
+       case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+               /* FALLTHROUGH */
+       case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+               /* FALLTHROUGH */
+       case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+               /* FALLTHROUGH */
+       case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
+               /* FALLTHROUGH */
+       case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+               /* FALLTHROUGH */
+       default:
+               return 1;
+       }
+}
+
+static unsigned int
+mlx5_representor_info_count(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       uint16_t port_id;
+       unsigned int count = 0;
+
+       MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
+               struct mlx5_priv *opriv = 
rte_eth_devices[port_id].data->dev_private;
+
+               if (!opriv ||
+                   opriv->sh != priv->sh ||
+                   opriv->domain_id != priv->domain_id)
+                       continue;
+
+               count += mlx5_representor_info_count_one(opriv);
+       }
+
+       return count;
+}
+
+static void
+mlx5_representor_info_fill_one(struct mlx5_priv *priv,
+                              struct rte_eth_representor_info *info)
+{
+       struct rte_eth_representor_range *range;
+       unsigned int count;
+
+       count = mlx5_representor_info_count_one(priv);
+       if (count == 0)
+               return;
+
+       if (info->nb_ranges + count > info->nb_ranges_alloc) {
+               DRV_LOG(ERR, "port %u representor info already full", 
priv->dev_data->port_id);
+               return;
+       }
+
+       range = &info->ranges[info->nb_ranges];
+
+       switch (priv->port_info.type) {
+       case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+               range->type = RTE_ETH_REPRESENTOR_PF;
+               range->controller = priv->port_info.ctrl_num;
+               range->pf = priv->port_info.port_num;
+               range->id_base = priv->dev_data->port_id;
+               range->id_end = range->id_base;
+               snprintf(range->name, sizeof(range->name), "pf%d", range->pf);
+               break;
+       case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
+               /* Secondly, fill in SF variant. */
+               range->type = RTE_ETH_REPRESENTOR_SF;
+               range->controller = priv->port_info.ctrl_num;
+               range->pf = priv->port_info.pf_num;
+               range->sf = priv->port_info.port_num;
+               range->id_base = priv->dev_data->port_id;
+               range->id_end = range->id_base;
+               snprintf(range->name, sizeof(range->name), "pf%dsf", range->pf);
+               break;
+       case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
+               /*
+                * Host PF can be probed either through VF(0xffff) or 
SF(0xffff).
+                * Firstly fill in VF variant.
+                */
+               range->type = RTE_ETH_REPRESENTOR_VF;
+               range->controller = priv->port_info.ctrl_num;
+               range->pf = priv->port_info.pf_num;
+               range->vf = UINT16_MAX;
+               range->id_base = priv->dev_data->port_id;
+               range->id_end = range->id_base;
+               snprintf(range->name, sizeof(range->name), "pf%dvf", range->pf);
+
+               /* Move the SF variant. */
+               range++;
+
+               /* Fill in SF variant. */
+               range->type = RTE_ETH_REPRESENTOR_SF;
+               range->controller = priv->port_info.ctrl_num;
+               range->pf = priv->port_info.pf_num;
+               range->sf = UINT16_MAX;
+               range->id_base = priv->dev_data->port_id;
+               range->id_end = range->id_base;
+               snprintf(range->name, sizeof(range->name), "pf%dsf", range->pf);
+               break;
+       case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+               /* FALLTHROUGH */
+       case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+               /* FALLTHROUGH */
+       case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+               /* FALLTHROUGH */
+       case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+               range->type = RTE_ETH_REPRESENTOR_VF;
+               range->controller = priv->port_info.ctrl_num;
+               range->pf = priv->port_info.pf_num;
+               range->vf = priv->port_info.port_num;
+               range->id_base = priv->dev_data->port_id;
+               range->id_end = range->id_base;
+               snprintf(range->name, sizeof(range->name), "pf%dvf", range->pf);
+               break;
+       }
+
+       info->nb_ranges += count;
+}
+
+static unsigned int
+mlx5_representor_info_fill(struct rte_eth_dev *dev,
+                          struct rte_eth_representor_info *info)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       uint16_t port_id;
+
+       info->controller = priv->port_info.ctrl_num;
+       info->pf = RTE_DEV_TO_PCI(dev->device)->addr.function;
+
+       MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
+               struct mlx5_priv *opriv = 
rte_eth_devices[port_id].data->dev_private;
+
+               if (!opriv ||
+                   opriv->sh != priv->sh ||
+                   opriv->domain_id != priv->domain_id)
+                       continue;
+
+               mlx5_representor_info_fill_one(opriv, info);
+       }
+
+       return info->nb_ranges;
+}
+
 /**
  * DPDK callback to get information about representor.
  *
- * Representor ID bits definition:
- *   vf/sf: 12
- *   type: 2
- *   pf: 2
- *
  * @param dev
  *   Pointer to Ethernet device structure.
  * @param[out] info
@@ -492,110 +657,11 @@ int
 mlx5_representor_info_get(struct rte_eth_dev *dev,
                          struct rte_eth_representor_info *info)
 {
-       struct mlx5_priv *priv = dev->data->dev_private;
-       /* Representor types: PF, VF, HPF@VF, SF and HPF@SF, total 5. */
-       int n_type = RTE_ETH_REPRESENTOR_PF + 2; /* Maximal type + 2 for HPFs. 
*/
-       int n_pf = 8; /* Maximal number of PFs. */
-       int i = 0, pf;
-       int n_entries;
-
        if (info == NULL)
-               goto out;
-
-       n_entries = n_type * n_pf;
-       if ((uint32_t)n_entries > info->nb_ranges_alloc)
-               n_entries = info->nb_ranges_alloc;
-
-       info->controller = 0;
-       info->pf = 0;
-       if (mlx5_is_port_on_mpesw_device(priv)) {
-               info->pf = priv->mpesw_port;
-               for (i = 0; i < n_pf; i++) {
-                       /* PF range, both ports will show the same information. 
*/
-                       info->ranges[i].type = RTE_ETH_REPRESENTOR_PF;
-                       info->ranges[i].controller = 0;
-                       info->ranges[i].pf = priv->mpesw_owner + i + 1;
-                       info->ranges[i].vf = 0;
-                       /*
-                        * The representor indexes should be the values set of 
"priv->mpesw_port".
-                        * In the real case now, only 1 PF/UPLINK representor 
is supported.
-                        * The port index will always be the value of "owner + 
1".
-                        */
-                       info->ranges[i].id_base =
-                               MLX5_REPRESENTOR_ID(priv->mpesw_owner,
-                                                   info->ranges[i].type,
-                                                   info->ranges[i].pf);
-                       info->ranges[i].id_end =
-                               MLX5_REPRESENTOR_ID(priv->mpesw_owner,
-                                                   info->ranges[i].type,
-                                                   info->ranges[i].pf);
-                       snprintf(info->ranges[i].name,
-                                sizeof(info->ranges[i].name),
-                                "pf%d", info->ranges[i].pf);
-               }
-       } else if (priv->pf_bond >= 0)
-               info->pf = priv->pf_bond;
-       for (pf = 0; pf < n_pf; ++pf) {
-               /* VF range. */
-               info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
-               info->ranges[i].controller = 0;
-               info->ranges[i].pf = pf;
-               info->ranges[i].vf = 0;
-               info->ranges[i].id_base =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
-               info->ranges[i].id_end =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
-               snprintf(info->ranges[i].name,
-                        sizeof(info->ranges[i].name), "pf%dvf", pf);
-               i++;
-               if (i == n_entries)
-                       break;
-               /* HPF range of VF type. */
-               info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
-               info->ranges[i].controller = 0;
-               info->ranges[i].pf = pf;
-               info->ranges[i].vf = UINT16_MAX;
-               info->ranges[i].id_base =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
-               info->ranges[i].id_end =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
-               snprintf(info->ranges[i].name,
-                        sizeof(info->ranges[i].name), "pf%dvf", pf);
-               i++;
-               if (i == n_entries)
-                       break;
-               /* SF range. */
-               info->ranges[i].type = RTE_ETH_REPRESENTOR_SF;
-               info->ranges[i].controller = 0;
-               info->ranges[i].pf = pf;
-               info->ranges[i].vf = 0;
-               info->ranges[i].id_base =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
-               info->ranges[i].id_end =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
-               snprintf(info->ranges[i].name,
-                        sizeof(info->ranges[i].name), "pf%dsf", pf);
-               i++;
-               if (i == n_entries)
-                       break;
-               /* HPF range of SF type. */
-               info->ranges[i].type = RTE_ETH_REPRESENTOR_SF;
-               info->ranges[i].controller = 0;
-               info->ranges[i].pf = pf;
-               info->ranges[i].vf = UINT16_MAX;
-               info->ranges[i].id_base =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
-               info->ranges[i].id_end =
-                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
-               snprintf(info->ranges[i].name,
-                        sizeof(info->ranges[i].name), "pf%dsf", pf);
-               i++;
-               if (i == n_entries)
-                       break;
-       }
-       info->nb_ranges = i;
-out:
-       return n_type * n_pf;
+               return mlx5_representor_info_count(dev);
+
+       return mlx5_representor_info_fill(dev, info);
+
 }

 /**
--
2.47.3

Reply via email to