When an SF is probed as a vDPA device, mlx5_roce_disable() resolves the device to its parent PF's PCI address (via mlx5_dev_to_pci_str()). As a result, it incorrectly attempts to disable ROCE on the parent PF rather than on the SF itself.
This causes vDPA probe failures when the parent PF already has an open IB context (e.g., probed for uplink ports or SF representors). Properly disable ROCE for auxiliary devices by using the correct devlink attributes: - DEVLINK_ATTR_BUS_NAME = "auxiliary" - DEVLINK_ATTR_DEV_NAME = device name from rte_device->name Refactor mlx5_nl_enable_roce_get(), mlx5_nl_enable_roce_set(), and mlx5_nl_driver_reload() to accept bus_name and dev_name parameters instead of hardcoding "pci". This allows the same netlink code path to work for both PCI and auxiliary bus devices. For PCI devices, behavior is unchanged: try netlink first, fall back to sysfs. For auxiliary devices, only netlink is used since sysfs ROCE control is PCI-based. Signed-off-by: Max Tottenham <[email protected]> --- drivers/common/mlx5/linux/mlx5_common_os.c | 37 ++++++++--- drivers/common/mlx5/linux/mlx5_nl.c | 72 ++++++++++++---------- drivers/common/mlx5/linux/mlx5_nl.h | 8 +-- 3 files changed, 69 insertions(+), 48 deletions(-) diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c index 2867e21618..3e2c66539c 100644 --- a/drivers/common/mlx5/linux/mlx5_common_os.c +++ b/drivers/common/mlx5/linux/mlx5_common_os.c @@ -609,7 +609,7 @@ mlx5_os_get_ibv_device(const struct rte_pci_device *pci_dev) /* Try to disable ROCE by Netlink\Devlink. 
*/ static int -mlx5_nl_roce_disable(const char *addr) +mlx5_nl_roce_disable(const char *bus_name, const char *dev_name) { int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0); int devlink_id; @@ -625,7 +625,8 @@ mlx5_nl_roce_disable(const char *addr) "Failed to get devlink id for ROCE operations by Netlink."); goto close; } - ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable); + ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, bus_name, dev_name, + &enable); if (ret) { DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", ret); @@ -634,7 +635,8 @@ mlx5_nl_roce_disable(const char *addr) DRV_LOG(INFO, "ROCE has already disabled(Netlink)."); goto close; } - ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0); + ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, bus_name, dev_name, + 0); if (ret) DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret); else @@ -688,14 +690,29 @@ mlx5_sys_roce_disable(const char *addr) static int mlx5_roce_disable(const struct rte_device *dev) { - char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; + if (mlx5_dev_is_pci(dev)) { + char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; - if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) - return -rte_errno; - /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */ - if (mlx5_nl_roce_disable(pci_addr) != 0 && - mlx5_sys_roce_disable(pci_addr) != 0) - return -rte_errno; + if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) + return -rte_errno; + /* For PCI: try netlink first, fallback to sysfs. */ + if (mlx5_nl_roce_disable("pci", pci_addr) != 0 && + mlx5_sys_roce_disable(pci_addr) != 0) + return -rte_errno; + } else { + /* + * For auxiliary (SF) devices: use netlink with auxiliary bus. + * The device name from rte_device is used directly as the + * devlink device identifier. 
+ */ + if (mlx5_nl_roce_disable("auxiliary", dev->name) != 0) { + DRV_LOG(WARNING, + "Failed to disable ROCE for SF \"%s\" via netlink.", + dev->name); + /* No sysfs fallback for auxiliary devices. */ + return -rte_errno; + } + } return 0; } diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c index d53543a113..7bb1c7d258 100644 --- a/drivers/common/mlx5/linux/mlx5_nl.c +++ b/drivers/common/mlx5/linux/mlx5_nl.c @@ -1783,8 +1783,10 @@ mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) * Netlink socket file descriptor. * @param[in] family_id * the Devlink family ID. - * @param pci_addr - * The device PCI address. + * @param[in] bus_name + * The devlink bus name (e.g., "pci" or "auxiliary"). + * @param[in] dev_name + * The devlink device name (e.g., PCI address or auxiliary device name). * @param[out] enable * Where to store the enable status. * @@ -1793,8 +1795,8 @@ mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) * and rte_errno is set. */ int -mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, - int *enable) +mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *bus_name, + const char *dev_name, int *enable) { struct nlmsghdr *nlh; struct genlmsghdr *genl; @@ -1815,20 +1817,20 @@ mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, nlh->nlmsg_len += sizeof(struct genlmsghdr); genl->cmd = DEVLINK_CMD_PARAM_GET; genl->version = DEVLINK_GENL_VERSION; - nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); - nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); + nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); + nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); ret = mlx5_nl_send(nlsk_fd, nlh, sn); if (ret >= 0) ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); if (ret < 0) { - DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", - pci_addr, 
ret); + DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s/%s: %d.", + bus_name, dev_name, ret); return ret; } *enable = cur_en; - DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", - cur_en ? "en" : "dis", pci_addr); + DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s/%s\".", + cur_en ? "en" : "dis", bus_name, dev_name); return ret; } @@ -1839,16 +1841,17 @@ mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, * Netlink socket file descriptor. * @param[in] family_id * the Devlink family ID. - * @param pci_addr - * The device PCI address. - * @param[out] enable - * The enable status to set. + * @param[in] bus_name + * The devlink bus name (e.g., "pci" or "auxiliary"). + * @param[in] dev_name + * The devlink device name (e.g., PCI address or auxiliary device name). * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) +mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *bus_name, + const char *dev_name) { struct nlmsghdr *nlh; struct genlmsghdr *genl; @@ -1868,18 +1871,18 @@ mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) nlh->nlmsg_len += sizeof(struct genlmsghdr); genl->cmd = DEVLINK_CMD_RELOAD; genl->version = DEVLINK_GENL_VERSION; - nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); - nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); + nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); + nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); ret = mlx5_nl_send(nlsk_fd, nlh, sn); if (ret >= 0) ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); if (ret < 0) { - DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", - pci_addr, ret); + DRV_LOG(DEBUG, "Failed to reload %s/%s device by Netlink - %d", + bus_name, dev_name, ret); return ret; } - DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", - pci_addr); + 
DRV_LOG(DEBUG, "Device \"%s/%s\" was reloaded by Netlink successfully.", + bus_name, dev_name); return 0; } @@ -1890,17 +1893,19 @@ mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) * Netlink socket file descriptor. * @param[in] family_id * the Devlink family ID. - * @param pci_addr - * The device PCI address. - * @param[out] enable + * @param[in] bus_name + * The devlink bus name (e.g., "pci" or "auxiliary"). + * @param[in] dev_name + * The devlink device name (e.g., PCI address or auxiliary device name). + * @param[in] enable * The enable status to set. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, - int enable) +mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *bus_name, + const char *dev_name, int enable) { struct nlmsghdr *nlh; struct genlmsghdr *genl; @@ -1912,7 +1917,6 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; uint8_t ptype = NLA_FLAG; -; memset(buf, 0, sizeof(buf)); nlh = (struct nlmsghdr *)buf; @@ -1923,8 +1927,8 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, nlh->nlmsg_len += sizeof(struct genlmsghdr); genl->cmd = DEVLINK_CMD_PARAM_SET; genl->version = DEVLINK_GENL_VERSION; - nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); - nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); + nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); + nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); @@ -1934,14 +1938,14 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, if (ret >= 
0) ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); if (ret < 0) { - DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" - " %d.", enable ? "en" : "dis", pci_addr, ret); + DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s/%s by Netlink:" + " %d.", enable ? "en" : "dis", bus_name, dev_name, ret); return ret; } - DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", - pci_addr, enable ? "en" : "dis"); + DRV_LOG(DEBUG, "Device %s/%s ROCE was %sabled by Netlink successfully.", + bus_name, dev_name, enable ? "en" : "dis"); /* Now, need to reload the driver. */ - return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); + return mlx5_nl_driver_reload(nlsk_fd, family_id, bus_name, dev_name); } /** diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h index 3f79a73c85..6c9ec0462e 100644 --- a/drivers/common/mlx5/linux/mlx5_nl.h +++ b/drivers/common/mlx5/linux/mlx5_nl.h @@ -96,10 +96,10 @@ uint32_t mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, __rte_internal int mlx5_nl_devlink_family_id_get(int nlsk_fd); -int mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, - int *enable); -int mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, - int enable); +int mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *bus_name, + const char *dev_name, int *enable); +int mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *bus_name, + const char *dev_name, int enable); __rte_internal int mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg); -- 2.52.0

