Introduce mlx5_nl_read_events() to read Netlink events
(technically, messages) from a socket that was configured
to listen for them via a new mlx5_nl_init() parameter.
Add mlx5_nl_parse_link_status_update() helper
to extract information from link-related events.
This patch is a shared base for later fixes.

Cc: sta...@dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozl...@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viachesl...@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c |   2 +-
 drivers/common/mlx5/linux/mlx5_nl.c        | 102 ++++++++++++++++++++-
 drivers/common/mlx5/linux/mlx5_nl.h        |   8 +-
 drivers/common/mlx5/version.map            |   2 +
 drivers/net/mlx5/linux/mlx5_os.c           |   8 +-
 drivers/net/mlx5/linux/mlx5_vlan_os.c      |   2 +-
 6 files changed, 116 insertions(+), 8 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 0d3e24e04e..25e09bb55b 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -487,7 +487,7 @@ mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
 static int
 mlx5_nl_roce_disable(const char *addr)
 {
-       int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
+       int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0);
        int devlink_id;
        int enable;
        int ret;
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index fd4c2d2625..5d04857b38 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -185,19 +185,22 @@ uint32_t atomic_sn;
  *
  * @param protocol
  *   Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
+ * @param groups
+ *   Groups to listen to (e.g. RTMGRP_LINK), can be 0.
  *
  * @return
  *   A file descriptor on success, a negative errno value otherwise and
  *   rte_errno is set.
  */
 int
-mlx5_nl_init(int protocol)
+mlx5_nl_init(int protocol, int groups)
 {
        int fd;
        int buf_size;
        socklen_t opt_size;
        struct sockaddr_nl local = {
                .nl_family = AF_NETLINK,
+               .nl_groups = groups,
        };
        int ret;
 
@@ -1862,3 +1865,100 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
        /* Now, need to reload the driver. */
        return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);
 }
+
+/**
+ * Extract the interface index from a Netlink link message (RTM_*LINK).
+ *
+ * @param hdr
+ *  Netlink message header; payload is read as struct ifinfomsg, unchecked.
+ * @param[out] ifindex
+ *  Receives ifi_index of the message; left untouched on failure.
+ *
+ * @return
+ *  0 for RTM_NEWLINK/DELLINK/GETLINK/SETLINK messages, -1 for other types.
+ */
+int
+mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex)
+{
+       struct ifinfomsg *info;
+
+       switch (hdr->nlmsg_type) {
+       case RTM_NEWLINK:
+       case RTM_DELLINK:
+       case RTM_GETLINK:
+       case RTM_SETLINK:
+               info = NLMSG_DATA(hdr);
+               *ifindex = info->ifi_index;
+               return 0;
+       }
+       return -1; /* Not a link message, *ifindex is not written. */
+}
+
+/**
+ * Drain pending events from a Netlink socket without blocking.
+ *
+ * @param nlsk_fd
+ *  Netlink socket; read with MSG_DONTWAIT until EAGAIN is reported.
+ * @param cb
+ *  Callback invoked for each message that passes the length checks.
+ * @param cb_arg
+ *  User data passed to the callback unchanged.
+ *
+ * @return
+ *  0 once the socket reports EAGAIN, including when there were no events.
+ *  Negative on receive error or malformed message, and rte_errno is set.
+ */
+int
+mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg)
+{
+       char buf[8192]; /* Receive buffer, may carry several messages. */
+       struct sockaddr_nl addr;
+       struct iovec iov = {
+               .iov_base = buf,
+               .iov_len = sizeof(buf),
+       };
+       struct msghdr msg = {
+               .msg_name = &addr,
+               .msg_namelen = sizeof(addr),
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
+       };
+       struct nlmsghdr *hdr;
+       ssize_t size;
+
+       while (1) {
+               size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT);
+               if (size < 0) {
+                       if (errno == EAGAIN) /* Nothing left to read. */
+                               return 0;
+                       if (errno == EINTR) /* Interrupted, retry. */
+                               continue;
+                       DRV_LOG(DEBUG, "Failed to receive netlink message: %s",
+                               strerror(errno));
+                       rte_errno = errno; /* NOTE(review): read after DRV_LOG(). */
+                       return -rte_errno;
+               }
+               hdr = (struct nlmsghdr *)buf;
+               while (size >= (ssize_t)sizeof(*hdr)) { /* Walk the messages. */
+                       ssize_t msg_len = hdr->nlmsg_len;
+                       ssize_t data_len = msg_len - sizeof(*hdr);
+                       ssize_t aligned_len;
+
+                       if (data_len < 0) { /* Shorter than its own header. */
+                               DRV_LOG(DEBUG, "Netlink message too short");
+                               rte_errno = EINVAL;
+                               return -rte_errno;
+                       }
+                       aligned_len = NLMSG_ALIGN(msg_len);
+                       if (aligned_len > size) { /* Exceeds the received data. */
+                               DRV_LOG(DEBUG, "Netlink message too long");
+                               rte_errno = EINVAL;
+                               return -rte_errno;
+                       }
+                       cb(hdr, cb_arg);
+                       hdr = RTE_PTR_ADD(hdr, aligned_len); /* Next message. */
+                       size -= aligned_len;
+               }
+       }
+       return 0; /* Not reached: the loop above only exits via return. */
+}
diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h
index 2063c0deeb..0b7552338a 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.h
+++ b/drivers/common/mlx5/linux/mlx5_nl.h
@@ -11,6 +11,7 @@
 
 #include "mlx5_common.h"
 
+typedef void (mlx5_nl_event_cb)(struct nlmsghdr *hdr, void *user_data);
 
 /* VLAN netdev for VLAN workaround. */
 struct mlx5_nl_vlan_dev {
@@ -30,7 +31,7 @@ struct mlx5_nl_vlan_vmwa_context {
 };
 
 __rte_internal
-int mlx5_nl_init(int protocol);
+int mlx5_nl_init(int protocol, int groups);
 __rte_internal
 int mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
                         struct rte_ether_addr *mac, uint32_t index);
@@ -75,4 +76,9 @@ int mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
 int mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
                            int enable);
 
+__rte_internal
+int mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg);
+__rte_internal
+int mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex);
+
 #endif /* RTE_PMD_MLX5_NL_H_ */
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 462b7cea5e..d9b7ccacde 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -123,9 +123,11 @@ INTERNAL {
        mlx5_nl_mac_addr_flush; # WINDOWS_NO_EXPORT
        mlx5_nl_mac_addr_remove; # WINDOWS_NO_EXPORT
        mlx5_nl_mac_addr_sync; # WINDOWS_NO_EXPORT
+       mlx5_nl_parse_link_status_update; # WINDOWS_NO_EXPORT
        mlx5_nl_port_state; # WINDOWS_NO_EXPORT
        mlx5_nl_portnum; # WINDOWS_NO_EXPORT
        mlx5_nl_promisc; # WINDOWS_NO_EXPORT
+       mlx5_nl_read_events; # WINDOWS_NO_EXPORT
        mlx5_nl_switch_info; # WINDOWS_NO_EXPORT
        mlx5_nl_vf_mac_addr_modify; # WINDOWS_NO_EXPORT
        mlx5_nl_vlan_vmwa_create; # WINDOWS_NO_EXPORT
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index bbe05bb837..602473e8f7 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1086,7 +1086,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                " old OFED/rdma-core version or firmware configuration");
 #endif
        config->mpls_en = mpls_en;
-       nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+       nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0);
        /* Check port status. */
        if (spawn->phys_port <= UINT8_MAX) {
                /* Legacy Verbs api only support u8 port number. */
@@ -1133,7 +1133,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        priv->mtu = RTE_ETHER_MTU;
        /* Some internal functions rely on Netlink sockets, open them now. */
        priv->nl_socket_rdma = nl_rdma;
-       priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE);
+       priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE, 0);
        priv->representor = !!switch_info->representor;
        priv->master = !!switch_info->master;
        priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
@@ -2130,8 +2130,8 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
         * matching ones, gathering into the list.
         */
        struct ibv_device *ibv_match[ret + 1];
-       int nl_route = mlx5_nl_init(NETLINK_ROUTE);
-       int nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+       int nl_route = mlx5_nl_init(NETLINK_ROUTE, 0);
+       int nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0);
        unsigned int i;
 
        while (ret-- > 0) {
diff --git a/drivers/net/mlx5/linux/mlx5_vlan_os.c b/drivers/net/mlx5/linux/mlx5_vlan_os.c
index 005904bdfe..7ee2460a23 100644
--- a/drivers/net/mlx5/linux/mlx5_vlan_os.c
+++ b/drivers/net/mlx5/linux/mlx5_vlan_os.c
@@ -136,7 +136,7 @@ mlx5_vlan_vmwa_init(struct rte_eth_dev *dev, uint32_t ifindex)
                return NULL;
        }
        rte_spinlock_init(&vmwa->sl);
-       vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
+       vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE, 0);
        if (vmwa->nl_socket < 0) {
                DRV_LOG(WARNING,
                        "Can not create Netlink socket"
-- 
2.25.1

Reply via email to