Author: hselasky
Date: Tue Jul 17 09:02:29 2018
New Revision: 336368
URL: https://svnweb.freebsd.org/changeset/base/336368

Log:
  Fix for RDMA loopback over VLAN in ibcore.
  
  Implement a more generic solution for detecting loopback.
  The problem was that the default netdevice was resolved
  for loopback even when a VLAN was used. Use the real network
  device instead of the loopback device for the bound device
  interface.
  
  How to test:
  ucmatose -b 127.0.0.1 -p 20090
  ucmatose -s 5.6.5.1 -p 20090
  
  Note that RDMA treats the IPv4 and IPv6 loopback
  addresses like any other address.
  
  MFC after:            1 week
  Sponsored by:         Mellanox Technologies

Modified:
  head/sys/ofed/drivers/infiniband/core/ib_addr.c
  head/sys/ofed/drivers/infiniband/core/ib_cma.c
  head/sys/ofed/drivers/infiniband/core/ib_sa_query.c

Modified: head/sys/ofed/drivers/infiniband/core/ib_addr.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/ib_addr.c     Tue Jul 17 08:59:34 2018        (r336367)
+++ head/sys/ofed/drivers/infiniband/core/ib_addr.c     Tue Jul 17 09:02:29 2018        (r336368)
@@ -124,7 +124,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, str
                     const unsigned char *dst_dev_addr)
 {
        /* check for loopback device */
-       if (dev->if_type == IFT_LOOP) {
+       if (dev->if_flags & IFF_LOOPBACK) {
                dev_addr->dev_type = ARPHRD_ETHER;
                memset(dev_addr->src_dev_addr, 0, MAX_ADDR_LEN);
                memset(dev_addr->broadcast, 0, MAX_ADDR_LEN);
@@ -153,19 +153,12 @@ EXPORT_SYMBOL(rdma_copy_addr);
 int rdma_translate_ip(const struct sockaddr *addr,
                      struct rdma_dev_addr *dev_addr)
 {
-       struct net_device *dev = NULL;
-       int ret = -EADDRNOTAVAIL;
+       struct net_device *dev;
+       int ret;
 
        if (dev_addr->bound_dev_if) {
                dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
-               if (!dev)
-                       return -ENODEV;
-               ret = rdma_copy_addr(dev_addr, dev, NULL);
-               dev_put(dev);
-               return ret;
-       }
-
-       switch (addr->sa_family) {
+       } else switch (addr->sa_family) {
 #ifdef INET
        case AF_INET:
                dev = ip_dev_find(dev_addr->net,
@@ -179,12 +172,19 @@ int rdma_translate_ip(const struct sockaddr *addr,
                break;
 #endif
        default:
+               dev = NULL;
                break;
        }
 
        if (dev != NULL) {
-               ret = rdma_copy_addr(dev_addr, dev, NULL);
+               /* disallow connections through 127.0.0.1 itself */
+               if (dev->if_flags & IFF_LOOPBACK)
+                       ret = -EINVAL;
+               else
+                       ret = rdma_copy_addr(dev_addr, dev, NULL);
                dev_put(dev);
+       } else {
+               ret = -ENODEV;
        }
        return ret;
 }
@@ -305,20 +305,39 @@ static int addr4_resolve(struct sockaddr_in *src_in,
        /* Step 2 - find outgoing network interface */
        switch (type) {
        case ADDR_VALID:
-               /* check for loopback device */
-               if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
-                       ifp = rte->rt_ifp;
-                       dev_hold(ifp);
-               } else if (addr->bound_dev_if != 0) {
+               /* get source interface */
+               if (addr->bound_dev_if != 0) {
                        ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
                } else {
                        ifp = ip_dev_find(addr->net, src_in->sin_addr.s_addr);
                }
+
                /* check source interface */
                if (ifp == NULL) {
                        error = ENETUNREACH;
                        goto error_rt_free;
+               } else if (ifp->if_flags & IFF_LOOPBACK) {
+                       /*
+                        * Source address cannot be a loopback device.
+                        */
+                       error = EHOSTUNREACH;
+                       goto error_put_ifp;
+               } else if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
+                       if (memcmp(&src_in->sin_addr, &dst_in->sin_addr,
+                           sizeof(src_in->sin_addr))) {
+                               /*
+                                * Destination is loopback, but source
+                                * and destination address is not the
+                                * same.
+                                */
+                               error = EHOSTUNREACH;
+                               goto error_put_ifp;
+                       }
                } else if (ifp != rte->rt_ifp) {
+                       /*
+                        * Source and destination interfaces are
+                        * different.
+                        */
                        error = ENETUNREACH;
                        goto error_put_ifp;
                }
@@ -481,20 +500,39 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
        /* Step 2 - find outgoing network interface */
        switch (type) {
        case ADDR_VALID:
-               /* check for loopback device */
-               if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
-                       ifp = rte->rt_ifp;
-                       dev_hold(ifp);
-               } else if (addr->bound_dev_if != 0) {
+               /* get source interface */
+               if (addr->bound_dev_if != 0) {
                        ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
                } else {
                        ifp = ip6_dev_find(addr->net, src_in->sin6_addr);
                }
+
                /* check source interface */
                if (ifp == NULL) {
                        error = ENETUNREACH;
                        goto error_rt_free;
+               } else if (ifp->if_flags & IFF_LOOPBACK) {
+                       /*
+                        * Source address cannot be a loopback device.
+                        */
+                       error = EHOSTUNREACH;
+                       goto error_put_ifp;
+               } else if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
+                       if (memcmp(&src_in->sin6_addr, &dst_in->sin6_addr,
+                           sizeof(src_in->sin6_addr))) {
+                               /*
+                                * Destination is loopback, but source
+                                * and destination address is not the
+                                * same.
+                                */
+                               error = EHOSTUNREACH;
+                               goto error_put_ifp;
+                       }
                } else if (ifp != rte->rt_ifp) {
+                       /*
+                        * Source and destination interfaces are
+                        * different.
+                        */
                        error = ENETUNREACH;
                        goto error_put_ifp;
                }
@@ -586,11 +624,14 @@ static int addr_resolve_neigh(struct ifnet *dev,
        if (dev->if_flags & IFF_LOOPBACK) {
                int ret;
 
+               /* find real device, not loopback one */
+               addr->bound_dev_if = 0;
+
                ret = rdma_translate_ip(dst_in, addr);
-               if (!ret)
+               if (ret == 0) {
                        memcpy(addr->dst_dev_addr, addr->src_dev_addr,
                               MAX_ADDR_LEN);
-
+               }
                return ret;
        }
 
@@ -603,8 +644,7 @@ static int addr_resolve_neigh(struct ifnet *dev,
 
 static int addr_resolve(struct sockaddr *src_in,
                        const struct sockaddr *dst_in,
-                       struct rdma_dev_addr *addr,
-                       bool resolve_neigh)
+                       struct rdma_dev_addr *addr)
 {
        struct net_device *ndev = NULL;
        u8 edst[MAX_ADDR_LEN];
@@ -613,27 +653,30 @@ static int addr_resolve(struct sockaddr *src_in,
        if (dst_in->sa_family != src_in->sa_family)
                return -EINVAL;
 
-       if (src_in->sa_family == AF_INET) {
+       switch (src_in->sa_family) {
+       case AF_INET:
                ret = addr4_resolve((struct sockaddr_in *)src_in,
                                    (const struct sockaddr_in *)dst_in,
                                    addr, edst, &ndev);
-               if (ret)
-                       return ret;
-
-               if (resolve_neigh)
-                       ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
-       } else {
+               break;
+       case AF_INET6:
                ret = addr6_resolve((struct sockaddr_in6 *)src_in,
                                    (const struct sockaddr_in6 *)dst_in, addr,
                                    edst, &ndev);
-               if (ret)
-                       return ret;
-
-               if (resolve_neigh)
-                       ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
+               break;
+       default:
+               ret = -EADDRNOTAVAIL;
+               break;
        }
 
-       addr->bound_dev_if = ndev->if_index;
+       /* check for error */
+       if (ret != 0)
+               return ret;
+
+       /* store MAC addresses and check for loopback */
+       ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
+
+       /* set belonging VNET, if any */
        addr->net = dev_net(ndev);
        dev_put(ndev);
 
@@ -653,8 +696,7 @@ static void process_req(struct work_struct *work)
                if (req->status == -ENODATA) {
                        src_in = (struct sockaddr *) &req->src_addr;
                        dst_in = (struct sockaddr *) &req->dst_addr;
-                       req->status = addr_resolve(src_in, dst_in, req->addr,
-                                                  true);
+                       req->status = addr_resolve(src_in, dst_in, req->addr);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
                        else if (req->status == -ENODATA)
@@ -714,7 +756,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        req->client = client;
        atomic_inc(&client->refcount);
 
-       req->status = addr_resolve(src_in, dst_in, addr, true);
+       req->status = addr_resolve(src_in, dst_in, addr);
        switch (req->status) {
        case 0:
                req->timeout = jiffies;
@@ -752,7 +794,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
                src_in->sa_family = dst_addr->sa_family;
        }
 
-       return addr_resolve(src_in, dst_addr, addr, false);
+       return addr_resolve(src_in, dst_addr, addr);
 }
 EXPORT_SYMBOL(rdma_resolve_ip_route);
 

Modified: head/sys/ofed/drivers/infiniband/core/ib_cma.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/ib_cma.c      Tue Jul 17 08:59:34 2018        (r336367)
+++ head/sys/ofed/drivers/infiniband/core/ib_cma.c      Tue Jul 17 09:02:29 2018        (r336368)
@@ -568,12 +568,12 @@ static int cma_translate_addr(struct sockaddr *addr, s
 
 static inline int cma_validate_port(struct ib_device *device, u8 port,
                                    enum ib_gid_type gid_type,
-                                     union ib_gid *gid, int dev_type,
-                                     struct vnet *net,
-                                     int bound_if_index)
+                                   union ib_gid *gid,
+                                   const struct rdma_dev_addr *dev_addr)
 {
+       const int dev_type = dev_addr->dev_type;
+       struct net_device *ndev;
        int ret = -ENODEV;
-       struct net_device *ndev = NULL;
 
        if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
                return ret;
@@ -582,19 +582,9 @@ static inline int cma_validate_port(struct ib_device *
                return ret;
 
        if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
-               ndev = dev_get_by_index(net, bound_if_index);
-               if (ndev && ndev->if_flags & IFF_LOOPBACK) {
-                       pr_info("detected loopback device\n");
-                       dev_put(ndev);
-
-                       if (!device->get_netdev)
-                               return -EOPNOTSUPP;
-
-                       ndev = device->get_netdev(device, port);
-                       if (!ndev)
-                               return -ENODEV;
-               }
+               ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
        } else {
+               ndev = NULL;
                gid_type = IB_GID_TYPE_IB;
        }
 
@@ -636,10 +626,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_
                ret = cma_validate_port(cma_dev->device, port,
                                        rdma_protocol_ib(cma_dev->device, port) 
?
                                        IB_GID_TYPE_IB :
-                                       listen_id_priv->gid_type, gidp,
-                                       dev_addr->dev_type,
-                                       dev_addr->net,
-                                       dev_addr->bound_dev_if);
+                                       listen_id_priv->gid_type, gidp, 
dev_addr);
                if (!ret) {
                        id_priv->id.port_num = port;
                        goto out;
@@ -660,9 +647,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_
                                                
rdma_protocol_ib(cma_dev->device, port) ?
                                                IB_GID_TYPE_IB :
                                                cma_dev->default_gid_type[port 
- 1],
-                                               gidp, dev_addr->dev_type,
-                                               dev_addr->net,
-                                               dev_addr->bound_dev_if);
+                                               gidp, dev_addr);
                        if (!ret) {
                                id_priv->id.port_num = port;
                                goto out;
@@ -2521,21 +2506,6 @@ static int cma_resolve_iboe_route(struct rdma_id_priva
                if (!ndev) {
                        ret = -ENODEV;
                        goto err2;
-               }
-
-               if (ndev->if_flags & IFF_LOOPBACK) {
-                       dev_put(ndev);
-                       if (!id_priv->id.device->get_netdev) {
-                               ret = -EOPNOTSUPP;
-                               goto err2;
-                       }
-
-                       ndev = 
id_priv->id.device->get_netdev(id_priv->id.device,
-                                                             
id_priv->id.port_num);
-                       if (!ndev) {
-                               ret = -ENODEV;
-                               goto err2;
-                       }
                }
 
                route->path_rec->net = ndev->if_vnet;

Modified: head/sys/ofed/drivers/infiniband/core/ib_sa_query.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/ib_sa_query.c Tue Jul 17 08:59:34 2018        (r336367)
+++ head/sys/ofed/drivers/infiniband/core/ib_sa_query.c Tue Jul 17 09:02:29 2018        (r336368)
@@ -696,10 +696,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 
 
                resolved_dev = dev_get_by_index(dev_addr.net,
                                                dev_addr.bound_dev_if);
-               if (resolved_dev->if_flags & IFF_LOOPBACK) {
-                       dev_put(resolved_dev);
-                       resolved_dev = idev;
-                       dev_hold(resolved_dev);
+               if (!resolved_dev) {
+                       dev_put(idev);
+                       return -ENODEV;
                }
                ndev = ib_get_ndev_from_path(rec);
                rcu_read_lock();
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to