Re: [PATCH 02/11] IB/ipoib: Return IPoIB devices matching connection parameters

2015-06-16 Thread Haggai Eran
On 15/06/2015 20:22, Jason Gunthorpe wrote:
 On Mon, Jun 15, 2015 at 11:47:07AM +0300, Haggai Eran wrote:
 
 +/* Called with an RCU read lock taken */
 
 Add _rcu to the name? That is the standard convention.

Sure, I'll change that.

 
 +/* returns an IPoIB netdev on top a given ipoib device matching a pkey_index
 + * and address, if one exists. */
 +static struct net_device *ipoib_match_gid_pkey_addr(struct ipoib_dev_priv 
 *priv,
 +const union ib_gid *gid,
 +u16 pkey_index,
 +const struct sockaddr *addr)
 +{
 +struct ipoib_dev_priv *child_priv;
 +struct net_device *net_dev = NULL;
 +
 +if (priv-pkey_index == pkey_index 
 +(!gid || !memcmp(gid, priv-local_gid, sizeof(*gid {
 +net_dev = ipoib_get_net_dev_match_addr(addr, priv-dev);
 +if (net_dev)
 +return net_dev;
 
 As I said already, this should not even look at the sockaddr unless
 there are multiple possible hits on the other parameters,
What is the goal here? The only difference omitting the IP check will
make is when sending a request to a matching GID but with the wrong IP.
Is it important that we pass these requests here so that they will be
dropped at the rdma_cm module?

Also, note that ipoib_get_net_dev_match_addr can return a different
net_dev from the one ipoib created. When using bonding, it will find the
IP address on the bonding device, and return the bonding net_dev instead.

 and there
 should be a comment explaining the sockaddr is only a hack to make up
 for having an incomplete LLADDR.

Sure, I'll add a comment.

 
 That way people not using same guid children do not get incorrect
 functionality..
 
 +static struct net_device *ipoib_get_net_dev_by_params(
 +struct ib_device *dev, u8 port, u16 pkey,
 +const union ib_gid *gid, const struct sockaddr *addr)
 
 [..]
 
 +ret = ib_find_cached_pkey(dev, port, pkey, pkey_index);
 +if (ret)
 +return NULL;
 +
 +if (!rdma_protocol_ib(dev, port))
 +return NULL;
 
 This if should be first I'd think.

Okay.

 
 
 +dev_list = ib_get_client_data(dev, ipoib_client);
 +if (!dev_list)
 +return NULL;
 
 Is the locking OK here? This access protected by lists_rwsem -
 but for instance ib_unregister_device holds only the device_mutex when
 calling client-remove, which kfree's dev_list. Looks wrong to me.

I think you're right. Perhaps we can switch the client data to NULL in
ib_unregister_device under the lists_rwsem. Then the
ipoib_get_net_dev_by_params call will know to skip it. The remove()
callback will need to be augmented with the client data as a parameter,
because it won't be able to retrieve it using ib_get_client_data anymore.

Haggai
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/11] IB/ipoib: Return IPoIB devices matching connection parameters

2015-06-15 Thread Haggai Eran
From: Guy Shapiro gu...@mellanox.com

Implement the get_net_device_by_port_pkey_ip callback that returns network
device to ib_core according to connection parameters. Check the ipoib
device and iterate over all child devices to look for a match.

For each IPoIB device we iterate through all upper devices when searching
for a matching IP, in order to support bonding.

Signed-off-by: Guy Shapiro gu...@mellanox.com
Signed-off-by: Haggai Eran hagg...@mellanox.com
Signed-off-by: Yotam Kenneth yota...@mellanox.com
Signed-off-by: Shachar Raindel rain...@mellanox.com
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 141 +-
 1 file changed, 140 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c 
b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index da149c278cb8..15af32665a87 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -48,6 +48,9 @@
 
 #include linux/jhash.h
 #include net/arp.h
+#include net/addrconf.h
+#include linux/inetdevice.h
+#include rdma/ib_cache.h
 
 #define DRV_VERSION 1.0.0
 
@@ -91,11 +94,15 @@ struct ib_sa_client ipoib_sa_client;
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 static void ipoib_neigh_reclaim(struct rcu_head *rp);
+static struct net_device *ipoib_get_net_dev_by_params(
+   struct ib_device *dev, u8 port, u16 pkey,
+   const union ib_gid *gid, const struct sockaddr *addr);
 
 static struct ib_client ipoib_client = {
.name   = ipoib,
.add= ipoib_add_one,
-   .remove = ipoib_remove_one
+   .remove = ipoib_remove_one,
+   .get_net_dev_by_params = ipoib_get_net_dev_by_params,
 };
 
 int ipoib_open(struct net_device *dev)
@@ -222,6 +229,138 @@ static int ipoib_change_mtu(struct net_device *dev, int 
new_mtu)
return 0;
 }
 
+/* Called with an RCU read lock taken */
+static bool ipoib_is_dev_match_addr(const struct sockaddr *addr,
+   struct net_device *dev)
+{
+   struct net *net = dev_net(dev);
+   struct in_device *in_dev;
+   struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
+   struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr;
+   __be32 ret_addr;
+
+   switch (addr-sa_family) {
+   case AF_INET:
+   in_dev = in_dev_get(dev);
+   if (!in_dev)
+   return false;
+
+   ret_addr = inet_confirm_addr(net, in_dev, 0,
+addr_in-sin_addr.s_addr,
+RT_SCOPE_HOST);
+   in_dev_put(in_dev);
+   if (ret_addr)
+   return true;
+
+   break;
+   case AF_INET6:
+   if (IS_ENABLED(CONFIG_IPV6) 
+   ipv6_chk_addr(net, addr_in6-sin6_addr, dev, 1))
+   return true;
+
+   break;
+   }
+   return false;
+}
+
+/**
+ * Find a net_device matching the given address, which is an upper device of
+ * the given net_device.
+ * @addr: IP address to look for.
+ * @dev: base IPoIB net_device
+ *
+ * If found, returns the net_device with a reference held. Otherwise return
+ * NULL.
+ */
+static struct net_device *ipoib_get_net_dev_match_addr(
+   const struct sockaddr *addr, struct net_device *dev)
+{
+   struct net_device *upper,
+ *result = NULL;
+   struct list_head *iter;
+
+   rcu_read_lock();
+   if (ipoib_is_dev_match_addr(addr, dev)) {
+   dev_hold(dev);
+   result = dev;
+   goto out;
+   }
+
+   netdev_for_each_all_upper_dev_rcu(dev, upper, iter) {
+   if (ipoib_is_dev_match_addr(addr, upper)) {
+   dev_hold(upper);
+   result = upper;
+   break;
+   }
+   }
+out:
+   rcu_read_unlock();
+   return result;
+}
+
+/* returns an IPoIB netdev on top a given ipoib device matching a pkey_index
+ * and address, if one exists. */
+static struct net_device *ipoib_match_gid_pkey_addr(struct ipoib_dev_priv 
*priv,
+   const union ib_gid *gid,
+   u16 pkey_index,
+   const struct sockaddr *addr)
+{
+   struct ipoib_dev_priv *child_priv;
+   struct net_device *net_dev = NULL;
+
+   if (priv-pkey_index == pkey_index 
+   (!gid || !memcmp(gid, priv-local_gid, sizeof(*gid {
+   net_dev = ipoib_get_net_dev_match_addr(addr, priv-dev);
+   if (net_dev)
+   return net_dev;
+   }
+
+   /* Check child interfaces */
+   down_read(priv-vlan_rwsem);
+   list_for_each_entry(child_priv, priv-child_intfs, list) {
+   net_dev = 

Re: [PATCH 02/11] IB/ipoib: Return IPoIB devices matching connection parameters

2015-06-15 Thread Jason Gunthorpe
On Mon, Jun 15, 2015 at 11:47:07AM +0300, Haggai Eran wrote:

 +/* Called with an RCU read lock taken */

Add _rcu to the name? That is the standard convention.

 +/* returns an IPoIB netdev on top a given ipoib device matching a pkey_index
 + * and address, if one exists. */
 +static struct net_device *ipoib_match_gid_pkey_addr(struct ipoib_dev_priv 
 *priv,
 + const union ib_gid *gid,
 + u16 pkey_index,
 + const struct sockaddr *addr)
 +{
 + struct ipoib_dev_priv *child_priv;
 + struct net_device *net_dev = NULL;
 +
 + if (priv-pkey_index == pkey_index 
 + (!gid || !memcmp(gid, priv-local_gid, sizeof(*gid {
 + net_dev = ipoib_get_net_dev_match_addr(addr, priv-dev);
 + if (net_dev)
 + return net_dev;

As I said already, this should not even look at the sockaddr unless
there are multiple possible hits on the other parameters, and there
should be a comment explaining the sockaddr is only a hack to make up
for having an incomplete LLADDR.

That way people not using same guid children do not get incorrect
functionality..

 +static struct net_device *ipoib_get_net_dev_by_params(
 + struct ib_device *dev, u8 port, u16 pkey,
 + const union ib_gid *gid, const struct sockaddr *addr)

[..]

 + ret = ib_find_cached_pkey(dev, port, pkey, pkey_index);
 + if (ret)
 + return NULL;
 +
 + if (!rdma_protocol_ib(dev, port))
 + return NULL;

This if should be first I'd think.


 + dev_list = ib_get_client_data(dev, ipoib_client);
 + if (!dev_list)
 + return NULL;

Is the locking OK here? This access protected by lists_rwsem -
but for instance ib_unregister_device holds only the device_mutex when
calling client-remove, which kfree's dev_list. Looks wrong to me.

Jason
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html