[PATCH 11/30] IB/core: Add rdma_network_type to wc

2015-02-18 Thread Somnath Kotur
From: Matan Barak mat...@mellanox.com

Providers should tell IB core the wc's network type.
This is used in order to search for the proper GID in the
GID table. When using HCAs that can't provide this info,
IB core tries to deep examine the packet and extract
the GID type by itself.

Signed-off-by: Matan Barak mat...@mellanox.com
Signed-off-by: Somnath Kotur somnath.ko...@emulex.com
---
 drivers/infiniband/core/verbs.c |  106 +--
 include/rdma/ib_verbs.h |   30 +++
 2 files changed, 131 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 2c54d31..0fdac14 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -195,8 +195,84 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
+static int ib_get_grh_header_version(const void *h)
+{
+   const struct iphdr *ip4h = (struct iphdr *)(h + 20);
+   struct iphdr ip4h_checked;
+   const struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
+
+   if (ip6h->version != 6)
+   return (ip4h->version == 4) ? 4 : 0;
+   /* version may be 6 or 4 */
+   if (ip4h->ihl != 5) /* IPv4 header length must be 5 for RR */
+   return 6;
+   /* Verify checksum.
+  We can't write on scattered buffers so we need to copy to
+  temp buffer.
+*/
+   memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+   ip4h_checked.check = 0;
+   ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
+   /* if IPv4 header checksum is OK, believe it */
+   if (ip4h->check == ip4h_checked.check)
+   return 4;
+   return 6;
+}
+
+static int ib_get_dgid_sgid_by_grh(const void *h,
+  enum rdma_network_type net_type,
+  union ib_gid *dgid, union ib_gid *sgid)
+{
+   switch (net_type) {
+   case RDMA_NETWORK_IPV4: {
+   const struct iphdr *ip4h = (struct iphdr *)(h + 20);
+
+   ipv6_addr_set_v4mapped(ip4h->daddr, (struct in6_addr *)dgid);
+   ipv6_addr_set_v4mapped(ip4h->saddr, (struct in6_addr *)sgid);
+   return 0;
+   }
+   case RDMA_NETWORK_IPV6: {
+   struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
+
+   memcpy(dgid, &ip6h->daddr, sizeof(*dgid));
+   memcpy(sgid, &ip6h->saddr, sizeof(*sgid));
+   return 0;
+   }
+   case RDMA_NETWORK_IB: {
+   struct ib_grh *grh = (struct ib_grh *)h;
+
+   memcpy(dgid, &grh->dgid, sizeof(*dgid));
+   memcpy(sgid, &grh->sgid, sizeof(*sgid));
+   return 0;
+   }
+   }
+
+   return -EINVAL;
+}
+
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+u8 port_num,
+const struct ib_grh *grh)
+{
+   int grh_version;
+
+   if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND)
+   return RDMA_NETWORK_IB;
+
+   grh_version = ib_get_grh_header_version(grh);
+
+   if (grh_version == 4)
+   return RDMA_NETWORK_IPV4;
+
+   if (grh->next_hdr == IPPROTO_UDP)
+   return RDMA_NETWORK_IPV6;
+
+   return RDMA_NETWORK_IB;
+}
+
 struct find_gid_index_context {
u16 vlan_id;
+   enum ib_gid_type gid_type;
 };
 
 static bool find_gid_index(const union ib_gid *gid,
@@ -206,6 +282,9 @@ static bool find_gid_index(const union ib_gid *gid,
struct find_gid_index_context *ctx =
(struct find_gid_index_context *)context;
 
+   if (ctx->gid_type != gid_attr->gid_type)
+   return false;
+
if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
(is_vlan_dev(gid_attr->ndev) &&
 vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -216,9 +295,11 @@ static bool find_gid_index(const union ib_gid *gid,
 
 static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
   u16 vlan_id, union ib_gid *sgid,
+  enum ib_gid_type gid_type,
   u16 *gid_index)
 {
-   struct find_gid_index_context context = {.vlan_id = vlan_id};
+   struct find_gid_index_context context = {.vlan_id = vlan_id,
+.gid_type = gid_type};
 
return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
 &context, gid_index);
@@ -232,9 +313,24 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
int ret;
int is_eth = (rdma_port_get_link_layer(device, port_num) ==
IB_LINK_LAYER_ETHERNET);
+   enum rdma_network_type net_type = RDMA_NETWORK_IB;
+  

[PATCH 11/30] IB/core: Add rdma_network_type to wc

2015-02-18 Thread Somnath Kotur
From: Matan Barak mat...@mellanox.com

Providers should tell IB core the wc's network type.
This is used in order to search for the proper GID in the
GID table. When using HCAs that can't provide this info,
IB core tries to deep examine the packet and extract
the GID type by itself.

Signed-off-by: Matan Barak mat...@mellanox.com
Signed-off-by: Somnath Kotur somnath.ko...@emulex.com
---
 drivers/infiniband/core/verbs.c |  106 +--
 include/rdma/ib_verbs.h |   30 +++
 2 files changed, 131 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 2c54d31..0fdac14 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -195,8 +195,84 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
+static int ib_get_grh_header_version(const void *h)
+{
+   const struct iphdr *ip4h = (struct iphdr *)(h + 20);
+   struct iphdr ip4h_checked;
+   const struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
+
+   if (ip6h->version != 6)
+   return (ip4h->version == 4) ? 4 : 0;
+   /* version may be 6 or 4 */
+   if (ip4h->ihl != 5) /* IPv4 header length must be 5 for RR */
+   return 6;
+   /* Verify checksum.
+  We can't write on scattered buffers so we need to copy to
+  temp buffer.
+*/
+   memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+   ip4h_checked.check = 0;
+   ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
+   /* if IPv4 header checksum is OK, believe it */
+   if (ip4h->check == ip4h_checked.check)
+   return 4;
+   return 6;
+}
+
+static int ib_get_dgid_sgid_by_grh(const void *h,
+  enum rdma_network_type net_type,
+  union ib_gid *dgid, union ib_gid *sgid)
+{
+   switch (net_type) {
+   case RDMA_NETWORK_IPV4: {
+   const struct iphdr *ip4h = (struct iphdr *)(h + 20);
+
+   ipv6_addr_set_v4mapped(ip4h->daddr, (struct in6_addr *)dgid);
+   ipv6_addr_set_v4mapped(ip4h->saddr, (struct in6_addr *)sgid);
+   return 0;
+   }
+   case RDMA_NETWORK_IPV6: {
+   struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
+
+   memcpy(dgid, &ip6h->daddr, sizeof(*dgid));
+   memcpy(sgid, &ip6h->saddr, sizeof(*sgid));
+   return 0;
+   }
+   case RDMA_NETWORK_IB: {
+   struct ib_grh *grh = (struct ib_grh *)h;
+
+   memcpy(dgid, &grh->dgid, sizeof(*dgid));
+   memcpy(sgid, &grh->sgid, sizeof(*sgid));
+   return 0;
+   }
+   }
+
+   return -EINVAL;
+}
+
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+u8 port_num,
+const struct ib_grh *grh)
+{
+   int grh_version;
+
+   if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND)
+   return RDMA_NETWORK_IB;
+
+   grh_version = ib_get_grh_header_version(grh);
+
+   if (grh_version == 4)
+   return RDMA_NETWORK_IPV4;
+
+   if (grh->next_hdr == IPPROTO_UDP)
+   return RDMA_NETWORK_IPV6;
+
+   return RDMA_NETWORK_IB;
+}
+
 struct find_gid_index_context {
u16 vlan_id;
+   enum ib_gid_type gid_type;
 };
 
 static bool find_gid_index(const union ib_gid *gid,
@@ -206,6 +282,9 @@ static bool find_gid_index(const union ib_gid *gid,
struct find_gid_index_context *ctx =
(struct find_gid_index_context *)context;
 
+   if (ctx->gid_type != gid_attr->gid_type)
+   return false;
+
if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
(is_vlan_dev(gid_attr->ndev) &&
 vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -216,9 +295,11 @@ static bool find_gid_index(const union ib_gid *gid,
 
 static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
   u16 vlan_id, union ib_gid *sgid,
+  enum ib_gid_type gid_type,
   u16 *gid_index)
 {
-   struct find_gid_index_context context = {.vlan_id = vlan_id};
+   struct find_gid_index_context context = {.vlan_id = vlan_id,
+.gid_type = gid_type};
 
return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
 &context, gid_index);
@@ -232,9 +313,24 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
int ret;
int is_eth = (rdma_port_get_link_layer(device, port_num) ==
IB_LINK_LAYER_ETHERNET);
+   enum rdma_network_type net_type = RDMA_NETWORK_IB;
+