The following patch adds a simple IP-to-IB address translation module using ARP. It is based on the AT and SDP code, but kept as simple as possible.
I would like to merge this back into the trunk, and apply other changes there.

Signed-off-by: Sean Hefty <[EMAIL PROTECTED]>

Index: include/rdma/ib_addr.h
===================================================================
--- include/rdma/ib_addr.h	(revision 0)
+++ include/rdma/ib_addr.h	(revision 0)
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+
+#if !defined(IB_ADDR_H)
+#define IB_ADDR_H
+
+#include <linux/socket.h>
+#include <rdma/ib_verbs.h>
+
+/**
+ * struct ib_addr - Infiniband addressing information filled in by address
+ * resolution.
+ * @sgid: Source GID.
+ * @dgid: Destination GID.
+ * @pkey: Partition key (PKey).
+ */
+struct ib_addr {
+	union ib_gid	sgid;
+	union ib_gid	dgid;
+	u16		pkey;
+};
+
+/**
+ * ib_translate_addr - Translate a local IP address to an Infiniband GID and
+ * PKey.
+ * @addr: The local IP address, read as a struct sockaddr_in.
+ * @gid: Receives the GID of the device that owns the address.
+ * @pkey: Receives the PKey of that device.
+ *
+ * Returns 0 on success, or -EADDRNOTAVAIL if no local device has the address.
+ */
+int ib_translate_addr(struct sockaddr *addr, union ib_gid *gid, u16 *pkey);
+
+/**
+ * ib_resolve_addr - Resolve source and destination IP addresses to
+ * Infiniband network addresses.
+ * @src_addr: An optional source address to use in the resolution. If a
+ * source address is not provided, a usable address will be returned via
+ * the callback.
+ * @dst_addr: The destination address to resolve.
+ * @addr: A reference to a data location that will receive the resolved
+ * addresses. The data location must remain valid until the callback has
+ * been invoked.
+ * @timeout_ms: Amount of time to wait for the address resolution to complete.
+ * @callback: Call invoked once address resolution has completed, timed out,
+ * or been canceled. A status of 0 indicates success.
+ * @context: User-specified context associated with the call.
+ *
+ * Returns 0 if the request was queued, in which case the callback will be
+ * invoked; otherwise returns a negative errno and the callback is not invoked.
+ */
+int ib_resolve_addr(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+		    struct ib_addr *addr, int timeout_ms,
+		    void (*callback)(int status, struct sockaddr *src_addr,
+				     struct ib_addr *addr, void *context),
+		    void *context);
+
+/**
+ * ib_addr_cancel - Cancel an outstanding address resolution request.
+ * @addr: The data location given to ib_resolve_addr().  If that request is
+ *   still pending, its callback is invoked with a status of -ECANCELED.
+ */
+void ib_addr_cancel(struct ib_addr *addr);
+
+#endif /* IB_ADDR_H */
+
Index: core/addr.c
===================================================================
--- core/addr.c	(revision 0)
+++ core/addr.c	(revision 0)
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/workqueue.h>
+#include <net/arp.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+#include <rdma/ib_addr.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("IB Address Translation");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/*
+ * A pending address resolution request.  Requests are kept on req_list,
+ * ordered by timeout, until process_req() completes them.
+ */
+struct addr_req {
+	struct list_head list;
+	struct sockaddr src_addr;
+	struct sockaddr dst_addr;
+	struct ib_addr *addr;
+	void *context;
+	void (*callback)(int status, struct sockaddr *src_addr,
+			 struct ib_addr *addr, void *context);
+	unsigned long timeout;	/* absolute expiry time, in jiffies */
+	int status;		/* 0, -ENODATA while pending, or final error */
+};
+
+static void process_req(void *data);
+
+static DECLARE_MUTEX(mutex);		/* protects req_list */
+static LIST_HEAD(req_list);
+static DECLARE_WORK(work, process_req, NULL);
+static struct workqueue_struct *wq;
+
+/* Extract the 16-bit PKey stored in bytes 8 and 9 of the broadcast address. */
+static u16 addr_get_pkey(struct net_device *dev)
+{
+	return ((u16)dev->broadcast[8] << 8) | (u16)dev->broadcast[9];
+}
+
+int ib_translate_addr(struct sockaddr *addr, union ib_gid *gid, u16 *pkey)
+{
+	struct net_device *dev;
+	u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+
+	dev = ip_dev_find(ip);
+	if (!dev)
+		return -EADDRNOTAVAIL;
+
+	/* The GID follows the first 4 bytes of the hardware address. */
+	*gid = *(union ib_gid *) (dev->dev_addr + 4);
+	*pkey = addr_get_pkey(dev);
+	dev_put(dev);
+	return 0;
+}
+EXPORT_SYMBOL(ib_translate_addr);
+
+/*
+ * (Re)schedule the request worker to run at the absolute time @time (in
+ * jiffies).  A time that is not in the future is run after one jiffy.
+ */
+static void set_timeout(unsigned long time)
+{
+	unsigned long delay;
+
+	cancel_delayed_work(&work);
+
+	delay = time - jiffies;
+	if ((long)delay <= 0)
+		delay = 1;
+
+	queue_delayed_work(wq, &work, delay);
+}
+
+/* Insert @req into req_list, keeping the list sorted by timeout. */
+static void queue_req(struct addr_req *req)
+{
+	struct addr_req *temp_req;
+
+	down(&mutex);
+	list_for_each_entry_reverse(temp_req, &req_list, list) {
+		if (time_after(req->timeout, temp_req->timeout))
+			break;
+	}
+
+	list_add(&req->list, &temp_req->list);
+
+	/* The new request expires first; rearm the worker for it. */
+	if (req_list.next == &req->list)
+		set_timeout(req->timeout);
+	up(&mutex);
+}
+
+/* Send an ARP request for @dst_in out of the device the route selects. */
+static void addr_send_arp(struct sockaddr_in *dst_in)
+{
+	struct rtable *rt;
+	struct flowi fl;
+	u32 dst_ip = dst_in->sin_addr.s_addr;
+
+	memset(&fl, 0, sizeof fl);
+	fl.nl_u.ip4_u.daddr = dst_ip;
+	if (ip_route_output_key(&rt, &fl))
+		return;
+
+	arp_send(ARPOP_REQUEST, ETH_P_ARP, dst_ip, rt->idev->dev, rt->rt_src,
+		 NULL, rt->idev->dev->dev_addr, NULL);
+	ip_rt_put(rt);
+}
+
+/*
+ * Resolve @dst_in from the ARP table.  Returns 0 and fills in @addr (and
+ * @src_in, if no source IP was given) when a valid neighbour entry exists,
+ * -ENODATA when there is none yet, or the routing lookup error.
+ */
+static int addr_resolve_remote(struct sockaddr_in *src_in,
+			       struct sockaddr_in *dst_in,
+			       struct ib_addr *addr)
+{
+	u32 src_ip = src_in->sin_addr.s_addr;
+	u32 dst_ip = dst_in->sin_addr.s_addr;
+	struct flowi fl;
+	struct rtable *rt;
+	struct neighbour *neigh;
+	int ret;
+
+	memset(&fl, 0, sizeof fl);
+	fl.nl_u.ip4_u.daddr = dst_ip;
+	fl.nl_u.ip4_u.saddr = src_ip;
+	ret = ip_route_output_key(&rt, &fl);
+	if (ret)
+		goto out;
+
+	neigh = neigh_lookup(&arp_tbl, &dst_ip, rt->idev->dev);
+	if (!neigh) {
+		ret = -ENODATA;
+		goto err1;
+	}
+
+	if (!(neigh->nud_state & NUD_VALID)) {
+		ret = -ENODATA;
+		goto err2;
+	}
+
+	if (!src_ip) {
+		src_in->sin_family = dst_in->sin_family;
+		src_in->sin_addr.s_addr = rt->rt_src;
+	}
+
+	addr->sgid = *(union ib_gid *) (neigh->dev->dev_addr + 4);
+	addr->dgid = *(union ib_gid *) (neigh->ha + 4);
+	addr->pkey = addr_get_pkey(neigh->dev);
+
+err2:
+	neigh_release(neigh);
+err1:
+	ip_rt_put(rt);
+out:
+	return ret;
+}
+
+/*
+ * Work handler.  Retries resolution of pending requests, moves finished,
+ * timed out and canceled requests to a private list, rearms the timer for
+ * the earliest remaining request, and then invokes the callbacks with the
+ * mutex released.
+ */
+static void process_req(void *data)
+{
+	struct addr_req *req, *temp_req;
+	struct sockaddr_in *src_in, *dst_in;
+	struct list_head done_list;
+
+	INIT_LIST_HEAD(&done_list);
+
+	down(&mutex);
+	list_for_each_entry_safe(req, temp_req, &req_list, list) {
+		/*
+		 * Only retry requests that are still waiting on ARP, so that
+		 * a status of -ECANCELED set by ib_addr_cancel() is preserved.
+		 */
+		if (req->status == -ENODATA) {
+			src_in = (struct sockaddr_in *) &req->src_addr;
+			dst_in = (struct sockaddr_in *) &req->dst_addr;
+			req->status = addr_resolve_remote(src_in, dst_in,
+							  req->addr);
+			if (req->status && time_after(jiffies, req->timeout))
+				req->status = -ETIMEDOUT;
+			else if (req->status == -ENODATA)
+				continue;
+		}
+
+		list_del(&req->list);
+		list_add_tail(&req->list, &done_list);
+	}
+
+	if (!list_empty(&req_list)) {
+		req = list_entry(req_list.next, struct addr_req, list);
+		set_timeout(req->timeout);
+	}
+	up(&mutex);
+
+	list_for_each_entry_safe(req, temp_req, &done_list, list) {
+		list_del(&req->list);
+		req->callback(req->status, &req->src_addr, req->addr,
+			      req->context);
+		kfree(req);
+	}
+}
+
+/*
+ * Resolve a destination IP that is assigned to a local device.  Returns
+ * -EADDRNOTAVAIL if @dst_in (or a supplied @src_in) is not a local address.
+ */
+static int addr_resolve_local(struct sockaddr_in *src_in,
+			      struct sockaddr_in *dst_in,
+			      struct ib_addr *addr)
+{
+	struct net_device *dev;
+	u32 src_ip = src_in->sin_addr.s_addr;
+	u32 dst_ip = dst_in->sin_addr.s_addr;
+	int ret = 0;
+
+	dev = ip_dev_find(dst_ip);
+	if (!dev)
+		return -EADDRNOTAVAIL;
+
+	if (!src_ip) {
+		src_in->sin_family = dst_in->sin_family;
+		src_in->sin_addr.s_addr = dst_ip;
+		addr->sgid = *(union ib_gid *) (dev->dev_addr + 4);
+		addr->pkey = addr_get_pkey(dev);
+	} else {
+		ret = ib_translate_addr((struct sockaddr *)src_in,
+					&addr->sgid, &addr->pkey);
+		if (ret)
+			goto out;
+	}
+
+	addr->dgid = *(union ib_gid *) (dev->dev_addr + 4);
+out:
+	dev_put(dev);
+	return ret;
+}
+
+int ib_resolve_addr(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+		    struct ib_addr *addr, int timeout_ms,
+		    void (*callback)(int status, struct sockaddr *src_addr,
+				     struct ib_addr *addr, void *context),
+		    void *context)
+{
+	struct sockaddr_in *src_in, *dst_in;
+	struct addr_req *req;
+	int ret = 0;
+
+	req = kmalloc(sizeof *req, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+	memset(req, 0, sizeof *req);
+
+	if (src_addr)
+		req->src_addr = *src_addr;
+	req->dst_addr = *dst_addr;
+	req->addr = addr;
+	req->callback = callback;
+	req->context = context;
+
+	src_in = (struct sockaddr_in *) &req->src_addr;
+	dst_in = (struct sockaddr_in *) &req->dst_addr;
+
+	req->status = addr_resolve_local(src_in, dst_in, addr);
+	if (req->status == -EADDRNOTAVAIL)
+		req->status = addr_resolve_remote(src_in, dst_in, addr);
+
+	switch (req->status) {
+	case 0:
+		/* Already resolved; complete from the work queue. */
+		req->timeout = jiffies;
+		queue_req(req);
+		break;
+	case -ENODATA:
+		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
+		queue_req(req);
+		/*
+		 * Once queued, req may be completed and freed by the worker,
+		 * so use the caller's copy of the destination address.
+		 */
+		addr_send_arp((struct sockaddr_in *) dst_addr);
+		break;
+	default:
+		ret = req->status;
+		kfree(req);
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ib_resolve_addr);
+
+void ib_addr_cancel(struct ib_addr *addr)
+{
+	struct addr_req *req, *temp_req;
+
+	down(&mutex);
+	list_for_each_entry_safe(req, temp_req, &req_list, list) {
+		if (req->addr == addr) {
+			/*
+			 * Move the request to the head of the list and run the
+			 * worker now so the callback sees -ECANCELED promptly.
+			 */
+			req->status = -ECANCELED;
+			req->timeout = jiffies;
+			list_del(&req->list);
+			list_add(&req->list, &req_list);
+			set_timeout(req->timeout);
+			break;
+		}
+	}
+	up(&mutex);
+}
+EXPORT_SYMBOL(ib_addr_cancel);
+
+/*
+ * ARP packet hook.  Any ARP request or reply seen on an Infiniband device
+ * triggers the worker so that pending requests are retried.
+ */
+static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
+			 struct packet_type *pkt)
+{
+	struct arphdr *arp_hdr;
+
+	arp_hdr = (struct arphdr *) skb->nh.raw;
+
+	if (dev->type == ARPHRD_INFINIBAND &&
+	    (arp_hdr->ar_op == __constant_htons(ARPOP_REQUEST) ||
+	     arp_hdr->ar_op == __constant_htons(ARPOP_REPLY)))
+		set_timeout(jiffies);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static struct packet_type addr_arp = {
+	.type = __constant_htons(ETH_P_ARP),
+	.func = addr_arp_recv,
+	.af_packet_priv = (void*) 1,
+};
+
+static int addr_init(void)
+{
+	wq = create_singlethread_workqueue("ib_addr");
+	if (!wq)
+		return -ENOMEM;
+
+	dev_add_pack(&addr_arp);
+	return 0;
+}
+
+static void addr_cleanup(void)
+{
+	dev_remove_pack(&addr_arp);
+	destroy_workqueue(wq);
+}
+
+module_init(addr_init);
+module_exit(addr_cleanup);
_______________________________________________
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general