This patch contains the CMA changes to support iWARP and is relative to the trunk. It picks up the latest ib_addr generalizations, which allowed some simplification of the rdma_resolve_addr implementation. It requires the companion include file patch in order to compile.
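To help with review, here is a small usage sketch (not part of the patch) showing how an active-side kernel client could drive the new iw_cm interface added in iwcm.c below. It only uses calls that appear in this patch (iw_create_cm_id, iw_cm_bind_qp, iw_cm_connect, iw_destroy_cm_id); my_event_handler and my_connect are made-up names, QP creation and address setup are assumed to have happened elsewhere, and the include list assumes the separate include file patch provides <rdma/iw_cm.h>.

#include <linux/err.h>
#include <linux/in.h>
#include <rdma/ib_verbs.h>
#include <rdma/iw_cm.h>		/* from the include file patch */

static int my_event_handler(struct iw_cm_id *cm_id,
			    struct iw_cm_event *event)
{
	switch (event->event) {
	case IW_CM_EVENT_CONNECT_REPLY:
		/* event->status == 0: peer accepted and the iwcm has moved
		 * this cm_id to ESTABLISHED; non-zero: rejected or failed. */
		break;
	case IW_CM_EVENT_LLP_DISCONNECT:
	case IW_CM_EVENT_CLOSE:
		/* Connection torn down; release client resources here. */
		break;
	default:
		break;
	}
	return 0;	/* non-zero asks the iwcm to destroy this cm_id */
}

static int my_connect(struct ib_device *device, struct ib_qp *qp,
		      struct sockaddr_in *laddr, struct sockaddr_in *raddr)
{
	struct iw_cm_id *cm_id;
	int ret;

	cm_id = iw_create_cm_id(device, my_event_handler, NULL);
	if (IS_ERR(cm_id))
		return PTR_ERR(cm_id);

	cm_id->local_addr = *laddr;
	cm_id->remote_addr = *raddr;

	/* Associate the QP that will carry the connection. */
	iw_cm_bind_qp(cm_id, qp);

	/* The reply arrives asynchronously as IW_CM_EVENT_CONNECT_REPLY
	 * on my_event_handler. */
	ret = iw_cm_connect(cm_id, NULL, 0);
	if (ret)
		iw_destroy_cm_id(cm_id);
	return ret;
}

The passive side follows the same cm_id/event pattern through iw_cm_listen and iw_cm_accept.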
I tested this on 2.6.14.5 with the AMSO1100 iWARP and Voltaire IB adapters. Please review and comment as appropriate. I would love to get this in the trunk -- the merges are killing me. Thanks, Signed-off-by: Tom Tucker <[EMAIL PROTECTED]> Index: cm.c =================================================================== --- cm.c (revision 4748) +++ cm.c (working copy) @@ -3261,6 +3261,9 @@ int ret; u8 i; + if (device->node_type == IB_NODE_RNIC) + return; + cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) Index: iwcm.c =================================================================== --- iwcm.c (revision 0) +++ iwcm.c (revision 0) @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ * + */ +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#include <rdma/ib_cache.h> +#include <rdma/ib_cm.h> +#include <rdma/iw_cm.h> + +#include "cm_msgs.h" + +MODULE_AUTHOR("Tom Tucker"); +MODULE_DESCRIPTION("iWARP CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static void iwcm_add_one(struct ib_device *device); +static void iwcm_remove_one(struct ib_device *device); +struct iwcm_id_private; + +static struct ib_client iwcm_client = { + .name = "iwcm", + .add = iwcm_add_one, + .remove = iwcm_remove_one +}; + +static struct { + spinlock_t lock; + struct list_head device_list; + rwlock_t device_lock; + struct workqueue_struct* wq; +} iwcm; + +struct iwcm_device; +struct iwcm_port { + struct iwcm_device *iwcm_dev; + struct sockaddr_in local_addr; + u8 port_num; +}; + +struct iwcm_device { + struct list_head list; + struct ib_device *device; + struct iwcm_port port[0]; +}; + +struct iwcm_id_private { + struct iw_cm_id id; + + spinlock_t lock; + wait_queue_head_t wait; + atomic_t refcount; + + struct rb_node listen_node; + + struct list_head work_list; + atomic_t work_count; +}; + +struct iwcm_work { + struct work_struct work; + struct iwcm_id_private* cm_id; + struct iw_cm_event event; +}; + +/* Called whenever a reference added for a cm_id */ +static inline void iwcm_addref_id(struct iwcm_id_private *cm_id_priv) +{ + atomic_inc(&cm_id_priv->refcount); +} + +/* Called whenever releasing a reference to a cm id */ +static inline void iwcm_deref_id(struct iwcm_id_private *cm_id_priv) +{ + if (atomic_dec_and_test(&cm_id_priv->refcount)) + wake_up(&cm_id_priv->wait); +} + +static void cm_event_handler(struct iw_cm_id* cm_id, struct iw_cm_event* event); + +struct iw_cm_id *iw_create_cm_id(struct ib_device *device, + iw_cm_handler cm_handler, + void *context) +{ + struct iwcm_id_private *iwcm_id_priv; + + iwcm_id_priv = kmalloc(sizeof *iwcm_id_priv, GFP_KERNEL); + if (!iwcm_id_priv) + return ERR_PTR(-ENOMEM); + + memset(iwcm_id_priv, 0, sizeof *iwcm_id_priv); + iwcm_id_priv->id.state = IW_CM_STATE_IDLE; + iwcm_id_priv->id.device = device; + iwcm_id_priv->id.cm_handler = cm_handler; + iwcm_id_priv->id.context = context; + iwcm_id_priv->id.event_handler = cm_event_handler; + + spin_lock_init(&iwcm_id_priv->lock); + init_waitqueue_head(&iwcm_id_priv->wait); + atomic_set(&iwcm_id_priv->refcount, 1); + + return &iwcm_id_priv->id; + +} +EXPORT_SYMBOL(iw_create_cm_id); + +void iw_destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *iwcm_id_priv; + unsigned long flags; + int ret = 0; + + iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + spin_lock_irqsave(&iwcm_id_priv->lock, flags); + switch (cm_id->state) { + case IW_CM_STATE_LISTEN: + cm_id->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = cm_id->device->iwcm->destroy_listen(cm_id); + break; + + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_ESTABLISHED: + cm_id->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = cm_id->device->iwcm->disconnect(cm_id,1); + break; + + case IW_CM_STATE_IDLE: + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + break; + + default: + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + printk(KERN_ERR "%s:%s:%u Illegal state %d for iw_cm_id.\n", + __FILE__, __FUNCTION__, __LINE__, cm_id->state); + ; + } + + 
atomic_dec(&iwcm_id_priv->refcount); + wait_event(iwcm_id_priv->wait, !atomic_read(&iwcm_id_priv->refcount)); + + kfree(iwcm_id_priv); +} +EXPORT_SYMBOL(iw_destroy_cm_id); + +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct iwcm_id_private *iwcm_id_priv; + unsigned long flags; + int ret = 0; + + if (cm_id->device == 0 || cm_id->device->iwcm == 0) + return -EINVAL; + + iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + spin_lock_irqsave(&iwcm_id_priv->lock, flags); + if (cm_id->state != IW_CM_STATE_IDLE) { + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + return -EBUSY; + } + cm_id->state = IW_CM_STATE_LISTEN; + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + if (ret != 0) + cm_id->state = IW_CM_STATE_IDLE; + + return ret; +} +EXPORT_SYMBOL(iw_cm_listen); + +int iw_cm_getpeername(struct iw_cm_id *cm_id, + struct sockaddr_in* local_addr, + struct sockaddr_in* remote_addr) +{ + if (cm_id->device == 0) + return -EINVAL; + + if (cm_id->device->iwcm == 0) + return -EINVAL; + + /* Make sure there's a connection */ + if (cm_id->state != IW_CM_STATE_ESTABLISHED) + return -ENOTCONN; + + return cm_id->device->iwcm->getpeername(cm_id, local_addr, remote_addr); +} +EXPORT_SYMBOL(iw_cm_getpeername); + +int iw_cm_reject(struct iw_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct iwcm_id_private *iwcm_id_priv; + unsigned long flags; + int ret; + + + if (cm_id->device == 0 || cm_id->device->iwcm == 0) + return -EINVAL; + + iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + spin_lock_irqsave(&iwcm_id_priv->lock, flags); + switch (cm_id->state) { + case IW_CM_STATE_CONN_RECV: + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = cm_id->device->iwcm->reject(cm_id, private_data, private_data_len); + cm_id->state = IW_CM_STATE_IDLE; + break; + default: + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_reject); + +int iw_cm_accept(struct iw_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct iwcm_id_private *iwcm_id_priv; + unsigned long flags; + int ret; + + if (cm_id->device == 0 || cm_id->device->iwcm == 0) + return -EINVAL; + + iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + spin_lock_irqsave(&iwcm_id_priv->lock, flags); + switch (cm_id->state) { + case IW_CM_STATE_CONN_RECV: + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = cm_id->device->iwcm->accept(cm_id, private_data, + private_data_len); + if (ret == 0) { + struct iw_cm_event event; + event.event = IW_CM_EVENT_ESTABLISHED; + event.provider_id = cm_id->provider_id; + event.status = 0; + event.local_addr = cm_id->local_addr; + event.remote_addr = cm_id->remote_addr; + event.private_data = 0; + event.private_data_len = 0; + cm_event_handler(cm_id, &event); + } + + break; + default: + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_accept); + +int iw_cm_bind_qp(struct iw_cm_id* cm_id, struct ib_qp* qp) +{ + int ret = -EINVAL; + + if (cm_id) { + cm_id->qp = qp; + ret = 0; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_bind_qp); + +int iw_cm_connect(struct iw_cm_id *cm_id, + const void* pdata, u8 pdata_len) +{ + struct iwcm_id_private* cm_id_priv; + int ret = 0; + unsigned long flags; + + if (cm_id->device == 0 || cm_id->device->iwcm == 0) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 
+ spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IW_CM_STATE_IDLE) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EBUSY; + } + cm_id->state = IW_CM_STATE_CONN_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->connect(cm_id, pdata, pdata_len); + if (ret != 0) + cm_id->state = IW_CM_STATE_IDLE; + + return ret; +} +EXPORT_SYMBOL(iw_cm_connect); + +int iw_cm_disconnect(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *iwcm_id_priv; + unsigned long flags; + int ret; + + if (cm_id->device == 0 || cm_id->device->iwcm == 0 || cm_id->qp == 0) + return -EINVAL; + + iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + spin_lock_irqsave(&iwcm_id_priv->lock, flags); + switch (cm_id->state) { + case IW_CM_STATE_ESTABLISHED: + cm_id->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = cm_id->device->iwcm->disconnect(cm_id, 1); + if (ret == 0) { + struct iw_cm_event event; + event.event = IW_CM_EVENT_LLP_DISCONNECT; + event.provider_id = cm_id->provider_id; + event.status = 0; + event.local_addr = cm_id->local_addr; + event.remote_addr = cm_id->remote_addr; + event.private_data = 0; + event.private_data_len = 0; + cm_event_handler(cm_id, &event); + } + + break; + default: + spin_unlock_irqrestore(&iwcm_id_priv->lock, flags); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_disconnect); + +static void iwcm_add_one(struct ib_device *device) +{ + struct iwcm_device *iwcm_dev; + struct iwcm_port *port; + unsigned long flags; + u8 i; + + if (device->node_type != IB_NODE_RNIC) + return; + + iwcm_dev = kmalloc(sizeof(*iwcm_dev) + sizeof(*port) * + device->phys_port_cnt, GFP_KERNEL); + if (!iwcm_dev) + return; + + iwcm_dev->device = device; + + for (i = 1; i <= device->phys_port_cnt; i++) { + port = &iwcm_dev->port[i-1]; + port->iwcm_dev = iwcm_dev; + port->port_num = i; + } + + ib_set_client_data(device, &iwcm_client, iwcm_dev); + + write_lock_irqsave(&iwcm.device_lock, flags); + list_add_tail(&iwcm_dev->list, &iwcm.device_list); + write_unlock_irqrestore(&iwcm.device_lock, flags); + return; +} + +static void iwcm_remove_one(struct ib_device *device) +{ + struct iwcm_device *iwcm_dev; + unsigned long flags; + + iwcm_dev = ib_get_client_data(device, &iwcm_client); + if (!iwcm_dev) + return; + + write_lock_irqsave(&iwcm.device_lock, flags); + list_del(&iwcm_dev->list); + write_unlock_irqrestore(&iwcm.device_lock, flags); + + kfree(iwcm_dev); +} + +/* Handles an inbound connect request. The function creates a new + * iw_cm_id to represent the new connection and inherits the client + * callback function and other attributes from the listening parent. + * + * The work item contains a pointer to the listen_cm_id and the event. The + * listen_cm_id contains the client cm_handler, context and device. These are + * copied when the device is cloned. The event contains the new four tuple. + */ +static int cm_conn_req_handler(struct iwcm_work* work) +{ + struct iw_cm_id* cm_id; + struct iwcm_id_private* cm_id_priv; + int rc; + + /* If the status was not successful, ignore request */ + if (work->event.status) { + printk(KERN_ERR "%s:%d Bad status=%d for connection request ... 
" + "should be filtered by provider\n", + __FUNCTION__, __LINE__, + work->event.status); + return work->event.status; + } + cm_id = iw_create_cm_id(work->cm_id->id.device, work->cm_id->id.cm_handler, + work->cm_id->id.context); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + cm_id_priv->id.local_addr = work->event.local_addr; + cm_id_priv->id.remote_addr = work->event.remote_addr; + cm_id_priv->id.provider_id = work->event.provider_id; + cm_id_priv->id.state = IW_CM_STATE_CONN_RECV; + + /* Call the client CM handler */ + rc = cm_id->cm_handler(cm_id, &work->event); + if (rc) { + cm_id->state = IW_CM_STATE_IDLE; + iw_destroy_cm_id(cm_id); + } + kfree(work); + return 0; +} + +/* + * Handles the transition to established state on the passive side. + */ +static int cm_conn_est_handler(struct iwcm_work* work) +{ + struct iwcm_id_private* cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = work->cm_id; + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + printk(KERN_ERR "%s:%d Invalid cm_id state=%d for established event\n", + __FUNCTION__, __LINE__, cm_id_priv->id.state); + ret = -EINVAL; + goto error_out; + } + + if (work->event.status == 0) { + cm_id_priv = work->cm_id; + cm_id_priv->id.local_addr = work->event.local_addr; + cm_id_priv->id.remote_addr = work->event.remote_addr; + cm_id_priv->id.state = IW_CM_STATE_ESTABLISHED; + } else + cm_id_priv->id.state = IW_CM_STATE_IDLE; + + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + /* Call the client CM handler */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event); + if (ret) { + cm_id_priv->id.state = IW_CM_STATE_IDLE; + iw_destroy_cm_id(&cm_id_priv->id); + } + + error_out: + kfree(work); + return ret; +} + +/* + * Handles the reply to our connect request. There are three + * possibilities: + * - If the cm_id is in the wrong state when the event is + * delivered, the event is ignored. [What should we do when the + * provider does something crazy?] + * - If the remote peer accepts the connection, we update the 4-tuple + * in the cm_id with the remote peer info, move the cm_id to the + * ESTABLISHED state and deliver the event to the client. + * - If the remote peer rejects the connection, or there is some + * connection error, move the cm_id to the IDLE state, and deliver + * the event to the client. 
+ */ +static int cm_conn_rep_handler(struct iwcm_work* work) +{ + struct iwcm_id_private* cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = work->cm_id; + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IW_CM_STATE_CONN_SENT) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + printk(KERN_ERR "%s:%d Invalid cm_id state=%d for connect reply event\n", + __FUNCTION__, __LINE__, cm_id_priv->id.state); + ret = -EINVAL; + goto error_out; + } + + if (work->event.status == 0) { + cm_id_priv = work->cm_id; + cm_id_priv->id.local_addr = work->event.local_addr; + cm_id_priv->id.remote_addr = work->event.remote_addr; + cm_id_priv->id.state = IW_CM_STATE_ESTABLISHED; + } else + cm_id_priv->id.state = IW_CM_STATE_IDLE; + + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + /* Call the client CM handler */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event); + if (ret) { + cm_id_priv->id.state = IW_CM_STATE_IDLE; + iw_destroy_cm_id(&cm_id_priv->id); + } + + error_out: + kfree(work); + return ret; +} + +static int cm_disconnect_handler(struct iwcm_work* work) +{ + struct iwcm_id_private* cm_id_priv; + int ret = 0; + + cm_id_priv = work->cm_id; + + cm_id_priv->id.state = IW_CM_STATE_IDLE; + + /* Call the client CM handler */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event); + if (ret) + iw_destroy_cm_id(&cm_id_priv->id); + + kfree(work); + return ret; +} + +static void cm_work_handler(void* arg) +{ + struct iwcm_work* work = (struct iwcm_work*)arg; + int rc; + + switch (work->event.event) { + case IW_CM_EVENT_CONNECT_REQUEST: + rc = cm_conn_req_handler(work); + break; + case IW_CM_EVENT_CONNECT_REPLY: + rc = cm_conn_rep_handler(work); + break; + case IW_CM_EVENT_ESTABLISHED: + rc = cm_conn_est_handler(work); + break; + case IW_CM_EVENT_LLP_DISCONNECT: + case IW_CM_EVENT_LLP_TIMEOUT: + case IW_CM_EVENT_LLP_RESET: + case IW_CM_EVENT_CLOSE: + rc = cm_disconnect_handler(work); + break; + } +} + +/* IW CM provider event callback handler. This function is called on + * interrupt context. The function builds a work queue element + * and enqueues it for processing on a work queue thread. This allows + * CM client callback functions to block. 
+ */ +static void cm_event_handler(struct iw_cm_id* cm_id, + struct iw_cm_event* event) +{ + struct iwcm_work *work; + struct iwcm_id_private* cm_id_priv; + + work = kmalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + INIT_WORK(&work->work, cm_work_handler, work); + work->cm_id = cm_id_priv; + work->event = *event; + queue_work(iwcm.wq, &work->work); +} + +static int __init iw_cm_init(void) +{ + memset(&iwcm, 0, sizeof iwcm); + INIT_LIST_HEAD(&iwcm.device_list); + rwlock_init(&iwcm.device_lock); + spin_lock_init(&iwcm.lock); + iwcm.wq = create_workqueue("iw_cm"); + if (!iwcm.wq) + return -ENOMEM; + + return ib_register_client(&iwcm_client); +} + +static void __exit iw_cm_cleanup(void) +{ + ib_unregister_client(&iwcm_client); +} + +module_init(iw_cm_init); +module_exit(iw_cm_cleanup); + Index: addr.c =================================================================== --- addr.c (revision 4748) +++ addr.c (working copy) @@ -65,6 +65,9 @@ case ARPHRD_INFINIBAND: dev_addr->dev_type = IB_NODE_CA; break; + case ARPHRD_ETHER: + dev_addr->dev_type = IB_NODE_RNIC; + break; default: return -EADDRNOTAVAIL; } Index: Makefile =================================================================== --- Makefile (revision 4748) +++ Makefile (working copy) @@ -1,6 +1,6 @@ EXTRA_CFLAGS += -Idrivers/infiniband/include -Idrivers/infiniband/ulp/ipoib -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_ping.o ib_cm.o \ +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_ping.o ib_cm.o iw_cm.o \ ib_sa.o ib_at.o ib_addr.o rdma_cm.o obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o ib_uat.o rdma_ucm.o @@ -14,6 +14,8 @@ ib_cm-y := cm.o +iw_cm-y := iwcm.o + rdma_cm-y := cma.o rdma_ucm-y := ucma.o Index: cma.c =================================================================== --- cma.c (revision 4748) +++ cma.c (working copy) @@ -3,6 +3,7 @@ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This Software is licensed under one of the following licenses: * @@ -31,9 +32,14 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/random.h> +#include <linux/inetdevice.h> +#include <net/route.h> +#include <net/arp.h> +#include <net/neighbour.h> #include <rdma/rdma_cm.h> #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> +#include <rdma/iw_cm.h> #include <rdma/ib_sa.h> MODULE_AUTHOR("Guy German"); @@ -102,8 +108,12 @@ int timeout_ms; struct ib_sa_query *query; int query_id; - struct ib_cm_id *cm_id; + union { + struct ib_cm_id *ib; + struct iw_cm_id *iw; + } cm_id; + u32 seq_num; u32 qp_num; enum ib_qp_type qp_type; @@ -239,11 +249,40 @@ return ret; } +static int cma_acquire_iw_dev(struct rdma_id_private* id_priv) +{ + struct rdma_dev_addr* dev_addr = &id_priv->id.route.addr.dev_addr; + struct cma_device* cma_dev; + int ret = -ENOENT; + + down(&mutex); + list_for_each_entry(cma_dev, &dev_list, list) { + if (memcmp(dev_addr->src_dev_addr, + &cma_dev->node_guid, + sizeof(cma_dev->node_guid)) == 0) { + + /* If we find the device, then check if this + * is an iWARP device. 
If it is, then attach + */ + if (cma_dev->device->node_type == IB_NODE_RNIC) { + cma_attach_to_dev(id_priv, cma_dev); + ret = 0; + break; + } + } + } + up(&mutex); + + return ret; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv) { switch (id_priv->id.route.addr.dev_addr.dev_type) { case IB_NODE_CA: return cma_acquire_ib_dev(id_priv); + case IB_NODE_RNIC: + return cma_acquire_iw_dev(id_priv); default: return -ENODEV; } @@ -306,6 +345,16 @@ IB_QP_PKEY_INDEX | IB_QP_PORT); } +static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { @@ -325,6 +374,9 @@ case IB_NODE_CA: ret = cma_init_ib_qp(id_priv, qp); break; + case IB_NODE_RNIC: + ret = cma_init_iw_qp(id_priv, qp); + break; default: ret = -ENOSYS; break; @@ -412,7 +464,7 @@ id_priv = container_of(id, struct rdma_id_private, id); switch (id_priv->id.device->node_type) { case IB_NODE_CA: - ret = ib_cm_init_qp_attr(id_priv->cm_id, qp_attr, + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; @@ -567,8 +619,8 @@ { cma_exch(id_priv, CMA_DESTROYING); - if (id_priv->cm_id && !IS_ERR(id_priv->cm_id)) - ib_destroy_cm_id(id_priv->cm_id); + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + ib_destroy_cm_id(id_priv->cm_id.ib); list_del(&id_priv->listen_list); if (id_priv->cma_dev) @@ -624,9 +676,20 @@ state = cma_exch(id_priv, CMA_DESTROYING); cma_cancel_operation(id_priv, state); - if (id_priv->cm_id && !IS_ERR(id_priv->cm_id)) - ib_destroy_cm_id(id_priv->cm_id); + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) { + switch (id->device->node_type) { + case IB_NODE_RNIC: + iw_destroy_cm_id(id_priv->cm_id.iw); + break; + default: + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + } + + id_priv->cm_id.ib = NULL; + } + if (id_priv->cma_dev) { down(&mutex); cma_detach_from_dev(id_priv); @@ -652,15 +715,15 @@ ret = cma_modify_qp_rts(&id_priv->id); if (ret) goto reject; - - ret = ib_send_cm_rtu(id_priv->cm_id, NULL, 0); + + ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; return 0; reject: cma_modify_qp_err(&id_priv->id); - ib_send_cm_rej(id_priv->cm_id, IB_CM_REJ_CONSUMER_DEFINED, + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; } @@ -676,7 +739,7 @@ return 0; reject: cma_modify_qp_err(&id_priv->id); - ib_send_cm_rej(id_priv->cm_id, IB_CM_REJ_CONSUMER_DEFINED, + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; } @@ -737,7 +800,7 @@ private_data_len); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ - id_priv->cm_id = NULL; + id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); cma_release_remove(id_priv); rdma_destroy_id(&id_priv->id); @@ -819,7 +882,7 @@ goto out; } - conn_id->cm_id = cm_id; + conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; @@ -829,7 +892,7 @@ IB_CM_REQ_PRIVATE_DATA_SIZE - offset); if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ - conn_id->cm_id = NULL; + conn_id->cm_id.ib = NULL; cma_exch(conn_id, CMA_DESTROYING); cma_release_remove(conn_id); rdma_destroy_id(&conn_id->id); @@ -874,6 +937,115 @@ } } +static int cma_iw_handler(struct iw_cm_id* iw_id, struct iw_cm_event* event) +{ + struct rdma_id_private *id_priv = iw_id->context; + enum rdma_cm_event_type event_type = 0; + int ret = 0; + + atomic_inc(&id_priv->dev_remove); + + switch (event->event) { + case IW_CM_EVENT_LLP_DISCONNECT: + case IW_CM_EVENT_LLP_RESET: + case IW_CM_EVENT_LLP_TIMEOUT: + case IW_CM_EVENT_CLOSE: + event_type = RDMA_CM_EVENT_DISCONNECTED; + break; + + case IW_CM_EVENT_CONNECT_REQUEST: + BUG_ON(1); + break; + + case IW_CM_EVENT_CONNECT_REPLY: { + if (event->status) + event_type = RDMA_CM_EVENT_REJECTED; + else + event_type = RDMA_CM_EVENT_ESTABLISHED; + break; + } + + case IW_CM_EVENT_ESTABLISHED: + event_type = RDMA_CM_EVENT_ESTABLISHED; + break; + } + + ret = cma_notify_user(id_priv, + event_type, + event->status, + event->private_data, + event->private_data_len); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.iw = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } + + cma_release_remove(id_priv); + return ret; +} + +static int iw_conn_req_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct rdma_cm_id* new_cm_id; + struct rdma_id_private *listen_id, *conn_id; + struct sockaddr_in* sin; + int ret; + + listen_id = cm_id->context; + atomic_inc(&listen_id->dev_remove); + if (!cma_comp(listen_id, CMA_LISTEN)) { + ret = -ECONNABORTED; + goto out; + } + + /* Create a new RDMA id the new IW CM ID */ + new_cm_id = rdma_create_id(listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP); + if (!new_cm_id) { + ret = -ENOMEM; + goto out; + } + conn_id = container_of(new_cm_id, struct rdma_id_private, id); + atomic_inc(&conn_id->dev_remove); + conn_id->state = CMA_CONNECT; + + /* New connection inherits device from parent */ + down(&mutex); + cma_attach_to_dev(conn_id, listen_id->cma_dev); + up(&mutex); + + conn_id->cm_id.iw = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_iw_handler; + + sin = (struct sockaddr_in*)&new_cm_id->route.addr.src_addr; + *sin = iw_event->local_addr; + + sin = (struct sockaddr_in*)&new_cm_id->route.addr.dst_addr; + *sin = iw_event->remote_addr; + + ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, + iw_event->private_data, + iw_event->private_data_len); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. 
*/ + conn_id->cm_id.iw = NULL; + cma_exch(conn_id, CMA_DESTROYING); + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + } + +out: + cma_release_remove(listen_id); + return ret; +} + static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_private_data_compare compare_data; @@ -881,28 +1053,52 @@ __be64 svc_id; int ret; - id_priv->cm_id = ib_create_cm_id(id_priv->id.device, cma_req_handler, + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv); - if (IS_ERR(id_priv->cm_id)) - return PTR_ERR(id_priv->cm_id); + if (IS_ERR(id_priv->cm_id.ib)) + return PTR_ERR(id_priv->cm_id.ib); addr = &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); if (cma_any_addr(addr)) - ret = ib_cm_listen(id_priv->cm_id, svc_id, 0, NULL); + ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { cma_set_compare_data(addr, &compare_data); - ret = ib_cm_listen(id_priv->cm_id, svc_id, 0, &compare_data); + ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data); } if (ret) { - ib_destroy_cm_id(id_priv->cm_id); - id_priv->cm_id = NULL; + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; } return ret; } +static int cma_iw_listen(struct rdma_id_private *id_priv) +{ + int ret; + struct sockaddr_in* sin; + + id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, + iw_conn_req_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.iw)) + return PTR_ERR(id_priv->cm_id.iw); + + sin = (struct sockaddr_in*)&id_priv->id.route.addr.src_addr; + id_priv->cm_id.iw->local_addr = *sin; + + ret = iw_cm_listen(id_priv->cm_id.iw, 10 /* backlog */); + + if (ret) { + iw_destroy_cm_id(id_priv->cm_id.iw); + id_priv->cm_id.iw = NULL; + } + + return ret; +} + static int cma_duplicate_listen(struct rdma_id_private *id_priv) { struct rdma_id_private *cur_id_priv; @@ -988,6 +1184,9 @@ case IB_NODE_CA: ret = cma_ib_listen(id_priv); break; + case IB_NODE_RNIC: + ret = cma_iw_listen(id_priv); + break; default: ret = -ENOSYS; break; @@ -1067,6 +1266,45 @@ return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; } +static void iw_route_handler(void* data) +{ + struct cma_work *work = data; + struct rdma_id_private *id_priv = work->id; + + kfree(work); + + atomic_inc(&id_priv->dev_remove); + + if (!cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ROUTE_RESOLVED)) + goto out; + + if (cma_notify_user(id_priv, RDMA_CM_EVENT_ROUTE_RESOLVED, 0, NULL, 0)) { + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + cma_deref_id(id_priv); + rdma_destroy_id(&id_priv->id); + return; + } + out: + cma_release_remove(id_priv); + cma_deref_id(id_priv); +} + +static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct cma_work *work; + + work = kmalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, iw_route_handler, work); + queue_work(rdma_wq, &work->work); + + return 0; +} + int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; @@ -1081,6 +1319,9 @@ case IB_NODE_CA: ret = cma_resolve_ib_route(id_priv, timeout_ms); break; + case IB_NODE_RNIC: + ret = cma_resolve_iw_route(id_priv, timeout_ms); + break; default: ret = -ENOSYS; break; @@ -1221,12 +1462,36 @@ return ret; } +static void iw_addr_handler(void* data) +{ + struct cma_work *work = data; + struct rdma_id_private *id_priv = work->id; + + kfree(work); + + atomic_inc(&id_priv->dev_remove); + + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) + goto out; + + if (cma_notify_user(id_priv, RDMA_CM_EVENT_ADDR_RESOLVED, 0, NULL, 0)) { + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + cma_deref_id(id_priv); + rdma_destroy_id(&id_priv->id); + return; + } +out: + cma_release_remove(id_priv); + cma_deref_id(id_priv); +} + int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; enum cma_state expected_state; - int ret; + int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->cma_dev) { @@ -1341,10 +1606,10 @@ memcpy(private_data + offset, conn_param->private_data, conn_param->private_data_len); - id_priv->cm_id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); - if (IS_ERR(id_priv->cm_id)) { - ret = PTR_ERR(id_priv->cm_id); + if (IS_ERR(id_priv->cm_id.ib)) { + ret = PTR_ERR(id_priv->cm_id.ib); goto out; } @@ -1371,12 +1636,45 @@ req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 
1 : 0; - ret = ib_send_cm_req(id_priv->cm_id, &req); + ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: kfree(private_data); return ret; } +static int cma_connect_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_id* cm_id; + struct sockaddr_in* sin; + int ret; + + if (id_priv->id.qp == NULL) + return -EINVAL; + + cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); + goto out; + } + + id_priv->cm_id.iw = cm_id; + + sin = (struct sockaddr_in*)&id_priv->id.route.addr.src_addr; + cm_id->local_addr = *sin; + + sin = (struct sockaddr_in*)&id_priv->id.route.addr.dst_addr; + cm_id->remote_addr = *sin; + + iw_cm_bind_qp(cm_id, id_priv->id.qp); + + ret = iw_cm_connect(cm_id, conn_param->private_data, + conn_param->private_data_len); + +out: + return ret; +} + int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -1396,6 +1694,9 @@ case IB_NODE_CA: ret = cma_connect_ib(id_priv, conn_param); break; + case IB_NODE_RNIC: + ret = cma_connect_iw(id_priv, conn_param); + break; default: ret = -ENOSYS; break; @@ -1433,7 +1734,7 @@ rep.rnr_retry_count = conn_param->rnr_retry_count; rep.srq = id_priv->srq ? 1 : 0; - return ib_send_cm_rep(id_priv->cm_id, &rep); + return ib_send_cm_rep(id_priv->cm_id.ib, &rep); } int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) @@ -1458,6 +1759,12 @@ else ret = cma_rep_recv(id_priv); break; + case IB_NODE_RNIC: { + iw_cm_bind_qp(id_priv->cm_id.iw, id_priv->id.qp); + ret = iw_cm_accept(id_priv->cm_id.iw, conn_param->private_data, + conn_param->private_data_len); + break; + } default: ret = -ENOSYS; break; @@ -1486,9 +1793,15 @@ switch (id->device->node_type) { case IB_NODE_CA: - ret = ib_send_cm_rej(id_priv->cm_id, IB_CM_REJ_CONSUMER_DEFINED, + ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); break; + + case IB_NODE_RNIC: + ret = iw_cm_reject(id_priv->cm_id.iw, + private_data, private_data_len); + break; + default: ret = -ENOSYS; break; @@ -1513,9 +1826,12 @@ switch (id->device->node_type) { case IB_NODE_CA: /* Initiate or respond to a disconnect. */ - if (ib_send_cm_dreq(id_priv->cm_id, NULL, 0)) - ib_send_cm_drep(id_priv->cm_id, NULL, 0); + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) + ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); break; + case IB_NODE_RNIC: + ret = iw_cm_disconnect(id_priv->cm_id.iw); + break; default: break; } Index: mad.c =================================================================== --- mad.c (revision 4748) +++ mad.c (working copy) @@ -2655,7 +2655,9 @@ { int start, end, i; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == IB_NODE_RNIC) + return; + else if (device->node_type == IB_NODE_SWITCH) { start = 0; end = 0; } else { @@ -2702,7 +2704,9 @@ { int i, num_ports, cur_port; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == IB_NODE_RNIC) + return; + else if (device->node_type == IB_NODE_SWITCH) { num_ports = 1; cur_port = 0; } else {