RE: [ofa-general] [PATCH v3] iw_cxgb3: Support "iwarp-only" interfaces to avoid 4-tuple conflicts.
>It is ok to block while holding a mutex, yes? It's okay, I just didn't try to trace through the code to see if it ever tries to acquire the same mutex in the thread that needs to signal the event. - Sean - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [ofa-general] [PATCH v3] iw_cxgb3: Support "iwarp-only" interfaces to avoid 4-tuple conflicts.
Sean Hefty wrote: The sysadmin creates "for iwarp use only" alias interfaces of the form "devname:iw*" where devname is the native interface name (eg eth0) for the iwarp netdev device. The alias label can be anything starting with "iw". The "iw" immediately after the ':' is the key used by the iw_cxgb3 driver. I'm still not sure about this, but haven't come up with anything better myself. And if there's a good chance of other rnic's needing the same support, I'd rather see the common code separated out, even if just encapsulated within this module for easy re-use. As for the code, I have a couple of questions about whether deadlock and a race condition are possible, plus a few minor comments. Thanks for reviewing! Responses are in-line below. +static void insert_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ +struct iwch_addrlist *addr; + +addr = kmalloc(sizeof *addr, GFP_KERNEL); +if (!addr) { +printk(KERN_ERR MOD "%s - failed to alloc memory!\n", + __FUNCTION__); +return; +} +addr->ifa = ifa; +mutex_lock(>mutex); +list_add_tail(>entry, >addrlist); +mutex_unlock(>mutex); +} Should this return success/failure? I think so. See below... +static int nb_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ +struct in_ifaddr *ifa = ctx; +struct iwch_dev *rnicp = container_of(self, struct iwch_dev, nb); + +PDBG("%s rnicp %p event %lx\n", __FUNCTION__, rnicp, event); + +switch (event) { +case NETDEV_UP: +if (netdev_is_ours(rnicp, ifa->ifa_dev->dev) && +is_iwarp_label(ifa->ifa_label)) { +PDBG("%s label %s addr 0x%x added\n", +__FUNCTION__, ifa->ifa_label, ifa->ifa_address); +insert_ifa(rnicp, ifa); +iwch_listeners_add_addr(rnicp, ifa->ifa_address); If insert_ifa() fails, what will iwch_listeners_add_addr() do? (I'm not easily seeing the relationship between the address list and the listen list at this point.) I guess insert_ifa() needs to return success/failure. Then if we failed to add the ifa to the list we won't update the listeners. The relationship is this: - when a listen is done on addr 0.0.0.0, the code walks the list of addresses to do specific listens on each address. - when an address is added or deleted, then the list of current listeners is walked and updated accordingly. +} +break; +case NETDEV_DOWN: +if (netdev_is_ours(rnicp, ifa->ifa_dev->dev) && +is_iwarp_label(ifa->ifa_label)) { +PDBG("%s label %s addr 0x%x deleted\n", +__FUNCTION__, ifa->ifa_label, ifa->ifa_address); +iwch_listeners_del_addr(rnicp, ifa->ifa_address); +remove_ifa(rnicp, ifa); +} +break; +default: +break; +} +return 0; +} + +static void delete_addrlist(struct iwch_dev *rnicp) +{ +struct iwch_addrlist *addr, *tmp; + +mutex_lock(>mutex); +list_for_each_entry_safe(addr, tmp, >addrlist, entry) { +list_del(>entry); +kfree(addr); +} +mutex_unlock(>mutex); +} + +static void populate_addrlist(struct iwch_dev *rnicp) +{ +int i; +struct in_device *indev; + +for (i = 0; i < rnicp->rdev.port_info.nports; i++) { +indev = in_dev_get(rnicp->rdev.port_info.lldevs[i]); +if (!indev) +continue; +for_ifa(indev) +if (is_iwarp_label(ifa->ifa_label)) { +PDBG("%s label %s addr 0x%x added\n", + __FUNCTION__, ifa->ifa_label, + ifa->ifa_address); +insert_ifa(rnicp, ifa); +} +endfor_ifa(indev); +} +} + static void rnic_init(struct iwch_dev *rnicp) { PDBG("%s iwch_dev %p\n", __FUNCTION__, rnicp); @@ -70,6 +187,12 @@ static void rnic_init(struct iwch_dev *r idr_init(>qpidr); idr_init(>mmidr); spin_lock_init(>lock); +INIT_LIST_HEAD(>addrlist); +INIT_LIST_HEAD(>listen_eps); +mutex_init(>mutex); +rnicp->nb.notifier_call = nb_callback; +populate_addrlist(rnicp); +register_inetaddr_notifier(>nb); rnicp->attr.vendor_id = 0x168; rnicp->attr.vendor_part_id = 7; @@ -148,6 +271,8 @@ static void close_rnic_dev(struct t3cdev mutex_lock(_mutex); list_for_each_entry_safe(dev, tmp, _list, entry) { if (dev->rdev.t3cdev_p == tdev) { +unregister_inetaddr_notifier(>nb); +delete_addrlist(dev); list_del(>entry); iwch_unregister_device(dev); cxio_rdev_close(>rdev); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index caf4e60..7fa0a47 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,6 +36,8 @@ #include #include #include #include +#include +#include #include @@ -101,6 +103,11 @@
Re: [ofa-general] [PATCH v3] iw_cxgb3: Support "iwarp-only" interfaces to avoid 4-tuple conflicts.
The sysadmin creates "for iwarp use only" alias interfaces of the form "devname:iw*" where devname is the native interface name (eg eth0) for the iwarp netdev device. The alias label can be anything starting with "iw". The "iw" immediately after the ':' is the key used by the iw_cxgb3 driver. I'm still not sure about this, but haven't come up with anything better myself. And if there's a good chance of other rnic's needing the same support, I'd rather see the common code separated out, even if just encapsulated within this module for easy re-use. As for the code, I have a couple of questions about whether deadlock and a race condition are possible, plus a few minor comments. +static void insert_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ + struct iwch_addrlist *addr; + + addr = kmalloc(sizeof *addr, GFP_KERNEL); + if (!addr) { + printk(KERN_ERR MOD "%s - failed to alloc memory!\n", + __FUNCTION__); + return; + } + addr->ifa = ifa; + mutex_lock(>mutex); + list_add_tail(>entry, >addrlist); + mutex_unlock(>mutex); +} Should this return success/failure? +static int nb_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ + struct in_ifaddr *ifa = ctx; + struct iwch_dev *rnicp = container_of(self, struct iwch_dev, nb); + + PDBG("%s rnicp %p event %lx\n", __FUNCTION__, rnicp, event); + + switch (event) { + case NETDEV_UP: + if (netdev_is_ours(rnicp, ifa->ifa_dev->dev) && + is_iwarp_label(ifa->ifa_label)) { + PDBG("%s label %s addr 0x%x added\n", + __FUNCTION__, ifa->ifa_label, ifa->ifa_address); + insert_ifa(rnicp, ifa); + iwch_listeners_add_addr(rnicp, ifa->ifa_address); If insert_ifa() fails, what will iwch_listeners_add_addr() do? (I'm not easily seeing the relationship between the address list and the listen list at this point.) + } + break; + case NETDEV_DOWN: + if (netdev_is_ours(rnicp, ifa->ifa_dev->dev) && + is_iwarp_label(ifa->ifa_label)) { + PDBG("%s label %s addr 0x%x deleted\n", + __FUNCTION__, ifa->ifa_label, ifa->ifa_address); + iwch_listeners_del_addr(rnicp, ifa->ifa_address); + remove_ifa(rnicp, ifa); + } + break; + default: + break; + } + return 0; +} + +static void delete_addrlist(struct iwch_dev *rnicp) +{ + struct iwch_addrlist *addr, *tmp; + + mutex_lock(>mutex); + list_for_each_entry_safe(addr, tmp, >addrlist, entry) { + list_del(>entry); + kfree(addr); + } + mutex_unlock(>mutex); +} + +static void populate_addrlist(struct iwch_dev *rnicp) +{ + int i; + struct in_device *indev; + + for (i = 0; i < rnicp->rdev.port_info.nports; i++) { + indev = in_dev_get(rnicp->rdev.port_info.lldevs[i]); + if (!indev) + continue; + for_ifa(indev) + if (is_iwarp_label(ifa->ifa_label)) { + PDBG("%s label %s addr 0x%x added\n", +__FUNCTION__, ifa->ifa_label, +ifa->ifa_address); + insert_ifa(rnicp, ifa); + } + endfor_ifa(indev); + } +} + static void rnic_init(struct iwch_dev *rnicp) { PDBG("%s iwch_dev %p\n", __FUNCTION__, rnicp); @@ -70,6 +187,12 @@ static void rnic_init(struct iwch_dev *r idr_init(>qpidr); idr_init(>mmidr); spin_lock_init(>lock); + INIT_LIST_HEAD(>addrlist); + INIT_LIST_HEAD(>listen_eps); + mutex_init(>mutex); + rnicp->nb.notifier_call = nb_callback; + populate_addrlist(rnicp); + register_inetaddr_notifier(>nb); rnicp->attr.vendor_id = 0x168; rnicp->attr.vendor_part_id = 7; @@ -148,6 +271,8 @@ static void close_rnic_dev(struct t3cdev mutex_lock(_mutex); list_for_each_entry_safe(dev, tmp, _list, entry) { if (dev->rdev.t3cdev_p == tdev) { + unregister_inetaddr_notifier(>nb); + delete_addrlist(dev); list_del(>entry); iwch_unregister_device(dev); cxio_rdev_close(>rdev); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index caf4e60..7fa0a47 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,6 +36,8 @@ #include #include #include #include +#include +#include #include @@ -101,6 +103,11 @@ struct
Re: [ofa-general] [PATCH v3] iw_cxgb3: Support iwarp-only interfaces to avoid 4-tuple conflicts.
The sysadmin creates for iwarp use only alias interfaces of the form devname:iw* where devname is the native interface name (eg eth0) for the iwarp netdev device. The alias label can be anything starting with iw. The iw immediately after the ':' is the key used by the iw_cxgb3 driver. I'm still not sure about this, but haven't come up with anything better myself. And if there's a good chance of other rnic's needing the same support, I'd rather see the common code separated out, even if just encapsulated within this module for easy re-use. As for the code, I have a couple of questions about whether deadlock and a race condition are possible, plus a few minor comments. +static void insert_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ + struct iwch_addrlist *addr; + + addr = kmalloc(sizeof *addr, GFP_KERNEL); + if (!addr) { + printk(KERN_ERR MOD %s - failed to alloc memory!\n, + __FUNCTION__); + return; + } + addr-ifa = ifa; + mutex_lock(rnicp-mutex); + list_add_tail(addr-entry, rnicp-addrlist); + mutex_unlock(rnicp-mutex); +} Should this return success/failure? +static int nb_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ + struct in_ifaddr *ifa = ctx; + struct iwch_dev *rnicp = container_of(self, struct iwch_dev, nb); + + PDBG(%s rnicp %p event %lx\n, __FUNCTION__, rnicp, event); + + switch (event) { + case NETDEV_UP: + if (netdev_is_ours(rnicp, ifa-ifa_dev-dev) + is_iwarp_label(ifa-ifa_label)) { + PDBG(%s label %s addr 0x%x added\n, + __FUNCTION__, ifa-ifa_label, ifa-ifa_address); + insert_ifa(rnicp, ifa); + iwch_listeners_add_addr(rnicp, ifa-ifa_address); If insert_ifa() fails, what will iwch_listeners_add_addr() do? (I'm not easily seeing the relationship between the address list and the listen list at this point.) + } + break; + case NETDEV_DOWN: + if (netdev_is_ours(rnicp, ifa-ifa_dev-dev) + is_iwarp_label(ifa-ifa_label)) { + PDBG(%s label %s addr 0x%x deleted\n, + __FUNCTION__, ifa-ifa_label, ifa-ifa_address); + iwch_listeners_del_addr(rnicp, ifa-ifa_address); + remove_ifa(rnicp, ifa); + } + break; + default: + break; + } + return 0; +} + +static void delete_addrlist(struct iwch_dev *rnicp) +{ + struct iwch_addrlist *addr, *tmp; + + mutex_lock(rnicp-mutex); + list_for_each_entry_safe(addr, tmp, rnicp-addrlist, entry) { + list_del(addr-entry); + kfree(addr); + } + mutex_unlock(rnicp-mutex); +} + +static void populate_addrlist(struct iwch_dev *rnicp) +{ + int i; + struct in_device *indev; + + for (i = 0; i rnicp-rdev.port_info.nports; i++) { + indev = in_dev_get(rnicp-rdev.port_info.lldevs[i]); + if (!indev) + continue; + for_ifa(indev) + if (is_iwarp_label(ifa-ifa_label)) { + PDBG(%s label %s addr 0x%x added\n, +__FUNCTION__, ifa-ifa_label, +ifa-ifa_address); + insert_ifa(rnicp, ifa); + } + endfor_ifa(indev); + } +} + static void rnic_init(struct iwch_dev *rnicp) { PDBG(%s iwch_dev %p\n, __FUNCTION__, rnicp); @@ -70,6 +187,12 @@ static void rnic_init(struct iwch_dev *r idr_init(rnicp-qpidr); idr_init(rnicp-mmidr); spin_lock_init(rnicp-lock); + INIT_LIST_HEAD(rnicp-addrlist); + INIT_LIST_HEAD(rnicp-listen_eps); + mutex_init(rnicp-mutex); + rnicp-nb.notifier_call = nb_callback; + populate_addrlist(rnicp); + register_inetaddr_notifier(rnicp-nb); rnicp-attr.vendor_id = 0x168; rnicp-attr.vendor_part_id = 7; @@ -148,6 +271,8 @@ static void close_rnic_dev(struct t3cdev mutex_lock(dev_mutex); list_for_each_entry_safe(dev, tmp, dev_list, entry) { if (dev-rdev.t3cdev_p == tdev) { + unregister_inetaddr_notifier(dev-nb); + delete_addrlist(dev); list_del(dev-entry); iwch_unregister_device(dev); cxio_rdev_close(dev-rdev); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index caf4e60..7fa0a47 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,6 +36,8 @@ #include linux/mutex.h #include linux/list.h #include linux/spinlock.h
Re: [ofa-general] [PATCH v3] iw_cxgb3: Support iwarp-only interfaces to avoid 4-tuple conflicts.
Sean Hefty wrote: The sysadmin creates for iwarp use only alias interfaces of the form devname:iw* where devname is the native interface name (eg eth0) for the iwarp netdev device. The alias label can be anything starting with iw. The iw immediately after the ':' is the key used by the iw_cxgb3 driver. I'm still not sure about this, but haven't come up with anything better myself. And if there's a good chance of other rnic's needing the same support, I'd rather see the common code separated out, even if just encapsulated within this module for easy re-use. As for the code, I have a couple of questions about whether deadlock and a race condition are possible, plus a few minor comments. Thanks for reviewing! Responses are in-line below. +static void insert_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ +struct iwch_addrlist *addr; + +addr = kmalloc(sizeof *addr, GFP_KERNEL); +if (!addr) { +printk(KERN_ERR MOD %s - failed to alloc memory!\n, + __FUNCTION__); +return; +} +addr-ifa = ifa; +mutex_lock(rnicp-mutex); +list_add_tail(addr-entry, rnicp-addrlist); +mutex_unlock(rnicp-mutex); +} Should this return success/failure? I think so. See below... +static int nb_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ +struct in_ifaddr *ifa = ctx; +struct iwch_dev *rnicp = container_of(self, struct iwch_dev, nb); + +PDBG(%s rnicp %p event %lx\n, __FUNCTION__, rnicp, event); + +switch (event) { +case NETDEV_UP: +if (netdev_is_ours(rnicp, ifa-ifa_dev-dev) +is_iwarp_label(ifa-ifa_label)) { +PDBG(%s label %s addr 0x%x added\n, +__FUNCTION__, ifa-ifa_label, ifa-ifa_address); +insert_ifa(rnicp, ifa); +iwch_listeners_add_addr(rnicp, ifa-ifa_address); If insert_ifa() fails, what will iwch_listeners_add_addr() do? (I'm not easily seeing the relationship between the address list and the listen list at this point.) I guess insert_ifa() needs to return success/failure. Then if we failed to add the ifa to the list we won't update the listeners. The relationship is this: - when a listen is done on addr 0.0.0.0, the code walks the list of addresses to do specific listens on each address. - when an address is added or deleted, then the list of current listeners is walked and updated accordingly. +} +break; +case NETDEV_DOWN: +if (netdev_is_ours(rnicp, ifa-ifa_dev-dev) +is_iwarp_label(ifa-ifa_label)) { +PDBG(%s label %s addr 0x%x deleted\n, +__FUNCTION__, ifa-ifa_label, ifa-ifa_address); +iwch_listeners_del_addr(rnicp, ifa-ifa_address); +remove_ifa(rnicp, ifa); +} +break; +default: +break; +} +return 0; +} + +static void delete_addrlist(struct iwch_dev *rnicp) +{ +struct iwch_addrlist *addr, *tmp; + +mutex_lock(rnicp-mutex); +list_for_each_entry_safe(addr, tmp, rnicp-addrlist, entry) { +list_del(addr-entry); +kfree(addr); +} +mutex_unlock(rnicp-mutex); +} + +static void populate_addrlist(struct iwch_dev *rnicp) +{ +int i; +struct in_device *indev; + +for (i = 0; i rnicp-rdev.port_info.nports; i++) { +indev = in_dev_get(rnicp-rdev.port_info.lldevs[i]); +if (!indev) +continue; +for_ifa(indev) +if (is_iwarp_label(ifa-ifa_label)) { +PDBG(%s label %s addr 0x%x added\n, + __FUNCTION__, ifa-ifa_label, + ifa-ifa_address); +insert_ifa(rnicp, ifa); +} +endfor_ifa(indev); +} +} + static void rnic_init(struct iwch_dev *rnicp) { PDBG(%s iwch_dev %p\n, __FUNCTION__, rnicp); @@ -70,6 +187,12 @@ static void rnic_init(struct iwch_dev *r idr_init(rnicp-qpidr); idr_init(rnicp-mmidr); spin_lock_init(rnicp-lock); +INIT_LIST_HEAD(rnicp-addrlist); +INIT_LIST_HEAD(rnicp-listen_eps); +mutex_init(rnicp-mutex); +rnicp-nb.notifier_call = nb_callback; +populate_addrlist(rnicp); +register_inetaddr_notifier(rnicp-nb); rnicp-attr.vendor_id = 0x168; rnicp-attr.vendor_part_id = 7; @@ -148,6 +271,8 @@ static void close_rnic_dev(struct t3cdev mutex_lock(dev_mutex); list_for_each_entry_safe(dev, tmp, dev_list, entry) { if (dev-rdev.t3cdev_p == tdev) { +unregister_inetaddr_notifier(dev-nb); +delete_addrlist(dev); list_del(dev-entry); iwch_unregister_device(dev); cxio_rdev_close(dev-rdev); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index caf4e60..7fa0a47 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,6 +36,8 @@ #include linux/mutex.h #include linux/list.h #include
RE: [ofa-general] [PATCH v3] iw_cxgb3: Support iwarp-only interfaces to avoid 4-tuple conflicts.
It is ok to block while holding a mutex, yes? It's okay, I just didn't try to trace through the code to see if it ever tries to acquire the same mutex in the thread that needs to signal the event. - Sean - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [ofa-general] [PATCH v3] iw_cxgb3: Support "iwarp-only" interfaces to avoid 4-tuple conflicts.
Rolan/Sean, What do you all think? Steve. Steve Wise wrote: iw_cxgb3: Support "iwarp-only" interfaces to avoid 4-tuple conflicts. Version 3: - don't use list_del_init() where list_del() is sufficient. Version 2: - added a per-device mutex for the address and listening endpoints lists. - wait for all replies if sending multiple passive_open requests to rnic. - log warning if no addresses are available when a listen is issued. - tested --- Design: The sysadmin creates "for iwarp use only" alias interfaces of the form "devname:iw*" where devname is the native interface name (eg eth0) for the iwarp netdev device. The alias label can be anything starting with "iw". The "iw" immediately after the ':' is the key used by the iw_cxgb3 driver. EG: ifconfig eth0 192.168.70.123 up ifconfig eth0:iw1 192.168.71.123 up ifconfig eth0:iw2 192.168.72.123 up In the above example, 192.168.70/24 is for TCP traffic, while 192.168.71/24 and 192.168.72/24 are for iWARP/RDMA use. The rdma-only interface must be on its own IP subnet. This allows routing all rdma traffic onto this interface. The iWARP driver must translate all listens on address 0.0.0.0 to the set of rdma-only ip addresses for the device in question. This prevents incoming connect requests to the TCP ipaddresses from going up the rdma stack. Implementation Details: - The iw_cxgb3 driver registers for inetaddr events via register_inetaddr_notifier(). This allows tracking the iwarp-only addresses/subnets as they get added and deleted. The iwarp driver maintains a list of the current iwarp-only addresses. - The iw_cxgb3 driver builds the list of iwarp-only addresses for its devices at module insert time. This is needed because the inetaddr notifier callbacks don't "replay" address-add events when someone registers. So the driver must build the initial list at module load time. - When a listen is done on address 0.0.0.0, then the iw_cxgb3 driver must translate that into a set of listens on the iwarp-only addresses. This is implemented by maintaining a list of stid/addr entries per listening endpoint. - When a new iwarp-only address is added or removed, the iw_cxgb3 driver must traverse the set of listening endpoints and update them accordingly. This allows an application to bind to 0.0.0.0 prior to the iwarp-only interfaces being configured. It also allows changing the iwarp-only set of addresses and getting the expected behavior for apps already bound to 0.0.0.0. This is done by maintaining a list of listening endpoints off the device struct. - The address list, the listening endpoint list, and each list of stid/addrs in use per listening endpoint are all protected via a mutex per iw_cxgb3 device. Signed-off-by: Steve Wise <[EMAIL PROTECTED]> --- drivers/infiniband/hw/cxgb3/iwch.c| 125 drivers/infiniband/hw/cxgb3/iwch.h| 11 + drivers/infiniband/hw/cxgb3/iwch_cm.c | 259 +++-- drivers/infiniband/hw/cxgb3/iwch_cm.h | 15 ++ 4 files changed, 360 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 0315c9d..d81d46e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -63,6 +63,123 @@ struct cxgb3_client t3c_client = { static LIST_HEAD(dev_list); static DEFINE_MUTEX(dev_mutex); +static void insert_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ + struct iwch_addrlist *addr; + + addr = kmalloc(sizeof *addr, GFP_KERNEL); + if (!addr) { + printk(KERN_ERR MOD "%s - failed to alloc memory!\n", + __FUNCTION__); + return; + } + addr->ifa = ifa; + mutex_lock(>mutex); + list_add_tail(>entry, >addrlist); + mutex_unlock(>mutex); +} + +static void remove_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ + struct iwch_addrlist *addr, *tmp; + + mutex_lock(>mutex); + list_for_each_entry_safe(addr, tmp, >addrlist, entry) { + if (addr->ifa == ifa) { + list_del(>entry); + kfree(addr); + goto out; + } + } +out: + mutex_unlock(>mutex); +} + +static int netdev_is_ours(struct iwch_dev *rnicp, struct net_device *netdev) +{ + int i; + + for (i = 0; i < rnicp->rdev.port_info.nports; i++) + if (netdev == rnicp->rdev.port_info.lldevs[i]) + return 1; + return 0; +} + +static inline int is_iwarp_label(char *label) +{ + char *colon; + + colon = strchr(label, ':'); + if (colon && !strncmp(colon+1, "iw", 2)) + return 1; + return 0; +} + +static int nb_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ + struct in_ifaddr *ifa = ctx; + struct iwch_dev *rnicp = container_of(self, struct iwch_dev, nb);
Re: [ofa-general] [PATCH v3] iw_cxgb3: Support iwarp-only interfaces to avoid 4-tuple conflicts.
Rolan/Sean, What do you all think? Steve. Steve Wise wrote: iw_cxgb3: Support iwarp-only interfaces to avoid 4-tuple conflicts. Version 3: - don't use list_del_init() where list_del() is sufficient. Version 2: - added a per-device mutex for the address and listening endpoints lists. - wait for all replies if sending multiple passive_open requests to rnic. - log warning if no addresses are available when a listen is issued. - tested --- Design: The sysadmin creates for iwarp use only alias interfaces of the form devname:iw* where devname is the native interface name (eg eth0) for the iwarp netdev device. The alias label can be anything starting with iw. The iw immediately after the ':' is the key used by the iw_cxgb3 driver. EG: ifconfig eth0 192.168.70.123 up ifconfig eth0:iw1 192.168.71.123 up ifconfig eth0:iw2 192.168.72.123 up In the above example, 192.168.70/24 is for TCP traffic, while 192.168.71/24 and 192.168.72/24 are for iWARP/RDMA use. The rdma-only interface must be on its own IP subnet. This allows routing all rdma traffic onto this interface. The iWARP driver must translate all listens on address 0.0.0.0 to the set of rdma-only ip addresses for the device in question. This prevents incoming connect requests to the TCP ipaddresses from going up the rdma stack. Implementation Details: - The iw_cxgb3 driver registers for inetaddr events via register_inetaddr_notifier(). This allows tracking the iwarp-only addresses/subnets as they get added and deleted. The iwarp driver maintains a list of the current iwarp-only addresses. - The iw_cxgb3 driver builds the list of iwarp-only addresses for its devices at module insert time. This is needed because the inetaddr notifier callbacks don't replay address-add events when someone registers. So the driver must build the initial list at module load time. - When a listen is done on address 0.0.0.0, then the iw_cxgb3 driver must translate that into a set of listens on the iwarp-only addresses. This is implemented by maintaining a list of stid/addr entries per listening endpoint. - When a new iwarp-only address is added or removed, the iw_cxgb3 driver must traverse the set of listening endpoints and update them accordingly. This allows an application to bind to 0.0.0.0 prior to the iwarp-only interfaces being configured. It also allows changing the iwarp-only set of addresses and getting the expected behavior for apps already bound to 0.0.0.0. This is done by maintaining a list of listening endpoints off the device struct. - The address list, the listening endpoint list, and each list of stid/addrs in use per listening endpoint are all protected via a mutex per iw_cxgb3 device. Signed-off-by: Steve Wise [EMAIL PROTECTED] --- drivers/infiniband/hw/cxgb3/iwch.c| 125 drivers/infiniband/hw/cxgb3/iwch.h| 11 + drivers/infiniband/hw/cxgb3/iwch_cm.c | 259 +++-- drivers/infiniband/hw/cxgb3/iwch_cm.h | 15 ++ 4 files changed, 360 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 0315c9d..d81d46e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -63,6 +63,123 @@ struct cxgb3_client t3c_client = { static LIST_HEAD(dev_list); static DEFINE_MUTEX(dev_mutex); +static void insert_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ + struct iwch_addrlist *addr; + + addr = kmalloc(sizeof *addr, GFP_KERNEL); + if (!addr) { + printk(KERN_ERR MOD %s - failed to alloc memory!\n, + __FUNCTION__); + return; + } + addr-ifa = ifa; + mutex_lock(rnicp-mutex); + list_add_tail(addr-entry, rnicp-addrlist); + mutex_unlock(rnicp-mutex); +} + +static void remove_ifa(struct iwch_dev *rnicp, struct in_ifaddr *ifa) +{ + struct iwch_addrlist *addr, *tmp; + + mutex_lock(rnicp-mutex); + list_for_each_entry_safe(addr, tmp, rnicp-addrlist, entry) { + if (addr-ifa == ifa) { + list_del(addr-entry); + kfree(addr); + goto out; + } + } +out: + mutex_unlock(rnicp-mutex); +} + +static int netdev_is_ours(struct iwch_dev *rnicp, struct net_device *netdev) +{ + int i; + + for (i = 0; i rnicp-rdev.port_info.nports; i++) + if (netdev == rnicp-rdev.port_info.lldevs[i]) + return 1; + return 0; +} + +static inline int is_iwarp_label(char *label) +{ + char *colon; + + colon = strchr(label, ':'); + if (colon !strncmp(colon+1, iw, 2)) + return 1; + return 0; +} + +static int nb_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ + struct in_ifaddr *ifa = ctx; + struct iwch_dev *rnicp = container_of(self, struct